In [8]:
from modules.ollama import  OllamaHandler
from modules.agents.qa import Carlos

ollama_handler = OllamaHandler(model_name="deepseek-r1:7b", temperature=0.5)
qa = Carlos(ollama_handler)

In [9]:
from modules.models import *

task = BaseTask(
    definition="Create a function that returns the sum of two numbers.",
    function_name="sum_two_numbers",
    args=[
        FunctionArgs(name="a", type="int", description="The first number."),
        FunctionArgs(name="b", type="int", description="The second number."),
    ],
    complexity_level="easy",
    dod="The function should take two integers as input and return their sum as an integer.",
    keywords=["sum", "addition", "numbers"],   
)

In [10]:

test_suite = qa.create_tests_suite(task)

In [11]:
test_suite

TestSuite(test_cases=[TestCase(inputs='5,3', expected_output='`8`'), TestCase(inputs='0,0', expected_output='`0`'), TestCase(inputs='-10,-20', expected_output='`-30`')], test_code_raw="# FUNCTION_IMPLEMENTATION_HERE\n\nimport sys\nfor line in sys.stdin:\n    try:\n        num1, num2 = line.strip().split(',');\n        result = sum_two_numbers(int(num1), int(num2))\n        print(result)\n    except ValueError:\n        print('ValueError')")

In [12]:
implementation = \
"""
def sum_two_numbers(a: int, b: int) -> int:
    return a + b
"""



In [13]:
result = qa.run_tests(test_suite, implementation, task.function_name)

In [14]:
result

TestsResult(total_time=0.349139928817749, passed_tests=0, total_tests=3, errors=[ErrorDetail(test_input='0,0', expected_output='`0`', actual_output='0', error_message=''), ErrorDetail(test_input='5,3', expected_output='`8`', actual_output='8', error_message=''), ErrorDetail(test_input='-10,-20', expected_output='`-30`', actual_output='-30', error_message='')])

In [5]:
import subprocess
import sys

process = subprocess.run([sys.executable, "-m", "mypy", "./response_message_content.py"],text=True)

In [6]:
process

CompletedProcess(args=['d:\\Faculdade\\TCC\\bench\\.venv\\Scripts\\python.exe', '-m', 'mypy', './response_message_content.py'], returncode=1)

In [11]:
from mypy import api

std_out, std_err, exit_status = api.run(["./response_message_content.py"])

print("stdout:", std_out)
print("stderr:", std_err)
print("exit status:", exit_status)

stdout: response_message_content.py:9: [1m[91merror:[0m Name [0m[1m"sum_two_numbers"[0m is not defined  [0m[93m[name-defined][0m
[1m[91mFound 1 error in 1 file (checked 1 source file)[0m

stderr: 
exit status: 1


In [12]:
std_out.splitlines()

['response_message_content.py:9: \x1b[1m\x1b[91merror:\x1b[0m Name \x1b[0m\x1b[1m"sum_two_numbers"\x1b[0m is not defined  \x1b[0m\x1b[93m[name-defined]\x1b[0m',
 '\x1b[1m\x1b[91mFound 1 error in 1 file (checked 1 source file)\x1b[0m']

In [13]:
import re

ansi_escape = re.compile(r'\x1b\[[0-9;]*m')

cleaned_lines = [ansi_escape.sub('', line) for line in std_out.splitlines()]
cleaned_lines

['response_message_content.py:9: error: Name "sum_two_numbers" is not defined  [name-defined]',
 'Found 1 error in 1 file (checked 1 source file)']

In [None]:
from pathlib import Path
from typing import List, Optional
from dataclasses import dataclass
import re
from mypy import api

ERROR_RE = re.compile(r'^(?P<file>.+?):(?P<line>\d+): error: (?P<msg>.+?)(?: \[(?P<code>[^\]]+)\])?$')
WARNING_RE = re.compile(r'^(?P<file>.+?):(?P<line>\d+): warning: (?P<msg>.+?)(?: \[(?P<code>[^\]]+)\])?$')
ANSI_RE = re.compile(r'\x1b\[[0-9;]*m')


@dataclass
class CheckResult:
    success: bool
    errors: List[str]
    warnings: Optional[List[str]] = None


def mypy_check(
    file_path: Path,
    ignore_warnings: bool,
    ignore_function: Optional[str] = None
) -> CheckResult:
    
    stdout, stderr, exit_status = api.run([str(file_path)])
    clean_output = ANSI_RE.sub('', stdout)

    errors: List[str] = []
    warnings: List[str] = []

    for line in clean_output.splitlines():
        line = line.strip()
        
        if not line:
            continue

        if (m_err := ERROR_RE.match(line)):
            msg = m_err.group("msg")
            # Ignora se for exatamente a função pedida
            if ignore_function and f'"{ignore_function}"' in msg:
                continue
            errors.append(line)

        elif WARNING_RE.match(line):
            if not ignore_warnings:
                warnings.append(line)

    success = (exit_status == 0) or (len(errors) == 0)

    return CheckResult(
        success=success,
        errors=errors,
        warnings=None if ignore_warnings else warnings
    )

In [19]:
mypy_check(Path("./response_message_content.py"), ignore_warnings=False)

