In [1]:
from concurrent.futures import ThreadPoolExecutor
from human_eval.execution import check_correctness
from human_eval.data import HUMAN_EVAL, stream_jsonl

In [2]:
# Read the whole HumanEval problem stream into a list so later cells can
# pick a problem by position (problems[idx]).
problems = list(stream_jsonl(HUMAN_EVAL))

# Number of worker threads in the shared pool; tune here rather than inline.
MAX_WORKERS = 5
# Shared pool that the execution helpers submit check_correctness jobs to.
executor = ThreadPoolExecutor(max_workers=MAX_WORKERS)

In [3]:
# Pick one problem to experiment with; index 10 is the make_palindrome task
# whose prompt is printed by this cell.
idx = 10
problem = problems[idx]
# Limit handed to check_correctness on every run (presumably seconds — confirm).
timeout = 10
print(problem['prompt'])
print('--')
# print(problem['canonical_solution'])
# Hand-written function body that follows the algorithm described in the prompt.
correct_completion = '''    if not string:
        return ''

    beginning_of_suffix = 0

    while not is_palindrome(string[beginning_of_suffix:]):
        beginning_of_suffix += 1

    return string + string[:beginning_of_suffix][::-1]'''


# Deliberately wrong body: mirrors the whole string, so the result is a
# palindrome but not the shortest one (e.g. 'cat' gives 'cattac', not 'catac').
incorrect_completion = '''    return string + string[:][::-1]'''



def is_palindrome(string: str) -> bool:
    """ Test if given string is a palindrome """
    return string == string[::-1]


def make_palindrome(string: str) -> str:
    """ Find the shortest palindrome that begins with a supplied string.
    Algorithm idea is simple:
    - Find the longest postfix of supplied string that is a palindrome.
    - Append to the end of the string reverse of a string prefix that comes before the palindromic suffix.
    >>> make_palindrome('')
    ''
    >>> make_palindrome('cat')
    'catac'
    >>> make_palindrome('cata')
    'catac'
    """

--


In [4]:
# TODO: report the fraction of test cases passed instead of a single 0/1 verdict (stats_execute below does this per assert line).
def execute(problem, completion, timeout):
    """Run ``check_correctness`` for one completion on the shared executor.

    The job is submitted to the module-level ``executor`` and this call blocks
    until it finishes.

    Args:
        problem: Problem record forwarded unchanged to ``check_correctness``.
        completion: Candidate completion text forwarded unchanged.
        timeout: Limit forwarded unchanged to ``check_correctness``.

    Returns:
        Whatever ``check_correctness`` returned; in this notebook's outputs
        that is a dict with 'task_id', 'passed', 'result' and 'completion_id'.
    """
    return executor.submit(check_correctness, problem, completion, timeout).result()


In [5]:
print(correct_completion)

    if not string:
        return ''

    beginning_of_suffix = 0

    while not is_palindrome(string[beginning_of_suffix:]):
        beginning_of_suffix += 1

    return string + string[:beginning_of_suffix][::-1]


In [6]:
# Switch to problem index 3 (task 'HumanEval/3'; the traceback further down
# shows its entry point is below_zero). NOTE: this rebinds the global
# `problem`, so every later cell that reads `problem` runs against this task.
problem = problems[3]
# Function body that answers True as soon as the running balance goes negative.
completion = '''    balance = 0
    for operation in operations:
        balance += operation

        if balance < 0:
            return True

    return False'''

In [7]:
execute(problem, completion, timeout)

{'task_id': 'HumanEval/3',
 'passed': True,
 'result': 'passed',
 'completion_id': None}

In [8]:
# NOTE(review): `problem` is still problems[3] here, but incorrect_completion was
# written for the make_palindrome task (problems[10]); the run therefore fails
# with a NameError on `string` rather than on a wrong answer.
execute(problem, incorrect_completion, timeout)

{'task_id': 'HumanEval/3',
 'passed': False,
 'result': 'failed: name \'string\' is not defined, traceback: Traceback (most recent call last):\n  File "/Users/arunpatro/llm-mcts/human-eval/human_eval/execution.py", line 49, in unsafe_execute\n    exec(check_program, exec_globals)\n  File "<string>", line 30, in <module>\n    check(below_zero)\n  File "<string>", line 23, in check\n    assert candidate([]) == False\n  File "<string>", line 13, in below_zero\n    return string + string[:][::-1]\nNameError: name \'string\' is not defined\n',
 'completion_id': None}

In [12]:
def stats_execute(problem, completion, timeout):
    """Score ``completion`` by the fraction of the problem's test lines it passes.

    The problem's test source is split at the ``def check(candidate):`` header.
    Each remaining line of the ``check`` body is wrapped in its own minimal
    ``check`` function (with the text before the header kept as a shared
    preamble) and run through ``check_correctness`` on the shared executor.
    Blank, whitespace-only and comment-only lines are not test cases and are
    skipped; wrapping one of them would produce a ``check`` with no body, which
    is a syntax error and would be miscounted as a failing test.

    Limitation: the split is line-based, so a ``check`` body that contains
    multi-line statements, loops or setup lines is not split correctly.

    Args:
        problem: Problem dict; the 'test' and 'task_id' keys are read here.
            The dict itself is not modified.
        completion: Candidate completion text forwarded to ``check_correctness``.
        timeout: Limit forwarded unchanged to ``check_correctness``.

    Returns:
        ``{'task_id': ..., 'pass_rate': ...}`` where ``pass_rate`` is the share
        of test lines whose result has a truthy 'passed' entry, or ``0.0`` when
        the test source contains no test lines.

    Raises:
        ValueError: If the test source does not contain the
            ``def check(candidate):`` header line exactly once.
    """
    check_header = 'def check(candidate):\n'
    # Tuple unpacking raises ValueError unless the header occurs exactly once.
    preamble, check_body = problem['test'].split(check_header)

    single_tests = [
        preamble + check_header + line
        for line in check_body.split('\n')
        if line.strip() and not line.lstrip().startswith('#')
    ]

    # Give every job its own problem dict and submit them all before waiting,
    # so the pool can actually run the per-line checks side by side.
    pending = [
        executor.submit(check_correctness, {**problem, 'test': test_src}, completion, timeout)
        for test_src in single_tests
    ]
    outcomes = [job.result() for job in pending]

    if not outcomes:
        # No test lines at all: report 0.0 instead of dividing by zero.
        return {'task_id': problem['task_id'], 'pass_rate': 0.0}

    passed_count = sum(bool(outcome['passed']) for outcome in outcomes)
    return {'task_id': problem['task_id'], 'pass_rate': passed_count / len(outcomes)}

In [13]:
# Trivial body that always answers False, used to exercise partial scoring.
completion = '''    return False'''

In [14]:
stats_execute(problem, completion, timeout)

{'task_id': 'HumanEval/3', 'pass_rate': 0.5}

In [23]:
# Inspect the raw test source of another problem to see the layout that
# stats_execute splits: a preamble, then one assert per line inside check().
idx = 20
problem = problems[idx]
print(problem['test'])



METADATA = {
    'author': 'jt',
    'dataset': 'test'
}


def check(candidate):
    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2]) == (3.9, 4.0)
    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0]) == (5.0, 5.9)
    assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.2]) == (2.0, 2.2)
    assert candidate([1.0, 2.0, 3.0, 4.0, 5.0, 2.0]) == (2.0, 2.0)
    assert candidate([1.1, 2.2, 3.1, 4.1, 5.1]) == (2.2, 3.1)


