In [None]:
!pip install openai

In [None]:
import os

import openai

# Take the API key from the environment so no secret is stored in the notebook.
# A missing variable fails right here with a KeyError rather than inside the
# first API call.
openai.api_key = os.environ["OPENAI_API_KEY"]

In [None]:
import requests
from evalplus.data import get_human_eval_plus, write_jsonl

# Number of completions requested for every benchmark task.
num_samples_per_task = 1

# Task table from evalplus; the benchmark code looks up each task's "prompt"
# by task id.
problems = get_human_eval_plus()

# Cell output: how many tasks were loaded.
len(problems)

In [None]:
import time
def run(prompt, seed, port = 5000, max_retries = 10, max_backoff = 60.0):
    """Send ``prompt`` to the GPT-4 chat endpoint and return the reply text.

    Failed attempts are printed and retried with exponential back-off
    (1 s, 2 s, 4 s, ... capped at ``max_backoff``). The retry count is bounded
    so that a permanent failure surfaces as an exception instead of looping
    forever.

    Args:
        prompt: User message sent after the fixed system message.
        seed: Not used by this function; accepted so existing callers keep working.
        port: Not used by this function; accepted so existing callers keep working.
        max_retries: Number of failed attempts that are retried before the
            error is re-raised. ``None`` retries without limit.
        max_backoff: Upper bound, in seconds, on the pause between attempts.

    Returns:
        The ``content`` string of the first choice in the response.

    Raises:
        Exception: The error from the final attempt, once ``max_retries``
            retries have been used up.
    """
    delay = 1.0
    retries_used = 0

    while True:
        try:
            result = openai.ChatCompletion.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant. Please complete the following code snippet."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.0,
                max_tokens=500,
                n=1
            )
            # Indexing stays inside the try so a malformed response is retried too.
            return result['choices'][0]["message"]["content"]
        except Exception as e:
            print(e)
            if max_retries is not None and retries_used >= max_retries:
                raise
            retries_used += 1
            time.sleep(delay)
            delay = min(delay * 2, max_backoff)

In [None]:
def get_function_body(code):
    """Trim ``code`` at the first unindented line that follows a function.

    Lines are copied in order. Once a line containing ``'def '`` has been
    seen, copying stops at the first later line that is non-blank, does not
    begin with a space or tab, and does not itself contain ``'def '``.
    Everything before that point, including lines ahead of the first ``def``,
    is kept.

    Args:
        code: Text to trim.

    Returns:
        The kept lines joined with ``'\\n'``.
    """
    kept = []
    inside_function = False

    for text in code.splitlines():
        if 'def ' in text:
            # Any line mentioning 'def ' is kept and (re)opens a function.
            inside_function = True
        elif inside_function and text.strip() and text[0] not in ' \t':
            # Back at column 0 with real content: the function has ended.
            break
        kept.append(text)

    return '\n'.join(kept)

def cut_off_prefix(s):
    """Drop the text that precedes the first piece of Python code in ``s``.

    The code is taken to start at the first line whose first non-blank
    characters are ``'from '``, ``'def '`` or ``'import '``. Anchoring to the
    start of a line stops ordinary prose (e.g. "the code from the prompt")
    from being mistaken for an import. If no line starts with a keyword, the
    earliest occurrence of a keyword anywhere in ``s`` is used instead.

    Args:
        s: Text to trim.

    Returns:
        ``s`` from the chosen keyword onwards, or ``s`` unchanged when none of
        the keywords occurs in it.
    """
    keywords = ('from ', 'def ', 'import ')

    # Preferred: a keyword at the start of a line (leading indentation allowed).
    offset = 0
    for line in s.splitlines(keepends=True):
        stripped = line.lstrip()
        if stripped.startswith(keywords):
            # Cut at the keyword itself, not at the line's indentation.
            return s[offset + len(line) - len(stripped):]
        offset += len(line)

    # Fallback: earliest keyword anywhere in the text.
    hits = [idx for idx in (s.find(word) for word in keywords) if idx != -1]
    return s[min(hits):] if hits else s
    
def generate_one_completion(prompt_code, seed = -1, port = 5000, long_prompt = False, user_tag = "HUMAN:", assistant_tag = "AI MODEL:", system_prefix = ""):
    """Ask the model to complete ``prompt_code`` and return the trimmed reply.

    Args:
        prompt_code: Code snippet to be completed.
        seed: Printed for progress and forwarded to ``run``.
        port: Forwarded to ``run``.
        long_prompt: When true, wrap the snippet in the instruction template;
            otherwise wrap it in a bare code fence.
        user_tag: Second line of the long prompt.
        assistant_tag: Accepted but not used when building the prompt.
        system_prefix: First line of the long prompt.

    Returns:
        The reply after ``cut_off_prefix`` and ``get_function_body`` have been
        applied to it.
    """
    print(seed)

    if not long_prompt:
        request_text = "```\n%s\n```" % prompt_code
    else:
        # Written line by line so the template's exact whitespace is visible.
        template = (
            "%s\n"
            "%s\n"
            "Complete the following Python code: \n"
            "Notes: respond with the entire complete function definition\n"
            "do not add any comments, be as concise in your code as possible\n"
            "use only built-in libraries, assume no additional imports other than those provided (if any)\n"
            "\n"
            "code:\n"
            "%s\n"
        )
        request_text = template % (system_prefix, user_tag, prompt_code)

    raw_reply = run(request_text, seed = seed, port = port)
    result = get_function_body(cut_off_prefix(raw_reply))

    print("####", request_text, "####")
    print("***", result, "***")
    return result

import itertools

def run_benchmark(filename, maxnum=-1, port=5000, long_prompt = False, user_tag = "", assistant_tag = "", system_prefix = ""):
    """Generate completions task by task, rewriting ``filename`` after each one.

    After every task the file is rewritten in full: real completions for the
    tasks done so far, followed by a ``"    pass"`` placeholder for every task
    not yet generated, so the file always has an entry for every task in
    ``problems``.

    Args:
        filename: Output path handed to ``write_jsonl``.
        maxnum: Number of leading tasks to generate. A negative value or
            ``None`` means all tasks. (Using -1 directly as a slice bound
            would silently skip the last task.)
        port, long_prompt, user_tag, assistant_tag, system_prefix: Forwarded
            unchanged to ``generate_one_completion``.
    """
    seed_counter = itertools.count()
    all_task_ids = list(problems)
    if maxnum is None or maxnum < 0:
        selected_ids = all_task_ids
    else:
        selected_ids = all_task_ids[:maxnum]
    all_samples = []

    for idx, task_id in enumerate(selected_ids):
        # Generate real completions
        for _ in range(num_samples_per_task):
            completion = generate_one_completion(
                problems[task_id]["prompt"],
                seed=next(seed_counter),
                port=port,
                long_prompt=long_prompt,
                user_tag=user_tag,
                assistant_tag=assistant_tag,
                system_prefix=system_prefix,
            )
            all_samples.append(dict(task_id=task_id, completion=completion))

        # selected_ids is a prefix of all_task_ids, so everything after idx is
        # still pending and gets a placeholder.
        placeholders = [
            dict(task_id=pending_id, completion="    pass")
            for pending_id in all_task_ids[idx + 1:]
            for _ in range(num_samples_per_task)
        ]

        # Overwrite the whole file so an interrupted run leaves a usable result.
        write_jsonl(filename, all_samples + placeholders)


In [None]:
# Pass the task count explicitly so that every task, including the last one,
# gets a generated completion.
run_benchmark(
    "gpt4_final_500c_long.jsonl",
    maxnum=len(problems),
    port=5000,
    long_prompt=True,
    user_tag="",
    assistant_tag="",
    system_prefix="",
)

In [None]:
# Completion marker: when the notebook is run top to bottom, this prints only
# after the benchmark cell has returned.
print("done")