### Create Subsets for Inference

In [9]:
import json
import random

# Seed handed to create_random_subset_jsonl so the sampled subset is reproducible
RANDOM_SEED = 42
# Fraction of the input lines kept in the random subset; read as a global by
# create_random_subset_jsonl and also embedded in the output file name below
FRACTION = 0.1

def create_random_subset_jsonl(input_filepath, output_filepath, seed, fraction=None):
    """Write a reproducible random sample of a JSONL file's records to a new file.

    Args:
        input_filepath: Path of the JSONL file to sample from.
        output_filepath: Path of the JSONL file to write (overwritten if present).
        seed: Seed applied to the `random` module before sampling.
        fraction: Share of the records to keep, in [0, 1]. When omitted, the
            module-level FRACTION constant is used.

    Raises:
        ValueError: If the fraction is outside [0, 1].
    """
    if fraction is None:
        fraction = FRACTION
    if not 0 <= fraction <= 1:
        raise ValueError(f"fraction must be between 0 and 1, got {fraction!r}")

    random.seed(seed)  # Seed the global generator so the same subset is drawn every run

    # Step 1: Collect the records. Blank lines are skipped: they are not JSON
    # records, and sampling one would put an unparsable line in the subset.
    records = []
    with open(input_filepath, 'r') as infile:
        for line in infile:
            stripped = line.strip()
            if stripped:
                records.append(stripped)

    # Step 2: Draw the subset without replacement (size is rounded down)
    subset_size = int(fraction * len(records))
    random_subset = random.sample(records, subset_size)

    # Step 3: Write one record per line
    with open(output_filepath, 'w') as outfile:
        outfile.writelines(record + '\n' for record in random_subset)

# Replace with your input and output file paths
# Dataset stem shared by the input file and the derived subset file
base_jsonl_name = "rg-one-gram-ws-20-ss-10-context-1000"
input_jsonl_fp = f'datasets/{base_jsonl_name}.jsonl'
# The sampling fraction is part of the subset's file name
output_jsonl_fp = f'subsets/{base_jsonl_name}_{FRACTION}.jsonl'

# Sample the dataset into the subset file
create_random_subset_jsonl(input_jsonl_fp, output_jsonl_fp, RANDOM_SEED)

### Edit Prompts to be the Same as Shown in the Paper

In [10]:
# Let's make sure to tell the model what it actually has to do as well (as given in paper)
def rreplace(s, old, new, occurrence):
    """Replace the last `occurrence` matches of `old` in `s` with `new`.

    The string is split from the right on `old` at most `occurrence` times and
    the resulting pieces are joined back together with `new`.
    """
    return new.join(s.rsplit(old, occurrence))

In [11]:
# Rewrite every prompt of the subset so that the last separator line is followed
# by an explicit instruction, and save the result to "<subset>_instruct.jsonl".
prompt_separator = "# --------------------------------------------------"
prompt_instruction = '"""Based on the above, complete the following code:"""'
instruct_jsonl_fp = output_jsonl_fp.replace(".jsonl","_instruct.jsonl")

with open(output_jsonl_fp, 'r') as original_file, open(instruct_jsonl_fp, 'w') as output_file:
    for line in original_file:
        line = line.strip()
        if not line:
            continue  # a blank line is not a record; json.loads would fail on it
        entry = json.loads(line)

        output_entry = {
            # Only the last separator is touched; earlier ones delimit context snippets
            "prompt": rreplace(entry['prompt'], prompt_separator, prompt_separator + "\n" + prompt_instruction, 1),
            "metadata": entry["metadata"]
        }

        output_file.write(json.dumps(output_entry) + "\n")

### Run Inference with API

In [4]:
import os
import openai
import json

# Point the openai client at a custom API base URL (hard-coded address)
openai.api_base = "http://3.22.120.71:8888/v1"

# Stem and extension shared by the files of this run
base_jsonl_name = 'rg-one-gram-ws-20-ss-2-one-line_0.1_instruct_10_variants'
ending = ".jsonl"

# Prompts to send, raw model responses, and preprocessed generations
input_jsonl_file_path = f"temp_subsets/{base_jsonl_name}.jsonl"
original_responses_path = f"temp_raw_generations/{base_jsonl_name}_comp_llama_raw_generations{ending}"
output_jsonl_file_path = f"temp_processed_generations/{base_jsonl_name}_comp_llama_generations{ending}"

### Non-Multithreaded Request Code

In [5]:
import json
import time

# threading lets process_entry run each request in a worker thread and stop waiting after a timeout
import threading

# Function to process a single JSONL entry
def process_entry(entry, model_name="CodeLlama-7b-hf", timeout_seconds=40, max_attempts=None):
    """Request a completion for one JSONL entry, abandoning attempts that exceed a timeout.

    Each attempt runs the API call in a worker thread and waits at most
    `timeout_seconds` for it. A timed-out attempt is abandoned (its thread keeps
    running in the background) and a new attempt is started.

    Args:
        entry: Dict with a 'prompt' string. entry['metadata']['task_id'] is read
            only to label the timeout message.
        model_name: Model name passed to the completion API.
        timeout_seconds: How long to wait for a single attempt.
        max_attempts: Maximum number of attempts. None (the default) retries
            until an attempt finishes within the timeout.

    Returns:
        The generated text, or None when the request raised an exception or
        every allowed attempt timed out.
    """
    generation_result = [None]  # Mutable slot the worker thread writes its result into

    def generate():
        try:
            # Generate the completion with the OpenAI API
            response = openai.Completion.create(model=model_name,
                                                prompt=entry['prompt'],
                                                temperature=0,
                                                max_tokens=100)
            # Keep only the text of the first choice
            generation_result[0] = response["choices"][0]["text"]
        except Exception as exc:
            # Best effort: report the failure; the caller receives None
            print(str(exc))

    attempts = 0
    while max_attempts is None or attempts < max_attempts:
        attempts += 1
        # Daemon thread: an abandoned, still-blocked request must not keep the
        # interpreter alive after the notebook/kernel is asked to stop.
        generation_thread = threading.Thread(target=generate, daemon=True)
        generation_thread.start()

        # Wait for the thread to complete or time out
        generation_thread.join(timeout=timeout_seconds)
        if not generation_thread.is_alive():
            return generation_result[0]

        print(f"Completion for task_id={entry['metadata']['task_id']} timed out.")

    return None  # every allowed attempt timed out

# Function to create a unique identifier for an entry
def create_identifier(metadata):
    """Return the (task_id, instance_id) pair used to recognise an entry.

    A key that is absent from `metadata` contributes None to the pair.
    """
    task_id = metadata.get("task_id")
    instance_id = metadata.get("instance_id")
    return task_id, instance_id

# Read the existing JSONL file to check for duplicates
def check_existing_entries(file_path):
    """Collect the identifiers of all entries already stored in a JSONL file.

    Args:
        file_path: Path of the JSONL file to scan.

    Returns:
        A set of identifiers as produced by create_identifier. The set is empty
        when the file does not exist. Entries without a "metadata" object are
        identified from an empty dict.
    """
    existing_entries = set()
    try:
        with open(file_path, 'r') as file:
            for line in file:
                line = line.strip()
                if not line:
                    continue  # a blank line is not a record; json.loads would fail on it
                metadata = json.loads(line).get("metadata", {})
                existing_entries.add(create_identifier(metadata))
    except FileNotFoundError:
        pass  # It's okay if file doesn't exist, we'll create it.
    return existing_entries

# Prepare the set of existing identifiers
# Identifiers already present in the raw-responses file, so a rerun resumes
# where the previous run stopped instead of regenerating everything
existing_identifiers = check_existing_entries(original_responses_path)

# Read the input JSONL file and generate completions
with open(input_jsonl_file_path, 'r') as input_file, open(original_responses_path, 'a') as original_file:
    for line in input_file:
        line = line.strip()
        if not line:
            continue  # a blank line is not a record; json.loads would fail on it
        entry = json.loads(line)
        metadata = entry["metadata"]
        identifier = create_identifier(metadata)

        # Check if the entry already exists
        if identifier in existing_identifiers:
            print(f'Skipping generation, entry with task_id={metadata["task_id"]} and instance_id={metadata.get("instance_id")} already exists.')
            continue

        # Process the entry if it does not exist
        output_entry = process_entry(entry)

        # process_entry returns None when the request failed. Storing that would
        # write a null completion and mark the entry as done, so a rerun could
        # never retry it; leave it out of the file instead.
        if output_entry is None:
            print(f'Generation failed for task_id={metadata["task_id"]} and instance_id={metadata.get("instance_id")}; not recorded so a rerun retries it.')
            continue

        # Save the original response to a new JSONL
        original_file.write(json.dumps({"prompt": entry['prompt'], "choices": [{"text": output_entry}], "metadata" : metadata}) + "\n")
        # Flush per entry so finished generations survive an interrupted run
        original_file.flush()

        # Add the identifier to the set to avoid processing it again
        existing_identifiers.add(identifier)

Skipping generation, entry with task_id=pytorch_rl/156 and instance_id=pytorch_rl/156_0 already exists.
Skipping generation, entry with task_id=pytorch_rl/156 and instance_id=pytorch_rl/156_1 already exists.
Skipping generation, entry with task_id=pytorch_rl/156 and instance_id=pytorch_rl/156_2 already exists.
Skipping generation, entry with task_id=pytorch_rl/156 and instance_id=pytorch_rl/156_3 already exists.
Skipping generation, entry with task_id=pytorch_rl/156 and instance_id=pytorch_rl/156_4 already exists.
Skipping generation, entry with task_id=pytorch_rl/156 and instance_id=pytorch_rl/156_5 already exists.
Skipping generation, entry with task_id=pytorch_rl/156 and instance_id=pytorch_rl/156_6 already exists.
Skipping generation, entry with task_id=pytorch_rl/156 and instance_id=pytorch_rl/156_7 already exists.
Skipping generation, entry with task_id=pytorch_rl/156 and instance_id=pytorch_rl/156_8 already exists.
Skipping generation, entry with task_id=pytorch_rl/156 and insta

### Multithreaded Version

In [2]:
import json
import concurrent.futures
import time

# Function to process a single JSONL entry
def process_entry(entry, model_name="CodeLlama-13b-hf", max_retries=None, retry_delay=30):
    """Request a completion for one JSONL entry, retrying when the API call fails.

    Args:
        entry: Dict with a 'prompt' string.
        model_name: Model name passed to the completion API.
        max_retries: Number of retries allowed after the first failed call.
            None (the default) retries indefinitely.
        retry_delay: Seconds to sleep before each retry.

    Returns:
        The text of the first choice; an empty string is returned (and
        reported) when the model generated nothing.

    Raises:
        KeyError: If `entry` has no 'prompt'.
        Exception: The last API error, once `max_retries` is exhausted.
    """
    # Read the prompt outside the retry loop: a malformed entry can never
    # succeed, so it must fail immediately instead of being retried forever.
    prompt = entry['prompt']

    failures = 0
    while True:
        try:
            # Generate the completion with the OpenAI API
            response = openai.Completion.create(
                model = model_name,
                prompt = prompt,
                temperature = 0,
                max_tokens = 100)
            # Extract the text of the completion generated by the model
            generated_completion = response["choices"][0]["text"]
        except Exception as exc:
            print(str(exc))
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            time.sleep(retry_delay)
            continue

        if generated_completion == "":
            print("Generated completion is empty")
        return generated_completion

# Function to write the result to a file
def write_result(entry, output_entry, original_responses_path):
    """Append one generation record to the raw-responses JSONL file.

    The record holds the entry's prompt, the generated text wrapped in a
    single-element "choices" list, and the entry's metadata.
    """
    with open(original_responses_path, 'a') as original_file:
        record = {
            "prompt": entry['prompt'],
            "choices": [{"text": output_entry}],
            "metadata": entry['metadata'],
        }
        serialized = json.dumps(record)
        original_file.write(serialized + "\n")

# Main code to read the input JSONL file and generate completions
# Load every input entry up front so all of them can be submitted at once
with open(input_jsonl_file_path, 'r') as input_file:
    entries = []
    for raw_line in input_file:
        entries.append(json.loads(raw_line.strip()))

with concurrent.futures.ThreadPoolExecutor() as executor:
    # Map each submitted future back to the entry it was created for
    future_to_entry = {}
    for entry in entries:
        future_to_entry[executor.submit(process_entry, entry)] = entry

    # Results are written in completion order, not input order
    for future in concurrent.futures.as_completed(future_to_entry):
        entry = future_to_entry[future]
        try:
            output_entry = future.result()
            write_result(entry, output_entry, original_responses_path)
        except Exception as exc:
            print(f'Generated an exception: {exc}')

Request timed out: HTTPConnectionPool(host='3.22.120.71', port=8888): Read timed out. (read timeout=600)
Request timed out: HTTPConnectionPool(host='3.22.120.71', port=8888): Read timed out. (read timeout=600)
Request timed out: HTTPConnectionPool(host='3.22.120.71', port=8888): Read timed out. (read timeout=600)
Request timed out: HTTPConnectionPool(host='3.22.120.71', port=8888): Read timed out. (read timeout=600)
Request timed out: HTTPConnectionPool(host='3.22.120.71', port=8888): Read timed out. (read timeout=600)
Request timed out: HTTPConnectionPool(host='3.22.120.71', port=8888): Read timed out. (read timeout=600)
Request timed out: HTTPConnectionPool(host='3.22.120.71', port=8888): Read timed out. (read timeout=600)
Request timed out: HTTPConnectionPool(host='3.22.120.71', port=8888): Read timed out. (read timeout=600)
Request timed out: HTTPConnectionPool(host='3.22.120.71', port=8888): Read timed out. (read timeout=600)
Request timed out: HTTPConnectionPool(host='3.22.120.71

### Perform Preprocessing

In [6]:
# Function to preprocess the completion string
def preprocess_completion(completion):
    """Reduce a completion that contains a fenced code block to its first code line.

    The first fence found, trying ```python, then ```py, then a bare ```, opens
    the block; the next ``` closes it (or the end of the string when there is
    none). Inside the block everything after a '#' on a line is dropped, and
    the first line that still has content is returned with runs of whitespace
    collapsed to single spaces.

    Args:
        completion: Generated text, or None for a failed generation.

    Returns:
        - "" when `completion` is None;
        - `completion` unchanged when it has no fence or the fenced block is empty;
        - the normalised first code line of the fenced block otherwise;
        - the raw fenced content when it holds only blank or comment lines.

    Note: a '#' inside a string literal is also treated as a comment start.
    """
    if completion is None:
        return ""

    # Longer fence markers are tried first. Checking the bare "```" before
    # "```py" would always match and leave the "py" tag inside the block.
    start = end = None
    for fence in ("```python", "```py", "```"):
        if fence in completion:
            start = completion.find(fence) + len(fence)  # first character after the opener
            end = completion.find("```", start)  # closing fence, or -1
            break

    if end is None:
        return completion  # no code fence at all

    if end == -1:  # unterminated block: take everything up to the end
        end = len(completion)

    if start >= end:
        return completion  # empty block, nothing to extract

    block = completion[start:end]

    # Return the first line that still has content once comments are removed
    for line in block.split('\n'):
        code = line.split('#', 1)[0]
        if code.strip():
            return " ".join(code.split())

    return block  # only blank/comment lines inside the fence

In [7]:
# Now, read the original responses JSONL, preprocess, and write back out to the second JSONL
# Destination of the preprocessed records (also used by the scoring cell)
processed_path = output_jsonl_file_path.replace(".jsonl","_processed.jsonl")

with open(original_responses_path, 'r') as original_file, open(processed_path, 'w') as output_file:
    for line in original_file:
        entry = json.loads(line.strip())
        choices = entry['choices']
        metadata = entry["metadata"]
        completion = choices[0]['text']

        # Copy the metadata, preprocessing only the ground truth so that it is
        # normalised the same way as the model's completion
        new_metadata = {
            key: preprocess_completion(value) if key == "ground_truth" else value
            for key, value in metadata.items()
        }

        output_entry = {
            "prompt": entry['prompt'],
            "choices": [{"text": preprocess_completion(completion)}],
            "metadata": new_metadata
        }

        output_file.write(json.dumps(output_entry) + "\n")

### Evaluate Subsets

In [8]:
# Run compute_score.py on the raw responses file ({...} is filled in by IPython from the Python variable)
!py compute_score.py {original_responses_path}

EM
0.3977	440	pytorch_rl
0.6882	170	huggingface_diffusers
0.5083	240	opendilab_ACE
0.6417	240	awslabs_fortuna
0.3889	180	alibaba_FederatedScope
0.4462	130	google_vizier
0.4278	180	huggingface_evaluate
ES
0.6029	440	pytorch_rl
0.7937	170	huggingface_diffusers
0.6824	240	opendilab_ACE
0.7921	240	awslabs_fortuna
0.6752	180	alibaba_FederatedScope
0.6389	130	google_vizier
0.6083	180	huggingface_evaluate
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found bett

In [9]:
# Run compute_score.py on the preprocessed file written by the preprocessing cell
!py compute_score.py {processed_path}

EM
0.3977	440	pytorch_rl
0.6882	170	huggingface_diffusers
0.5083	240	opendilab_ACE
0.6417	240	awslabs_fortuna
0.3889	180	alibaba_FederatedScope
0.4462	130	google_vizier
0.4278	180	huggingface_evaluate
ES
0.6029	440	pytorch_rl
0.7937	170	huggingface_diffusers
0.6824	240	opendilab_ACE
0.7921	240	awslabs_fortuna
0.6752	180	alibaba_FederatedScope
0.6389	130	google_vizier
0.6083	180	huggingface_evaluate
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found bett

In [46]:
# Run compute_score.py on a 3-variants processed file; the path is hard-coded with a Windows-style separator
!py compute_score.py "processed_generations\rg-one-gram-ws-20-ss-2-one-line_0.1_instruct_3_variants_raw_generations_fixed_processed.jsonl"

EM
0.3188	138	pytorch_rl
0.4861	72	awslabs_fortuna
0.3056	72	opendilab_ACE
0.3529	51	huggingface_diffusers
0.2778	54	alibaba_FederatedScope
0.2308	39	google_vizier
0.2778	54	huggingface_evaluate
ES
0.5217	138	pytorch_rl
0.7281	72	awslabs_fortuna
0.5596	72	opendilab_ACE
0.5495	51	huggingface_diffusers
0.6199	54	alibaba_FederatedScope
0.6097	39	google_vizier
0.555	54	huggingface_evaluate
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found better!
found bet

### Check if Generated Code is Syntactically Correct

In [None]:
# # This part is not done and still a work in progress
# import ast
# from pylint import epylint as lint
# from io import StringIO

# def check_generated_code(file_code, gen_code):
#     combined_code = file_code + "\n" + gen_code
    
#     # Syntax & Indentation Check
#     try:
#         # Attempt to parse the combined code into an AST
#         ast.parse(combined_code)
#     except (SyntaxError, IndentationError) as e:
#         return f"Syntax or indentation error: {e}"

#     # Static Analysis
#     # Save the combined code to a temporary file or use StringIO
#     temp_file_path = 'temp_code.py'
#     with open(temp_file_path, 'w') as temp_file:
#         temp_file.write(combined_code)
    
#     # Run pylint on the file
#     (pylint_stdout, pylint_stderr) = lint.py_run(temp_file_path, return_std=True)
#     stdout, stderr = pylint_stdout.getvalue(), pylint_stderr.getvalue()
    
#     # Assuming you are interested in errors (convention/refactor/warning messages may be ignored)
#     if stdout.strip():
#         return f"Pylint found issues with the code:\n{stdout}"
    
#     return "Generated code passed all checks."

# # Example usage
# file_code = """
# def existing_function():
#     pass
# """

# gen_code = """
# for i in range(10):
#     print(i)
# """

# result = check_generated_code(file_code, gen_code)
# print(result)