In [5]:
import json 
from api.openai_api import OPENAI_API
from api.google_api import GOOGLE_API
from api.anthropic_api import ANTHROPIC_API
from concurrent.futures import ThreadPoolExecutor, as_completed
import sys
import os

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Base name of the dataset: read from data/{dataset}.json and reused as the prefix of the output file name.
dataset = 'ibm_claude-3-5-sonnet-20240620'
# Label that is only embedded in the output file name; the model that is actually called
# is chosen where the API client is constructed.
rewrite_model = 'gpt40_mini'

In [15]:
# NOTE(review): `samples` is not referenced by any other cell visible here — confirm whether it is still needed.
samples = {}

In [19]:
# Load the paired human/machine dataset. The encoding is pinned to UTF-8 so that the
# result does not depend on the platform's default locale encoding.
with open(f"data/{dataset}.json", encoding="utf-8") as file:
    data = json.load(file)

In [20]:
# Number of records loaded from the dataset file.
len(data)

2984

In [17]:
# NOTE(review): this drops only the LAST record; it does not select 50 samples
# (that would be data[:50]) — confirm the intended subset. Re-running this cell
# removes one more record each time because it rebinds `data` to its own slice.
data = data[:-1]

In [10]:
# Split every paired record into two single-code records: one carrying the
# human-written code and one carrying the machine-written code. All other fields
# are copied to both, and a "writer" tag records which variant it is.
splitted_data = []

for record in data:
    # Fields shared by both variants: everything except the two code fields.
    shared_fields = {
        name: value
        for name, value in record.items()
        if name != 'machine_code' and name != 'human_code'
    }

    # Build the human variant first, then the machine variant, and only add the
    # pair once both have been built.
    pair = []
    for writer, code_field in (("human", "human_code"), ("machine", "machine_code")):
        variant = dict(shared_fields)
        variant["code"] = record[code_field]
        variant["writer"] = writer
        pair.append(variant)

    splitted_data.extend(pair)

In [11]:
# Two entries (human + machine) are produced per input record, so this is twice the
# number of records that were in `data` when the split cell ran.
len(splitted_data)

5968

In [12]:
# data_h = [{key: sample[key] for key in sample if key != "machine_code"} for sample in data]
# data_m = [{key: sample[key] for key in sample if key != "human_code"} for sample in data]

In [None]:
# Do not paste a real key into the notebook: export OPENAI_API_KEY before starting
# the kernel. setdefault keeps a key that is already present in the environment
# instead of overwriting it with an empty string.
os.environ.setdefault("OPENAI_API_KEY", '')
if not os.environ["OPENAI_API_KEY"]:
    print("WARNING: OPENAI_API_KEY is empty; export it before starting the kernel.")
# os.environ["GOOGLE_API_KEY"] = ''
# os.environ["ANTHROPIC_API_KEY"] = ''

In [None]:
# Client used by process_item_openai. The Google and Anthropic clients below are
# commented out, so `apig` / `apia` must be created before process_item_gemini /
# process_item_anthropic are called.
apio = OPENAI_API(model='gpt-4o-mini', temperature=1)
# NOTE(review): "temperture" below looks like a typo for "temperature" — confirm against GOOGLE_API before enabling.
# apig = GOOGLE_API(model='gemini-1.5-flash', temperture=1)
# apia = ANTHROPIC_API(model='claude-3-5-sonnet-20240620', temperature=1)

In [None]:
def process_item_anthropic(item, idx):
    """Attach three rewrites of ``item['code']`` produced through the global ``apia`` client.

    Args:
        item: Mapping with a ``'code'`` entry. It is modified in place: a
            ``'rewrite'`` entry holding the list of cleaned rewrites is added.
        idx: Accepted for the caller's convenience; not used by this function.

    Returns:
        The same ``item`` object that was passed in.

    Note:
        Relies on a module-level ``apia`` client existing at call time.
    """
    source_code = item['code']

    # The instruction text is sent verbatim; the code to rewrite is appended at the end.
    instruction = (
        "Please first explain the functionality of the Python code below. "
        "Then generate a possible rewrite for this Python code function according to your explanation. "
        "Please just give me a pure code in reponse, not any explanation or text. "
        "Please do not add any clarifications after the rewritten code. "
        "JUST PURE CODE, DONT SAY ANY WORD, OTHER THAN CODE. "
        "ANSOLUTLY NOTHING.\n CODE: \n "
    )
    rewrite_prompt = f"{instruction}{source_code}"

    # Ask for three generations of the same prompt.
    raw_candidates = apia.communication_regen(prompt=rewrite_prompt, n=3)

    # Run every generation through the client's snippet clean-up
    # (presumably strips markdown fences — confirm in the API wrapper).
    cleaned_candidates = []
    for candidate in raw_candidates:
        cleaned_candidates.append(apia.codeSnippetCleanup(candidate))

    item["rewrite"] = cleaned_candidates
    return item

In [None]:
def process_item_openai(item, idx):
    """Attach three rewrites of ``item['code']`` produced through the global ``apio`` client.

    Args:
        item: Mapping with a ``'code'`` entry. It is modified in place: a
            ``'rewrite'`` entry holding the list of cleaned rewrites is added.
        idx: Accepted for the caller's convenience; not used by this function.

    Returns:
        The same ``item`` object that was passed in.

    Note:
        Relies on a module-level ``apio`` client existing at call time.
    """
    source_code = item['code']

    # The instruction text is sent verbatim; the code to rewrite is appended at the end.
    instruction = (
        "Please first explain the functionality of the Python code below. "
        "Then generate a possible rewrite for this Python code function according to your explanation. "
        "Please just give me a pure code in reponse, not any explanation or text. "
        "Please do not add any clarifications after the rewritten code. "
        "JUST PURE CODE, DONT SAY ANY WORD, OTHER THAN CODE. "
        "ANSOLUTLY NOTHING.\n CODE: \n "
    )
    rewrite_prompt = f"{instruction}{source_code}"

    # Ask for three generations of the same prompt.
    raw_candidates = apio.communication_regen(prompt=rewrite_prompt, n=3)

    # Run every generation through the client's snippet clean-up
    # (presumably strips markdown fences — confirm in the API wrapper).
    cleaned_candidates = []
    for candidate in raw_candidates:
        cleaned_candidates.append(apio.codeSnippetCleanup(candidate))

    item["rewrite"] = cleaned_candidates
    return item



In [None]:
def process_item_gemini(item, idx):
    """Attach three rewrites of ``item['code']`` produced through the global ``apig`` client.

    Args:
        item: Mapping with a ``'code'`` entry. It is modified in place: a
            ``'rewrite'`` entry holding the list of cleaned rewrites is added.
        idx: Accepted for the caller's convenience; not used by this function.

    Returns:
        The same ``item`` object that was passed in.

    Note:
        Relies on a module-level ``apig`` client existing at call time.
    """
    source_code = item['code']

    # The instruction text is sent verbatim; the code to rewrite is appended at the end.
    instruction = (
        "Please first explain the functionality of the Python code below. "
        "Then generate a possible rewrite for this Python code function according to your explanation. "
        "Please just give me a pure code in reponse, not any explanation or text. "
        "Please do not add any clarifications after the rewritten code. "
        "JUST PURE CODE, DONT SAY ANY WORD, OTHER THAN CODE. "
        "ANSOLUTLY NOTHING.\n CODE: \n "
    )
    rewrite_prompt = f"{instruction}{source_code}"

    # Ask for three generations of the same prompt.
    raw_candidates = apig.communication_regen(prompt=rewrite_prompt, n=3)

    # Run every generation through the client's snippet clean-up
    # (presumably strips markdown fences — confirm in the API wrapper).
    cleaned_candidates = []
    for candidate in raw_candidates:
        cleaned_candidates.append(apig.codeSnippetCleanup(candidate))

    item["rewrite"] = cleaned_candidates
    return item

In [None]:
# Fan the rewrite requests out over a thread pool and collect the results.
# Results are appended in completion order, which is not the order of `splitted_data`.
rewrtied_data = []
with ThreadPoolExecutor(max_workers=8) as executer:

    # Only the first 5000 entries are submitted; idx is the position in splitted_data.
    futures_to_items = {
        executer.submit(process_item_openai, item, idx): item
        for idx, item in enumerate(splitted_data[:5000])
    }
    # Progress is reported against what was actually submitted, not the full list.
    n_submitted = len(futures_to_items)

    for i, future in enumerate(as_completed(futures_to_items)):
        try:
            result = future.result()
            if result is not None:
                rewrtied_data.append(result)
            sys.stdout.write(f"\rProcessed {i + 1} of {n_submitted} prompts...")
            sys.stdout.flush()
        except Exception as e:
            # Best effort: a failed item is reported and skipped so the run continues.
            # Note that i is the completion counter, not the item's index in splitted_data.
            print(f"\nError processing prompt {i + 1}: {e}")

In [None]:
# Preview only the first few results; displaying the whole list would embed
# thousands of records in the notebook output.
rewrtied_data[:3]

In [None]:
# Persist the collected rewrites as JSON.
# The file name is kept exactly as before (including the "_rewrtie_" spelling and the
# "_50" suffix) so that anything reading this path keeps working.
# NOTE(review): "_50" does not reflect the number of records actually written — confirm.
output_path = f"output/rewrite/{dataset}_rewrtie_{rewrite_model}_50.json"
# Create the target directory on first run instead of failing with FileNotFoundError.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, 'w', encoding="utf-8") as file:
    json.dump(rewrtied_data, file, indent=4)