In [None]:
"""
Tree-of-Thought prompting
"""

In [None]:
import sys
sys.path.append('../../')
from run_process import get_unused_data
sys.path.append('../../../')
from utils import stats_results_unused

In [None]:
# Configuration shared by the cells below.
# Number of sample files processed; the loops below read/write files named
# 0.py .. (NUM_FILES - 1).py.
NUM_FILES = 209
# Folder the input sample files are read from.
SAMPLES_FOLDER_NAME = '../samples_unused_functions'
# File the GPT results are written to and later read back from.
GPT_SAVED_FILE_NAME = 'unused_functions_gpt'

In [None]:
# Load the text of every sample file (0.py .. NUM_FILES - 1) found in
# SAMPLES_FOLDER_NAME into random_cells, keeping index order.
random_cells = []

for sample_idx in range(NUM_FILES):
    sample_path = f'{SAMPLES_FOLDER_NAME}/{sample_idx}.py'
    with open(sample_path, 'r') as sample_file:
        sample_text = sample_file.read()
        random_cells.append(sample_text)

In [None]:
# Task instruction sent as the first user message of the chain-of-thought
# prompt (see get_cot_prompt). The 'Unused functions' and 'Updated code'
# headings it asks for are what get_identified_names splits on and what
# identify_stop is set to further down.
main_prompt = "A function/method is unused if it is defined using 'def' but is not referenced after being defined. Remove unused functions/method definitions in the code delimited by triple backticks. First, identify unused function definitions in the code. Then, remove the identified unused function definitions from the code. Do not add, remove, or change anything else. Output under the headings 'Unused functions' and 'Updated code' respectively."

# Few-shot example 1, user turn: a module-level variable plus one function
# that is never referenced.
ex1_input = """```python
c = 5

def calc():
    a = 5
    b = 4
    return a + b
```"""

# Few-shot example 1, assistant turn: reasoning, the 'Unused functions:' list,
# then the 'Updated code:' section with the function removed.
ex1_output = """First, we identify unused functions. Analyzing the code, the only function definition is 'calc'. However, this function is not referenced after being defined. Therefore, 'calc' is an unused function. We don't consider 'c' as it is a variable, not a function.

Unused functions:
['calc']

Next, we remove the identified unused functions from the code. We remove the function definition 'calc' and the code inside the function definition. We output the full code below with 'calc' fully removed.

Updated code:
```python
c = 5
```"""

# Few-shot example 2, user turn: two unreferenced functions, one of which
# calls a function ('carousel') that is not defined in the snippet.
ex2_input = """```python
def multiply(a, b):
    carousel()
    return a * b
    
def mystery():
    a = 5
    b = 4
    return a - b
```"""

# Few-shot example 2, assistant turn: both functions are listed and removed,
# leaving an empty code block.
ex2_output = """First, we identify unused functions. Analyzing the code, there are two function definitions: 'multiply' and 'mystery'. However, both functions are not referenced after definition. Therefore, 'multiply' and 'mystery' are unused functions. Since the definition for 'carousel' is not present in this code snippet we do not consider 'carousel'.

Unused functions:
['multiply', 'mystery']

Next, we remove the identified unused functions from the code. We remove the function definitions 'multiply' and 'mystery' and the code inside these function definitions. We output the full code below with the function definitions removed. However, since the input code only contains these definitions, the output is an empty code snippet.

Updated code:
```python
```"""

# Instruction for the identification voting step: the model is shown several
# candidate lists of unused functions and must end its answer with
# 'The best choice is {s}' (0 meaning no choice is good).
identify_vote_prompt = """Given an original task and multiple choices, choose the best answer for the original task. Analyze each choice in detail, then conclude in the last line 'The best choice is {s}', where s is the integer id of the choice. If all choices are equally good, return the smallest id. If no choice is good, return 0.
Original task: A function/method is unused if it is defined using 'def' but is not referenced after being defined. Identify unused functions/methods in the code delimited by triple backticks. If there are no unused functions, return an empty list."""

# Worked voting example, user turn: the code followed by numbered choices, in
# the same layout that get_identify_vote_msgs produces.
identify_vote_ex1_input = """```python
c = 5

def calc():
    a = 5
    b = 4
    return a + b
```

Choice 1:
[]
Choice 2:
['calc']
Choice 3:
['c', 'calc']"""

# Worked voting example, assistant turn: per-choice analysis ending with the
# required 'The best choice is N.' line.
identify_vote_ex1_output = """Choice 1 is incorrect. Looking at the code, we see that there is a function definition 'calc' that is not referenced after being defined, so the empty list is incorrect.
Choice 2 is correct. Looking at the code, we see that there is a function definition 'calc' that is not referenced after being defined, so the list containing 'calc' is correct.
Choice 3 is incorrect. While 'calc' is an unused function, 'c' is a variable, not a function. Therefore, 'c' is not an unused function.

The best choice is 2."""

# Instruction for the code voting step: the model is shown the original code,
# the functions to remove and several candidate rewrites, and must end its
# answer with 'The best choice is {s}' (0 meaning no choice is good).
code_vote_prompt = """Given an original task and multiple choices, choose the best answer for the original task. Analyze each choice in detail, then conclude in the last line 'The best choice is {s}', where s is the integer id of the choice. If all choices are equally good, return the smallest id. If no choice is good, return 0.
Original task: Remove the function definitions and the code inside the function definitions for the functions specified by the user in the code delimited by triple backticks. If there are no function definitions specified by the user, return the original code."""

# Worked voting example, user turn: original code, functions to remove and
# numbered candidate rewrites, in the same layout that get_code_vote_msgs
# produces.
code_vote_ex1_input = """Original code:
```python
c = 5

def calc():
    a = 5
    b = 4
    return a + b
```

Functions to remove:
['calc']

Choice 1:
```python
c = 5

a = 5
b = 4
```
Choice 2:
```python
```
Choice 3:
```python
c = 5
```"""

# Worked voting example, assistant turn: per-choice analysis ending with the
# required 'The best choice is N.' line.
code_vote_ex1_output = """Choice 1 is incorrect. Comparing the original code and Choice 1, while we have removed the function's signature, we have not removed the code inside the function definition.
Choice 2 is incorrect. Comparing the original code and Choice 2, while we have removed the specified function definition we have also removed extra code not specified.
Choice 3 is correct. Comparing the original code and Choice 3, we have removed the function definition and the code inside the function definition. We have not altered the code outside the function definition.

The best choice is 3."""

def get_cot_prompt(input_code):
    """Build the few-shot chain-of-thought chat messages for one code sample.

    The conversation consists of the task instruction, two worked examples
    (user input followed by assistant answer), and finally ``input_code``
    wrapped in a python code fence as the last user message.
    """
    few_shot_turns = (
        ("user", main_prompt),
        ("user", ex1_input),
        ("assistant", ex1_output),
        ("user", ex2_input),
        ("assistant", ex2_output),
    )
    messages = [{"role": role, "content": text} for role, text in few_shot_turns]
    messages.append({"role": "user", "content": f"```python\n{input_code}\n```"})
    return messages

def get_identified_names(identify_trials, identify_completions):
    """Parse the unused-function names out of each identification completion.

    Args:
        identify_trials: Number of entries to read from
            ``identify_completions.choices``.
        identify_completions: Completion response. Each choice must expose
            ``finish_reason`` as an attribute and its text through
            ``choice['message']['content']``.

    Returns:
        A list with one entry per trial. Each entry is the sorted list of
        names found after the 'Unused functions:' heading, or ``None`` when
        the completion did not finish with 'stop', reported 'None', listed
        nothing, or could not be parsed.
    """
    identified_names = []

    for i in range(identify_trials):
        choice = identify_completions.choices[i]
        unused_names = None

        # Only completions that ended on a stop condition are parsed.
        if choice.finish_reason == 'stop':
            content = None
            try:
                content = choice['message']['content']
                section = content.split('Unused functions:')[1].strip("\n")
            except (AttributeError, LookupError, TypeError):
                # Missing message/content or missing heading. Only ordinary
                # lookup/type failures are handled here so that
                # KeyboardInterrupt/SystemExit still propagate, and the
                # handler prints what was already fetched instead of
                # re-reading the (possibly missing) content.
                print("unexpected format for unused functions", content)
            else:
                # NOTE: substring check over the whole section, so a listed
                # name that itself contains 'None' is also treated as
                # "nothing identified".
                if 'None' in section:
                    unused_names = None
                # Python-style list: take the text between the first '[' and
                # the following ']'.
                elif "[" in section:
                    raw_items = section.split("[")[1].split("]")[0].split(",")
                    unused_names = [
                        item.strip().strip("'`")
                        for item in raw_items
                        if item.strip() != ""
                    ]
                # Bullet list: one "- name" (optionally backtick-quoted) per line.
                elif "-" in section:
                    unused_names = []
                    for bullet in section.strip("- ").split("\n- "):
                        parts = bullet.split("`")
                        # Prefer the text inside the first pair of backticks.
                        unused_names.append(parts[1] if len(parts) > 1 else parts[0])
                else:
                    print("unexpected format for unused functions", section)

        if unused_names is not None:
            unused_names.sort()

        # An empty list is reported the same way as "nothing identified".
        if unused_names == []:
            unused_names = None

        identified_names.append(unused_names)

    return identified_names

def get_identify_vote_msgs(cell_src):
    """Return a builder of voting prompts for the identification step.

    The returned callable takes the candidate answers (``choices``) and
    returns the chat messages: the voting instruction, one worked example,
    and a final user message holding ``cell_src`` in a python code fence
    followed by the choices numbered from 1.
    """
    def build_messages(choices):
        numbered_choices = "".join(
            f"Choice {number}:\n{candidate}\n"
            for number, candidate in enumerate(choices, start=1)
        )
        final_msg = f"```python\n{cell_src}\n```\n\n" + numbered_choices

        messages = [
            {"role": "user", "content": identify_vote_prompt},
            {"role": "user", "content": identify_vote_ex1_input},
            {"role": "assistant", "content": identify_vote_ex1_output},
        ]
        messages.append({"role": "user", "content": final_msg})
        return messages

    return build_messages

def get_code_vote_msgs(original_code):
    """Return a builder of voting prompts for the code-removal step.

    The returned callable takes the functions to remove and the candidate
    rewrites (``choices``) and returns the chat messages: the voting
    instruction, one worked example, and a final user message holding
    ``original_code``, the functions to remove and the choices numbered
    from 1.
    """
    def build_messages(unused_functions, choices):
        header = f"Original code:\n```python\n{original_code}\n```\n\nFunctions to remove:\n{unused_functions}\n\n"
        numbered_choices = "".join(
            f"Choice {number}:\n{candidate}\n"
            for number, candidate in enumerate(choices, start=1)
        )
        final_msg = header + numbered_choices

        messages = [
            {"role": "user", "content": code_vote_prompt},
            {"role": "user", "content": code_vote_ex1_input},
            {"role": "assistant", "content": code_vote_ex1_output},
        ]
        messages.append({"role": "user", "content": final_msg})
        return messages

    return build_messages

In [None]:
# GPT Tree of Thought
import sys
sys.path.append('../../../')
from tree_of_thought import solve_toc

# Settings handed positionally to solve_toc for every file.
# NOTE(review): their exact meaning is defined by tree_of_thought.solve_toc;
# the names suggest sample counts per step and the stop sequence used for
# the identification step - confirm there.
identify_trials = 5
code_trials = 3
identify_vote_trials = 6
code_vote_trials = 4
identify_stop = "Updated code"

# Run the identify-and-remove pipeline on every sample and collect one
# {'identified', 'updated_code'} record per file.
gpt_results = []
for file_idx in range(NUM_FILES):
    cell_src = random_cells[file_idx]
    cot_messages = get_cot_prompt(cell_src)
    identify_vote_builder = get_identify_vote_msgs(cell_src)
    code_vote_builder = get_code_vote_msgs(cell_src)

    print(f'Processing file {file_idx}')
    identified, updated_code = solve_toc(
        cot_messages,
        identify_trials,
        code_trials,
        identify_vote_trials,
        code_vote_trials,
        identify_stop,
        get_identified_names,
        identify_vote_builder,
        code_vote_builder,
    )
    print(f'File {file_idx} - {identified}')
    gpt_results.append({'identified': identified, 'updated_code': updated_code})

# Persist the results as the str() of the list; the cell that reads the file
# back relies on this representation.
with open(GPT_SAVED_FILE_NAME, 'w') as results_file:
    results_file.write(str(gpt_results))

In [None]:
# Read the GPT results back from the file.
import ast

# The file holds str(gpt_results): a list of dicts. It is parsed with
# ast.literal_eval instead of eval so that the file contents can never be
# executed as code.
# NOTE(review): assumes the saved values are plain literals (str, list, None);
# literal_eval raises ValueError for anything else - confirm against solve_toc.
with open(GPT_SAVED_FILE_NAME, 'r') as f:
    gpt_results = ast.literal_eval(f.read())

In [None]:
# Split the result records into two parallel lists: the identified names and
# the updated code, both indexed by file number.
gpt_identified = []
gpt_code = []
for record in gpt_results:
    gpt_identified.append(record['identified'])
    gpt_code.append(record['updated_code'])

In [None]:
# save the updated code to files
# if the code is None we write the original code
import os

# Create the output folder on first use.
gpt_code_dir_missing = not os.path.exists('gpt_code')
if gpt_code_dir_missing:
    os.makedirs('gpt_code')

# Write one file per sample: the updated code when something was identified
# and code came back, otherwise the untouched original sample.
for file_idx, updated_src in enumerate(gpt_code):
    with open(f'gpt_code/{file_idx}.py', 'w') as out_file:
        keep_original = gpt_identified[file_idx] is None or updated_src is None
        out_file.write(random_cells[file_idx] if keep_original else updated_src)

In [None]:
# write random_cells out to a new folder
# TODO: I think this is just temporary for now because of the 20 files; later we will use all the files

import os
# Make sure the output folder exists, then dump every sample as <index>.py.
random_cells_dir_missing = not os.path.exists('random_cells')
if random_cells_dir_missing:
    os.makedirs('random_cells')

for cell_idx, cell_src in enumerate(random_cells):
    with open(f'random_cells/{cell_idx}.py', 'w') as out_file:
        out_file.write(cell_src)

In [None]:
# Unused-function data for the original samples in 'random_cells'.
before = get_unused_data(NUM_FILES, 'random_cells', 'function')

# The total is the summed length of every item returned.
total_before = sum(map(len, before))
print(f'Total before: {total_before}')

In [None]:
# Unused-function data for the GPT-updated files in 'gpt_code'.
after = get_unused_data(NUM_FILES, 'gpt_code', 'function')

# The total is the summed length of every item returned.
total_after = sum(map(len, after))
print(f'Total after: {total_after}')

In [None]:
# Report the relative change of the total between before and after, in percent.
# With nothing counted before, the ratio is undefined, so report that instead
# of raising ZeroDivisionError.
if total_before == 0:
    print('Total percentage difference: undefined (total before is 0)')
else:
    print(f'Total percentage difference: {(total_after - total_before) / total_before * 100}%')

In [None]:
# Compare the names GPT identified with the unused-function data collected
# for the original samples.
# NOTE(review): what exactly is reported is defined by
# utils.stats_results_unused - confirm there.
stats_results_unused(gpt_identified, before)