In [None]:
"""
Tree-of-Thought prompting

identify_trials = 5
remove_trials = 2
remove_vote_trials = 5
"""

In [1]:
import sys
sys.path.append('../../')
from run_process import get_unused_data
sys.path.append('../../../')
from utils import stats_results_unused

In [2]:
import os

# Read the OpenAI API key from the environment instead of committing it to the notebook.
# NOTE(review): the key that used to be hardcoded on this line has been exposed in the
# notebook source and should be revoked/rotated.
my_key = os.environ.get("OPENAI_API_KEY")
if not my_key:
    print("WARNING: OPENAI_API_KEY is not set; OpenAI requests will fail until it is provided.")

In [3]:
# Experiment configuration shared by the cells below.
NUM_FILES = 20  # number of sample files to process (0.py .. 19.py)
SAMPLES_FOLDER_NAME = "../allfiles_unused_functionmethod_vulture"  # folder the sample files are read from
GPT_SAVED_FILE_NAME = "unused_functions_gpt"  # file the GPT results are written to and read back from

In [4]:
# Load the text of every sample file into random_cells, ordered by file index.
sample_paths = [f'{SAMPLES_FOLDER_NAME}/{idx}.py' for idx in range(NUM_FILES)]

random_cells = []
for sample_path in sample_paths:
    with open(sample_path, 'r') as sample_file:
        random_cells.append(sample_file.read())

In [5]:
import openai
openai.api_key = my_key

# GPT wrapper -- sometimes it fails and we should retry
def gpt_wrapper(msgs, n, stop, max_retries=10, retry_delay=1.0):
    """Request `n` chat completions from gpt-3.5-turbo, retrying a bounded number of times.

    Args:
        msgs: list of chat messages, passed through as `messages`.
        n: number of completions to request.
        stop: stop sequence(s) passed through as `stop` (may be None).
        max_retries: maximum number of attempts before giving up.
        retry_delay: initial pause in seconds between attempts; doubled after
            every failure (capped at 60 seconds). Use 0 to retry immediately.

    Returns:
        The completion response, or None when the request exceeds the model's
        maximum context length or when every attempt failed.
    """
    import time

    delay = retry_delay
    for attempt in range(1, max_retries + 1):
        try:
            return openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                temperature=1,
                messages=msgs,
                n=n,
                stop=stop
            )
        except Exception as e:
            # A prompt that is too long can never succeed, so do not retry it.
            if 'maximum context length' in str(e):
                print('...Error.. too long...' + str(e))
                return None
            print('...Error.. trying again...' + str(e))
            # Back off before the next attempt so persistent errors do not spin.
            if attempt < max_retries and delay > 0:
                time.sleep(delay)
                delay = min(delay * 2, 60)

    # Every attempt failed; callers already treat None as "request failed".
    print(f'...Error.. giving up after {max_retries} attempts...')
    return None

In [11]:
# Function definitions for Tree of Thought prompting

import re

def prompt_cot(n, stop, input_code, input_msg):
    """Send the few-shot 'identify, then remove unused functions' prompt to GPT.

    Args:
        n: number of completions to request.
        stop: stop sequence forwarded to gpt_wrapper (may be None).
        input_code: source code to analyse; it is wrapped in a ```python fence.
        input_msg: optional text appended as a trailing assistant message;
            skipped when falsy.

    Returns:
        Whatever gpt_wrapper returns for the assembled conversation.
    """
    instruction = "A function/method is unused if it is defined using 'def' but is not referenced after being defined. Remove unused functions/methods in the code delimited by triple backticks. First, identify unused functions in the code. Then, remove the identified unused functions from the code. Output under the headings 'Unused functions' and 'Updated code' respectively. If there are no unused functions, return an empty list."

    # Two worked examples: (user question, assistant answer).
    few_shot_examples = [
        (
            "```python\nc = 5\n\ndef calc():\n    a = 5\n    b = 4\n    return a + b\n```",
            "Unused functions:\n['calc']\n\nUpdated code:\n```python\nc = 5\n```\n\nExplanation:\nFully removed the functions ['calc'] and the code inside these functions.",
        ),
        (
            "```python\ndef multiply(a, b):\n    return a * b\n\ndef mystery():\n    a = 5\n    b = 4\n    return a - b\n```",
            "Unused functions:\n['multiply', 'mystery']\n\nUpdated code:\n```python\n```\n\nExplanation:\nFully removed the functions ['mystery', 'multiply'] and the code inside these functions",
        ),
    ]

    conversation = [{"role": "user", "content": instruction}]
    for question, answer in few_shot_examples:
        conversation.append({"role": "user", "content": question})
        conversation.append({"role": "assistant", "content": answer})
    conversation.append({"role": "user", "content": f"```python\n{input_code}\n```"})

    # An optional assistant message goes last in the conversation.
    if input_msg:
        conversation.append({"role": "assistant", "content": input_msg})

    return gpt_wrapper(conversation, n, stop)

def prompt_vote(n, stop, original_code, unused_functions, choice1, choice2):
    """Ask GPT which of two candidate rewrites best removes the unused functions.

    Args:
        n: number of completions (votes) to request.
        stop: stop sequence forwarded to gpt_wrapper (may be None).
        original_code: the code before removal.
        unused_functions: the identified unused function names, interpolated as-is.
        choice1: first candidate rewrite.
        choice2: second candidate rewrite.

    Returns:
        Whatever gpt_wrapper returns for the assembled conversation.
    """
    task_description = "Given original code, a list of unused function names, and two updated code choices, decide which choice best removes the identified unused functions from the original code. Analyze each choice in detail, then conclude in the last line 'The best choice is {s}', where s is the integer id of the choice."

    # One worked example of a question and the expected style of answer.
    example_question = "Original code:\n```python\nc = 5\n\ndef calc():\n    a = 5\n    b = 4\n    return a + b\n```\nUnused functions:\n['calc']\nChoice 1:\n```python\nc = 5\n\na = 5\n    b = 4\n```\nChoice 2:\n```python\nc = 5\n```"
    example_answer = "Choice 1 does not properly remove the unused functions ['calc']. While it does remove the function signature and return statement, it does not fully remove the contents of the function.\n\nChoice 2 properly removes the unused functions ['calc']. The functions identified are indeed unused and have been fully removed from the code.\n\nThe best choice is 2."

    actual_question = (
        f"Original code:\n```python\n{original_code}\n```\n"
        f"Unused functions:\n{unused_functions}\n"
        f"Choice 1:\n```python\n{choice1}\n```\n"
        f"Choice 2:\n```python\n{choice2}\n```"
    )

    conversation = [
        {"role": "user", "content": task_description},
        {"role": "user", "content": example_question},
        {"role": "assistant", "content": example_answer},
        {"role": "user", "content": actual_question},
    ]
    return gpt_wrapper(conversation, n, stop)

def _parse_identified_names(content):
    """Return the sorted list of names found after 'Unused functions:', or [] if none can be parsed."""
    import ast

    _, header, tail = content.partition('Unused functions:')
    if not header:
        return []
    # Take the first bracketed list so trailing commentary does not break parsing.
    list_match = re.search(r"\[.*?\]", tail, re.DOTALL)
    if list_match is None:
        return []
    try:
        # literal_eval only accepts literals, so model output can never execute code.
        parsed = ast.literal_eval(list_match.group(0))
    except (ValueError, SyntaxError):
        return []
    if not isinstance(parsed, list):
        return []
    return sorted(str(name) for name in parsed)


def _extract_code_block(content):
    """Return the text inside the first ``` fence of `content`, or None when there is no fence."""
    parts = content.split('```')
    if len(parts) < 2:
        return None
    code = parts[1]
    if code.startswith('python'):
        code = code[6:]
    return code.strip("\n")


def _parse_vote(vote_output, num_choices):
    """Return the 0-based choice named after the last 'best choice is', or None if absent/out of range."""
    numbers = re.findall(r"best choice is\D*?(\d+)", vote_output)
    if not numbers:
        return None
    choice = int(numbers[-1]) - 1
    return choice if 0 <= choice < num_choices else None


def solve(input_code):
    """Identify and remove unused functions from `input_code` with a Tree-of-Thought scheme.

    Steps:
      1. Ask for several independent identifications and keep the most common
         list of names (majority vote, no extra GPT call).
      2. Ask for two candidate rewrites that remove those names.
      3. If the candidates differ, ask GPT to vote on the better one.

    Args:
        input_code: source code to process.

    Returns:
        A tuple (identified_names, updated_code):
          - (None, None) if the identification request failed;
          - ([], input_code) if the majority found nothing to remove;
          - (names, None) if removal or voting failed;
          - (names, code) otherwise.
    """
    identify_trials = 5
    remove_trials = 2  # prompt_vote compares exactly two candidates
    remove_vote_trials = 5

    # --- Step 1: identify unused items --------------------------------------
    identify_completions = prompt_cot(n = identify_trials, stop = "Updated code:", input_code = input_code, input_msg = None)

    # If the entire thing fails we have to return
    if identify_completions is None:
        print("Identifying code failed")
        return None, None

    # Count how often each (sorted) list of names was proposed. Unfinished or
    # unparsable completions count as a vote for "nothing to remove".
    ballots = {}
    for choice in identify_completions.choices:
        if choice.finish_reason == 'stop':
            names = _parse_identified_names(choice['message']['content'])
        else:
            names = []
        key = tuple(names)  # tuples are hashable, lists are not
        ballots[key] = ballots.get(key, 0) + 1

    if not ballots:
        print("Identifying code failed")
        return None, None

    most_popular_identify = list(max(ballots, key=ballots.get))

    # Nothing to remove: the original code is already the answer.
    if not most_popular_identify:
        return most_popular_identify, input_code

    # --- Step 2: remove the identified items --------------------------------
    input_msg = f"Unused functions:\n{most_popular_identify}\n\n"
    remove_completions = prompt_cot(n = remove_trials, stop = None, input_code = input_code, input_msg = input_msg)

    # If removing code fails we return here
    if remove_completions is None:
        print("Removing code failed")
        return most_popular_identify, None

    updated_code = []
    for choice in remove_completions.choices[:remove_trials]:
        # Check the removal completion itself (not the identification one).
        if choice.finish_reason == 'stop':
            updated_code.append(_extract_code_block(choice['message']['content']))
        else:
            updated_code.append(None)
    updated_code += [None] * (remove_trials - len(updated_code))

    usable = [code for code in updated_code if code is not None]
    if not usable:
        print("Removing code failed")
        return most_popular_identify, None
    if len(usable) == 1:
        # Only one candidate survived; there is nothing to vote on.
        return most_popular_identify, usable[0]

    # RELYING ON THE FACT REMOVE TRIALS IS 2
    # if the code is the same, we return either trial
    if updated_code[0] == updated_code[1]:
        print("Updated code is the same")
        return most_popular_identify, updated_code[0]

    # --- Step 3: vote on the best candidate using GPT -----------------------
    gpt_votes = prompt_vote(n = remove_vote_trials, stop = None, original_code = input_code, unused_functions = most_popular_identify, choice1 = updated_code[0], choice2 = updated_code[1])

    # If voting fails we return here
    if gpt_votes is None:
        print("Voting failed")
        return most_popular_identify, None

    vote_results = [0] * remove_trials
    for i, choice in enumerate(gpt_votes.choices):
        # Check the vote completion itself (not the identification one).
        if choice.finish_reason != 'stop':
            print(f"Voting failed for trial {i}")
            continue
        vote_output = choice['message']['content']
        picked = _parse_vote(vote_output, remove_trials)
        if picked is None:
            print(f'vote no match: {[vote_output]}')
        else:
            vote_results[picked] += 1

    # Get the most popular choice (ties go to the earlier candidate)
    most_popular_remove = vote_results.index(max(vote_results))

    return most_popular_identify, updated_code[most_popular_remove]

In [12]:
# Run the identify/remove pipeline on every sample and collect one result dict per file.
gpt_results = []
for file_idx, source_text in enumerate(random_cells):
    print(f'Processing file {file_idx}')
    names, new_code = solve(source_text)
    print(f'File {file_idx} - {names}')
    gpt_results.append({'identified': names, 'updated_code': new_code})

# Persist the results as the string form of the list so a later cell can reload them.
with open(GPT_SAVED_FILE_NAME, 'w') as results_file:
    results_file.write(str(gpt_results))

Processing file 0
File 0 - []
Processing file 1
File 1 - []
Processing file 2
File 2 - []
Processing file 3
File 3 - []
Processing file 4
File 4 - []
Processing file 5
File 5 - []
Processing file 6
Updated code is the same
File 6 - ['basic_stock_features']
Processing file 7
Updated code is the same
File 7 - ['clean_data']
Processing file 8
Updated code is the same
File 8 - ['create_xgb_target']
Processing file 9
Updated code is the same
File 9 - ['create_xgb_features']
Processing file 10
File 10 - []
Processing file 11
File 11 - []
Processing file 12
File 12 - []
Processing file 13
File 13 - ['fetch_pretrained_inception_v3']
Processing file 14
File 14 - []
Processing file 15
Updated code is the same
File 15 - ['fetch_flowers']
Processing file 16
File 16 - []
Processing file 17
File 17 - []
Processing file 18
File 18 - []
Processing file 19
File 19 - []


In [5]:
# read in gpt result from file
import ast

with open(GPT_SAVED_FILE_NAME, 'r') as f:
    # The file holds the str() of a list of dicts made of lists, strings and None.
    # literal_eval parses exactly such literals and, unlike eval, cannot run code.
    gpt_results = ast.literal_eval(f.read())

In [6]:
# Split the results into two parallel lists (one entry per file):
# the identified names and the updated code.
gpt_identified = [entry['identified'] for entry in gpt_results]
gpt_code = [entry['updated_code'] for entry in gpt_results]

In [7]:
# Write each file's updated code to gpt_code/<index>.py.
# When GPT produced no code (None), the original source is written instead.
import os

output_dir_missing = not os.path.exists('gpt_code')
if output_dir_missing:
    os.makedirs('gpt_code')

for file_idx, new_code in enumerate(gpt_code):
    with open(f'gpt_code/{file_idx}.py', 'w') as out_file:
        contents = random_cells[file_idx] if new_code is None else new_code
        out_file.write(contents)

In [8]:
# Write the original samples to random_cells/<index>.py.
# TODO I think is just temporary for now bc of 20 files, later we will use all the files

import os

samples_dir_missing = not os.path.exists('random_cells')
if samples_dir_missing:
    os.makedirs('random_cells')

for file_idx, source_text in enumerate(random_cells):
    with open(f'random_cells/{file_idx}.py', 'w') as out_file:
        out_file.write(source_text)

In [9]:
# Collect the per-file results for the original samples in 'random_cells'.
# NOTE(review): presumably each entry lists the unused functions found in one file - confirm in run_process.
before = get_unused_data(NUM_FILES, 'random_cells', 'function')

# Total number of entries across all files.
total_before = sum(map(len, before))
print(f'Total before: {total_before}')

Total before: 22


In [10]:
# Collect the per-file results for the GPT-updated files in 'gpt_code'.
# NOTE(review): presumably each entry lists the unused functions found in one file - confirm in run_process.
after = get_unused_data(NUM_FILES, 'gpt_code', 'function')

# Total number of entries across all files.
total_after = sum(map(len, after))
print(f'Total after: {total_after}')

Total after: 16


In [11]:
# List percentage difference between before and after for total
if total_before:
    print(f'Total percentage difference: {(total_after - total_before) / total_before * 100}%')
else:
    # With nothing counted before, a relative change is undefined (would divide by zero).
    print('Total percentage difference: n/a (total before is 0)')

Total percentage difference: -27.27272727272727%


In [12]:
# Compare the names GPT identified against the `before` results.
# NOTE(review): gpt_identified holds None for files where identification failed - confirm stats_results_unused tolerates that.
stats_results_unused(gpt_identified, before)

GPT before count: 6
Vulture before count: 22
------------
True positives: 6
False positives: 0
False negatives: 16
