In [None]:
"""
Simple prompt
"""

In [1]:
import sys
sys.path.append('../../')
from run_process import get_unused_data
sys.path.append('../../../')
from utils import print_check_gpt_results

In [2]:
# Read the OpenAI API key from the environment instead of storing it in the
# notebook. SECURITY: a literal secret key used to be committed in this cell;
# treat that key as compromised and revoke it in the OpenAI dashboard.
import os

my_key = os.environ.get("OPENAI_API_KEY", "").strip()
if not my_key:
    # Fail fast with a clear message rather than letting the API calls in the
    # later cells fail on a missing key.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it before starting Jupyter, "
        "e.g. `export OPENAI_API_KEY=<your key>`."
    )

In [3]:
# Notebook configuration

# File the raw GPT responses are written to and later read back from.
GPT_SAVED_FILE_NAME = 'unused_functions_gpt'
# Folder holding the numbered sample files (`0.py`, `1.py`, ...).
SAMPLES_FOLDER_NAME = '../allfiles_unused_functionmethod_vulture'
# Number of sample files to process, starting at index 0.
NUM_FILES = 5

In [4]:
# Load the text of every numbered sample file into `random_cells`.
def _read_sample(index):
    """Return the full contents of `<SAMPLES_FOLDER_NAME>/<index>.py`."""
    with open(f'{SAMPLES_FOLDER_NAME}/{index}.py', 'r') as handle:
        return handle.read()


random_cells = [_read_sample(index) for index in range(NUM_FILES)]

In [6]:
# ask GPT to identify and remove the unused functions/methods in each sample file
import openai
openai.api_key = my_key

# GPT
def identify_remove_unused(cell_src, max_retries=6, retry_delay=2.0):
    """Ask GPT to list and remove the unused functions/methods in `cell_src`.

    Args:
        cell_src: Source code to analyse; it is sent to the model wrapped in a
            triple-backtick python block.
        max_retries: Maximum number of API attempts before giving up (>= 1).
        retry_delay: Seconds to wait after the first failed attempt. The wait
            doubles after each further failure and is capped at 60 seconds.

    Returns:
        A `(finish_reason, content)` tuple taken from the first choice of the
        response. When the API error message says the maximum context length
        was exceeded, `('length', '')` is returned instead.

    Raises:
        ValueError: If `max_retries` is smaller than 1.
        Exception: The last API error, re-raised after `max_retries` failed
            attempts (previously the call was retried forever).
    """
    import time

    if max_retries < 1:
        raise ValueError('max_retries must be at least 1')

    # The prompt is identical for every attempt, so build it once.
    messages = [
        {"role": "user", "content": "A function/method is unused if it is defined using `def` but is not referenced after being defined. Identify and remove unused functions/methods in the code delimited by triple backticks. Output under the headings 'Unused functions' (e.g., ['function1', 'function2', ...] and 'Updated code' (e.g., ```python\n# updated code here\n```)"},
        {"role": "user", "content": f"```python\n{cell_src}```"}
    ]

    for attempt in range(1, max_retries + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                temperature=0,
                messages=messages,
            )
        except Exception as e:
            if 'maximum context length' in str(e):
                # Retrying cannot help: the input itself is too long.
                print('...Error.. too long...' + str(e))
                return 'length', ''
            if attempt == max_retries:
                # Persistent failure (e.g. invalid key): surface it instead of
                # looping forever.
                print('...Error.. giving up...' + str(e))
                raise
            print('...Error.. trying again...' + str(e))
            # Exponential backoff so a rate-limited API is not hammered.
            time.sleep(min(retry_delay * 2 ** (attempt - 1), 60))
        else:
            choice = completion.choices[0]
            return choice.finish_reason, choice.message["content"]

# Query GPT once per sample file and keep the finish reason with the answer.
gpt_results = []
for file_idx, source in enumerate(random_cells):
    print(f'Processing file {file_idx}')
    reason, answer = identify_remove_unused(source)
    print(f'File {file_idx} - {reason}')
    gpt_results.append(dict(reason=reason, result=answer))

# Persist the results as the textual representation of the list so a later
# cell can load them without calling the API again.
with open(GPT_SAVED_FILE_NAME, 'w') as out_file:
    out_file.write(repr(gpt_results))

Processing file 0
File 0 - stop
Processing file 1
File 1 - stop
Processing file 2
File 2 - stop
Processing file 3
File 3 - stop
Processing file 4
File 4 - stop


In [4]:
# read in gpt result from file
# The file holds the `str()` of a list of dicts of strings, so
# `ast.literal_eval` can rebuild it. Unlike `eval`, it only accepts Python
# literals and therefore cannot execute code that ends up in the file.
import ast

with open(GPT_SAVED_FILE_NAME, 'r') as f:
    gpt_results = ast.literal_eval(f.read())

In [5]:
# checking finish reason for identified functions
# NOTE(review): `print_check_gpt_results` comes from the project's `utils`
# module and is not visible here; judging by the cell output it presumably
# prints a count per finish reason followed by a list of problematic entries
# -- confirm against utils.
print_check_gpt_results(gpt_results)

stop: 5
[]


In [7]:
# Split every GPT answer into the list of unused names and the updated code.
import re


def _find_heading(text, heading):
    """Return the `(start, end)` span of `heading` in `text`, or None.

    The exact marker `<heading>:` is preferred. When it is absent, a line that
    consists only of the heading (optionally wrapped in markdown markup such
    as `#` or `**`, with or without a colon) is accepted instead, because the
    prompt asks for headings without mentioning a colon.
    """
    marker = heading + ':'
    start = text.find(marker)
    if start != -1:
        return start, start + len(marker)
    loose = re.search(
        r'^[ \t#*_>]*' + re.escape(heading) + r'[ \t\r*_:]*$',
        text,
        re.IGNORECASE | re.MULTILINE,
    )
    return loose.span() if loose else None


def _clean_name(raw):
    """Reduce one list item to a bare function name."""
    item = raw.strip()
    if '`' in item:
        # Items such as "`foo` - never called": the name is the first
        # backtick-quoted span.
        item = item.split('`')[1]
    item = item.strip().strip('\'"')
    # Drop a call suffix such as "()" so the name is a plain identifier.
    return re.sub(r'\s*\(.*\)\s*$', '', item)


def _parse_unused_names(section):
    """Convert the text under the 'Unused functions' heading into a name list.

    Raises:
        ValueError: If the text is neither a 'None' answer, a bracketed list
            nor a bullet list.
    """
    text = section.strip()
    # "None" only counts when it is the answer itself ("None", "- None",
    # "['None']"), not when it merely occurs inside a name or a later remark.
    if re.match(r'\W*none\b', text, re.IGNORECASE):
        return []
    # Bullet list items ("- foo", "* `foo`", "1. foo"), one per line.
    bullets = re.findall(r'^[ \t]*(?:[-*+]|\d+[.)])[ \t]+(.+)$', text, re.MULTILINE)
    if text.startswith('[') or (not bullets and '[' in text):
        # Python-style list: keep only what lies between the first '[' and the
        # following ']' so trailing prose is ignored.
        items = text.split('[', 1)[1].split(']', 1)[0].split(',')
    elif bullets:
        items = bullets
    else:
        raise ValueError(f"unrecognised 'Unused functions' format: {text[:200]!r}")
    return [name for name in map(_clean_name, items) if name]


def _extract_updated_code(section):
    """Return the contents of the first triple-backtick block in `section`.

    Raises:
        ValueError: If `section` contains no triple-backtick fence.
    """
    pieces = section.split('```')
    if len(pieces) < 2:
        raise ValueError("no fenced code block found under 'Updated code'")
    # Remove a language tag directly after the opening fence; code whose first
    # identifier merely starts with "python" is left alone.
    code = re.sub(r'\A(?:python3?|py)\b[ \t\r]*\n?', '', pieces[1], flags=re.IGNORECASE)
    return code.strip('\n')


def _split_gpt_answer(text):
    """Split one GPT answer into `(unused_names, updated_code)`.

    Raises:
        ValueError: If a heading is missing or a section cannot be parsed.
    """
    names_span = _find_heading(text, 'Unused functions')
    if names_span is None:
        raise ValueError("no 'Unused functions' heading found")
    code_span = _find_heading(text, 'Updated code')
    if code_span is None:
        raise ValueError("no 'Updated code' heading found")
    # The names section ends where the code heading starts (when it follows).
    names_end = code_span[0] if code_span[0] >= names_span[1] else len(text)
    unused = _parse_unused_names(text[names_span[1]:names_end])
    code = _extract_updated_code(text[code_span[1]:])
    return unused, code


gpt_unused_names = []
gpt_updated_code = []

for i, result in enumerate(gpt_results):
    if result['reason'] == 'stop':
        try:
            unused_names, updated_code = _split_gpt_answer(result['result'])
        except ValueError as err:
            # Fail loudly, but say which answer needs a new parsing rule.
            raise ValueError(f'Could not parse GPT result {i}: {err}') from err
    else:
        # if we error we assume no unused functions and keep the original code
        unused_names, updated_code = [], random_cells[i]
    # store
    gpt_unused_names.append(unused_names)
    gpt_updated_code.append(updated_code)

In [8]:
# save the updated code to files
import os

# `exist_ok=True` keeps the cell safe to re-run and avoids the
# check-then-create race of a separate `os.path.exists` test.
os.makedirs('gpt_code', exist_ok=True)
for i, code in enumerate(gpt_updated_code):
    with open(f'gpt_code/{i}.py', 'w') as f:
        f.write(code)

In [9]:
# write random_cells to a new folder, one numbered file per sample
# TODO can delete this later when doing full study

import os

# `exist_ok=True` keeps the cell safe to re-run and avoids the
# check-then-create race of a separate `os.path.exists` test.
os.makedirs('random_cells', exist_ok=True)
for i, code in enumerate(random_cells):
    with open(f'random_cells/{i}.py', 'w') as f:
        f.write(code)

In [10]:
# Unused-function data for the original files written to `random_cells/`
# (one entry per file, as returned by the project's `get_unused_data`).
before = get_unused_data(NUM_FILES, 'random_cells', 'function')

# Total number of reported items across all files.
total_before = sum(map(len, before))
print(f'Total before: {total_before}')

Total before: 5


In [11]:
# Unused-function data for the GPT-updated files written to `gpt_code/`
# (one entry per file, as returned by the project's `get_unused_data`).
after = get_unused_data(NUM_FILES, 'gpt_code', 'function')

# Total number of reported items across all files.
total_after = sum(map(len, after))
print(f'Total after: {total_after}')

Total after: 5


In [12]:
# List percentage difference between before and after for total
if total_before:
    print(f'Total percentage difference: {(total_after - total_before) / total_before * 100}%')
else:
    # With nothing reported before, a relative change is undefined; avoid the
    # ZeroDivisionError and say so explicitly.
    print('Total percentage difference: n/a (total before is 0)')

Total percentage difference: 0.0%


In [13]:
# Identification results of Vulture vs GPT
gpt_before_count = sum(map(len, gpt_unused_names))
vulture_before_count = sum(map(len, before))
print(f'GPT before count: {gpt_before_count}')
print(f'Vulture before count: {vulture_before_count}')

print("------------")

# Compare, file by file, the names GPT reported with the entries in `before`.
# A GPT name that also appears in `before` is a true positive; any other GPT
# name is a false positive.
true_positives = 0
false_positives = 0
for idx, predicted in enumerate(gpt_unused_names):
    reference = before[idx]
    hits = sum(1 for name in predicted if name in reference)
    true_positives += hits
    false_positives += len(predicted) - hits

# An entry of `before` that GPT did not report for the same file is a false
# negative.
false_negatives = sum(
    1
    for idx, reference in enumerate(before)
    for name in reference
    if name not in gpt_unused_names[idx]
)

# print the results
print(f'True positives: {true_positives}')
print(f'False positives: {false_positives}')
print(f'False negatives: {false_negatives}')

GPT before count: 0
Vulture before count: 5
------------
True positives: 0
False positives: 0
False negatives: 5
