In [1]:
"""
Shot and decomposed prompting: decompose into identify then remove

Identify: 3-shot
Remove: 4-shot
"""

'\nShot and decomposed prompting: decompose into identify then remove\n\nIdentify: 3-shot\nRemove: 4-shot\n'

In [2]:
import os

# Read the OpenAI API key from the environment so the secret is never stored in
# the notebook. The key that used to be hardcoded on this line was committed in
# plain text and must be treated as compromised: revoke it and issue a new one.
my_key = os.environ.get("OPENAI_API_KEY", "")
if not my_key:
    print("WARNING: OPENAI_API_KEY is not set; OpenAI requests will fail.")

In [5]:
# Experiment configuration
NUM_FILES = 20  # number of sample files, read as 0.py, 1.py, ...
SAMPLES_FOLDER_NAME = "../allfiles_unused_functionmethod_vulture"  # folder holding the sample files
GPT_SAVED_FILE_NAME = "unused_functions_gpt"  # base name of the files the GPT responses are saved to

In [6]:
# Load the source text of every sample file into random_cells, in index order
random_cells = []

for file_index in range(NUM_FILES):
    with open(f'{SAMPLES_FOLDER_NAME}/{file_index}.py', 'r') as sample_file:
        source_text = sample_file.read()
    random_cells.append(source_text)

In [3]:
# get unused data using vulture
import subprocess

def get_unused_data(SAMPLES_FOLDER_NAME, num_files=None):
    """Run vulture on each numbered sample file and collect unused function/method names.

    Args:
        SAMPLES_FOLDER_NAME: Folder containing the files ``0.py``, ``1.py``, ...
        num_files: How many files to scan. Defaults to the notebook-level
            ``NUM_FILES`` when omitted.

    Returns:
        A list with one entry per file; each entry is the list of names that
        vulture reported as an "unused function" or "unused method".
    """
    import re

    if num_files is None:
        num_files = NUM_FILES

    # Matches the relevant part of a vulture report line such as
    #   path/0.py:3: unused function 'calc' (60% confidence)
    report_pattern = re.compile(r"unused (?:function|method) '([^']+)'")

    unused_before = [[] for _ in range(num_files)]

    for i in range(num_files):
        file_name = f'{SAMPLES_FOLDER_NAME}/{i}.py'

        result = subprocess.run(['vulture', file_name], capture_output=True, text=True)

        # vulture exits with 0 (no dead code) or 3 (dead code found) on success.
        # Any other code means the file could not be analysed (e.g. a syntax
        # error); without this warning such a file would silently count as
        # having no unused functions.
        if result.returncode not in (0, 3):
            print(f'WARNING: vulture could not analyse {file_name} '
                  f'(exit code {result.returncode}): {result.stderr.strip()}')

        for line in result.stdout.splitlines():
            match = report_pattern.search(line)
            if match:
                unused_before[i].append(match.group(1))

    return unused_before

In [61]:
# identify unused functions using vulture (GPT)
import openai
openai.api_key = my_key

# GPT
def identify_unused(cell_src, max_retries=5, retry_delay=1.0):
    """Ask gpt-3.5-turbo (3-shot prompt) which functions/methods in ``cell_src`` are unused.

    Args:
        cell_src: Source code of the file to analyse.
        max_retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to wait after the first failed attempt; the wait
            doubles after every further failure.

    Returns:
        A ``(finish_reason, content)`` tuple. ``finish_reason`` is the value
        reported by the API, ``'length'`` when the request is rejected for
        exceeding the maximum context length, or ``'error'`` when every attempt
        failed. ``content`` is the raw model reply, or ``''`` when no reply was
        obtained.
    """
    import time

    messages = [
        {"role": "user", "content": "A function/method is unused if it is defined using `def` but is not referenced after being defined. Identify unused functions/methods in the code delimited by triple backticks. Output this as a list of function names."},
        {"role": "user", "content": "```python\nc = 5\n\ndef calc():\n    a = 5\n    b = 4\n    return a + b```"},
        {"role": "assistant", "content": "['calc']"},
        {"role": "user", "content": "```python\ndef multiply(a, b):\n    return a * b\n\nc = 5\n\ndef special():\n    a = 5\n    b = 4\n    return multiply(a, b)```"},
        {"role": "assistant", "content": "['special']"},
        {"role": "user", "content": "```python\nclass Person:\n    def __init__(self, name, age):\n        self.name = name\n        self.age = age\n\n    def update_name(self, name):\n        self.name = name```"},
        {"role": "assistant", "content": "['update_name']"},
        {"role": "user", "content": f"```python\n{cell_src}```"}
    ]

    # Bounded retry with exponential back-off: an unbounded loop would hammer
    # the API forever on a permanent failure such as an invalid key.
    for attempt in range(1, max_retries + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                temperature=0,
                messages=messages,
            )
        except Exception as e:
            if 'maximum context length' in str(e):
                # Retrying cannot help: the prompt itself is too long.
                print('...Error.. too long...' + str(e))
                return 'length', ''
            print('...Error.. trying again...' + str(e))
            if attempt < max_retries:
                time.sleep(retry_delay * 2 ** (attempt - 1))
        else:
            choice = completion.choices[0]
            return choice.finish_reason, choice.message["content"]

    print(f'...Error.. giving up after {max_retries} attempts...')
    return 'error', ''

# Run the identification step on every sample and record its finish reason and reply
gpt_results = []
for i, cell_src in enumerate(random_cells):
    print(f'Processing file {i}')
    outcome = identify_unused(cell_src)
    finish_reason, result = outcome
    print(f'File {i} - {finish_reason}')
    gpt_results.append(dict(reason=finish_reason, result=result))

# write the collected responses to disk as the str() of the list
with open(GPT_SAVED_FILE_NAME, 'w') as f:
    serialized = str(gpt_results)
    f.write(serialized)

Processing file 0
File 0 - stop
Processing file 1
File 1 - stop
Processing file 2
File 2 - stop
Processing file 3
File 3 - stop
Processing file 4
File 4 - stop
Processing file 5
File 5 - stop
Processing file 6
File 6 - stop
Processing file 7
File 7 - stop
Processing file 8
File 8 - stop
Processing file 9
File 9 - stop
Processing file 10
File 10 - stop
Processing file 11
File 11 - stop
Processing file 12
File 12 - stop
Processing file 13
File 13 - stop
Processing file 14
File 14 - stop
Processing file 15
File 15 - stop
Processing file 16
File 16 - stop
Processing file 17
File 17 - stop
Processing file 18
File 18 - stop
Processing file 19
File 19 - stop


In [4]:
# read in gpt result from file
import ast

with open(GPT_SAVED_FILE_NAME, 'r') as f:
    # The file holds the str() of a list of dicts of strings. literal_eval
    # parses that literal without executing arbitrary code the way eval() would.
    gpt_results = ast.literal_eval(f.read())

In [5]:
# checking finish reason for identified functions
# tally how often each finish reason occurs in gpt_results
finish_reasons = {}
for entry in gpt_results:
    finish_reasons[entry['reason']] = finish_reasons.get(entry['reason'], 0) + 1

# print the counts
for reason, count in finish_reasons.items():
    print(f'{reason}: {count}')

# indices of the files whose response did not finish due to length
finish_reason_length = [
    idx for idx, entry in enumerate(gpt_results) if entry['reason'] == 'length'
]

# print the numbers
print(finish_reason_length)

stop: 20
[]


In [6]:
# save the results to a variable
import ast


def _parse_name_list(reply):
    """Parse a model reply into a list of names; return [] unless it is a literal list of strings."""
    try:
        # literal_eval only accepts Python literals, so text produced by the
        # model is never executed (eval() would run it as code).
        parsed = ast.literal_eval(reply.strip())
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return []
    if not isinstance(parsed, (list, tuple)):
        # e.g. a bare string would otherwise be iterated character by character later
        return []
    return [name for name in parsed if isinstance(name, str)]


gpt_unused_names = []
for var in gpt_results:
    if var['reason'] == 'stop':
        gpt_unused_names.append(_parse_name_list(var['result']))
    else:
        # incomplete or failed responses contribute no names
        gpt_unused_names.append([])

In [88]:
# remove unused functions using vulture (GPT)
import openai
openai.api_key = my_key

# GPT
def remove_unused(cell_src, function_names, max_retries=5, retry_delay=1.0):
    """Ask gpt-3.5-turbo (4-shot prompt) to remove the named functions from ``cell_src``.

    Args:
        cell_src: Source code to edit.
        function_names: Names of the functions to remove; interpolated into the
            prompt using their ``str()`` form.
        max_retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to wait after the first failed attempt; the wait
            doubles after every further failure.

    Returns:
        A ``(finish_reason, content)`` tuple. ``finish_reason`` is the value
        reported by the API, ``'length'`` when the request is rejected for
        exceeding the maximum context length, or ``'error'`` when every attempt
        failed. ``content`` is the raw model reply, or ``''`` when no reply was
        obtained.
    """
    import time

    messages = [
        {"role": "user", "content": "Remove the functions specified from the code snippet enclosed by triple backticks. Do not add, modify, or remove anything else. Output the updated code with the specified functions removed."},
        {"role": "user", "content": "Code:\n```python\nc = 5\n\ndef random_adder():\n    a = 2\n    b = 3\n    return a + b + c```\n\nFunctions to remove:\n['random_adder']"},
        {"role": "assistant", "content": "```python\nc = 5```"},
        {"role": "user", "content": "Code:\n```python\npar_val = 23\ndef multiply(a, b):\n    return a * b\nnew_val = 1```\n\nFunctions to remove:\n['multiply']"},
        {"role": "assistant", "content": "```python\npar_val = 23\nnew_val = 1```"},
        # This example previously listed ['earnings'] (the parameter) while the
        # expected answer removes the function `estimate`; the list now names
        # the function so the example is self-consistent.
        {"role": "user", "content": "Code:\n```python\ndef estimate(earnings):\n    return 0.1 * earnings```\n\nFunctions to remove:\n['estimate']"},
        {"role": "assistant", "content": "```python\n```"},
        {"role": "user", "content": "Code:\n```python\ndef seven():\n    return 7\n\ndef validate(val1, val2):\n    assert val1 and val2\n    val3 = seven()\n    return val1 * val2 - val3```\n\nFunctions to remove:\n['seven', 'validate']"},
        {"role": "assistant", "content": "```python\n```"},
        {"role": "user", "content": f"Code:\n```python\n{cell_src}```\n\nFunctions to remove:\n{function_names}"}
    ]

    # Bounded retry with exponential back-off: an unbounded loop would hammer
    # the API forever on a permanent failure such as an invalid key.
    for attempt in range(1, max_retries + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                temperature=0,
                messages=messages,
            )
        except Exception as e:
            if 'maximum context length' in str(e):
                # Retrying cannot help: the prompt itself is too long.
                print('...Error.. too long...' + str(e))
                return 'length', ''
            print('...Error.. trying again...' + str(e))
            if attempt < max_retries:
                time.sleep(retry_delay * 2 ** (attempt - 1))
        else:
            choice = completion.choices[0]
            return choice.finish_reason, choice.message["content"]

    print(f'...Error.. giving up after {max_retries} attempts...')
    return 'error', ''

# Run the removal step on every sample that had at least one identified name
gpt_results_code = []
for i, cell_src in enumerate(random_cells):
    print(f'Processing file {i}')
    names_to_remove = gpt_unused_names[i]
    if names_to_remove != []:
        finish_reason, result = remove_unused(cell_src, names_to_remove)
    else:
        # nothing was identified for this file, so its source is kept as-is
        finish_reason, result = 'skipped', cell_src
        print('...skipping due to no identified...')
    print(f'File {i} - {finish_reason}')
    gpt_results_code.append(dict(reason=finish_reason, result=result))

# save the results to a file
with open(GPT_SAVED_FILE_NAME + "_code", 'w') as f:
    serialized = str(gpt_results_code)
    f.write(serialized)

Processing file 0
File 0 - stop
Processing file 1
File 1 - stop
Processing file 2
File 2 - stop
Processing file 3
File 3 - stop
Processing file 4
File 4 - stop
Processing file 5
File 5 - stop
Processing file 6
File 6 - stop
Processing file 7
File 7 - stop
Processing file 8
File 8 - stop
Processing file 9
File 9 - stop
Processing file 10
File 10 - stop
Processing file 11
File 11 - stop
Processing file 12
File 12 - stop
Processing file 13
File 13 - stop
Processing file 14
File 14 - stop
Processing file 15
File 15 - stop
Processing file 16
File 16 - stop
Processing file 17
File 17 - stop
Processing file 18
File 18 - stop
Processing file 19
File 19 - stop


In [10]:
# read in gpt result from file
import ast

with open(GPT_SAVED_FILE_NAME + "_code", 'r') as f:
    # The file holds the str() of a list of dicts of strings. literal_eval
    # parses that literal without executing arbitrary code the way eval() would.
    gpt_results_code = ast.literal_eval(f.read())

In [11]:
# checking code finish reasons
# tally how often each finish reason occurs in gpt_results_code
finish_reasons = {}
for entry in gpt_results_code:
    finish_reasons[entry['reason']] = finish_reasons.get(entry['reason'], 0) + 1

# print the counts
for reason, count in finish_reasons.items():
    print(f'{reason}: {count}')

# indices of the files whose response did not finish due to length
finish_reason_length = [
    idx for idx, entry in enumerate(gpt_results_code) if entry['reason'] == 'length'
]

# print the numbers
print(finish_reason_length)

stop: 20
[]


In [12]:
# save the updated code to files
import os

gpt_code = []

# get all the code from the results
for i, result in enumerate(gpt_results_code):
    parts = result['result'].split("```")
    if result['reason'] == 'skipped' or len(parts) == 1:
        # Either nothing was sent to GPT for this file or the reply contains no
        # fenced block: keep the original source exactly as it is. It must not
        # be split on backticks or have a leading "python" stripped, because
        # that text belongs to the source itself.
        new = random_cells[i]
    else:
        # the text between the first pair of fences is the rewritten code
        new = parts[1]
        if new.startswith('python'):
            # drop the language tag that follows the opening fence
            new = new[6:].strip("\n")
    gpt_code.append(new)

# write every result to the folder gpt_code as <index>.py
os.makedirs('gpt_code', exist_ok=True)
for i, code in enumerate(gpt_code):
    with open(f'gpt_code/{i}.py', 'w') as f:
        f.write(code)

In [13]:
# write every entry of random_cells to the folder random_cells as <index>.py
import os

originals_dir = 'random_cells'
folder_missing = not os.path.exists(originals_dir)
if folder_missing:
    os.makedirs(originals_dir)

for i, code in enumerate(random_cells):
    target_path = f'random_cells/{i}.py'
    with open(target_path, 'w') as f:
        f.write(code)

In [14]:
# vulture findings for the original files, and how many there are overall
before = get_unused_data('random_cells')

total_before = sum(map(len, before))
print(f'Total before: {total_before}')

Total before: 22


In [15]:
# vulture findings for the GPT-edited files, and how many there are overall
after = get_unused_data('gpt_code')

total_after = sum(map(len, after))
print(f'Total after: {total_after}')

Total after: 0


In [58]:
# List percentage difference between before and after for total
if total_before == 0:
    # With nothing reported beforehand a relative change is undefined;
    # dividing would raise ZeroDivisionError.
    print('Total percentage difference: n/a (total before is 0)')
else:
    print(f'Total percentage difference: {(total_after - total_before) / total_before * 100}%')

Total percentage difference: -100.0%


In [26]:
# Identification results of Vulture vs GPT
gpt_before_count = sum(map(len, gpt_unused_names))
vulture_before_count = sum(map(len, before))
print(f'GPT before count: {gpt_before_count}')
print(f'Vulture before count: {vulture_before_count}')

print("------------")

# Compare, file by file, the names GPT listed (gpt_unused_names) with the names
# vulture reported (before); vulture's report is treated as the reference.
true_positives = 0
false_positives = 0
false_negatives = 0

# names GPT listed: in vulture's list -> true positive, otherwise false positive
for file_idx, predicted in enumerate(gpt_unused_names):
    reference = before[file_idx]
    hits = sum(1 for candidate in predicted if candidate in reference)
    true_positives += hits
    false_positives += len(predicted) - hits

# names vulture reported that GPT did not list -> false negative
for file_idx, reference in enumerate(before):
    predicted = gpt_unused_names[file_idx]
    false_negatives += sum(1 for expected in reference if expected not in predicted)

# print the results
print(f'True positives: {true_positives}')
print(f'False positives: {false_positives}')
print(f'False negatives: {false_negatives}')

True positives: 21
False positives: 2
False negatives: 1
