In [None]:
"""
Few-shot and decomposed prompting: decompose the task into identify, then remove

Identify: 4-shot
Remove: 4-shot
"""

In [None]:
import os

# The OpenAI API key is read from the OPENAI_API_KEY environment variable so
# the secret never lives in the notebook. Any key that was previously pasted
# into this cell must be treated as compromised and revoked.
my_key = os.environ.get("OPENAI_API_KEY", "")
if not my_key:
    print("WARNING: OPENAI_API_KEY is not set; OpenAI requests will fail until it is exported.")

In [None]:
# Run configuration: number of numbered input files, the folder they are read
# from, and the base file name used when saving GPT responses.
NUM_FILES = 20
SAMPLES_FOLDER_NAME = "../random_cells_unused_variables_vulture_seed42"
GPT_SAVED_FILE_NAME = "unused_variables_gpt"

In [None]:
# Load the source text of files 0.py .. (NUM_FILES - 1).py from the samples
# folder; random_cells[i] holds the contents of file i.
random_cells = []

for i in range(NUM_FILES):
    with open(f'{SAMPLES_FOLDER_NAME}/{i}.py', 'r') as source_file:
        contents = source_file.read()
    random_cells.append(contents)

In [None]:
# get unused data using vulture
import subprocess

def get_unused_data(SAMPLES_FOLDER_NAME):
    """Collect the unused-variable names vulture reports for each numbered file.

    Runs the ``vulture`` command on ``{SAMPLES_FOLDER_NAME}/{i}.py`` for every
    ``i`` in ``range(NUM_FILES)`` and keeps only the stdout lines that contain
    ``unused variable``.

    Args:
        SAMPLES_FOLDER_NAME: Folder holding the numbered ``.py`` files.

    Returns:
        A list with ``NUM_FILES`` entries; entry ``i`` is the list of names
        extracted for file ``i`` (empty when no matching line was printed).
    """
    names_per_file = []

    for i in range(NUM_FILES):
        completed = subprocess.run(
            ['vulture', f'{SAMPLES_FOLDER_NAME}/{i}.py'],
            capture_output=True,
            text=True,
        )

        # Only stdout is inspected. Blank lines can never match the filter, so
        # a single pass over the lines is enough.
        report_lines = completed.stdout.strip().split('\n')
        names_per_file.append([
            # The name is the text between the first pair of single quotes.
            line.split("'")[1]
            for line in report_lines
            if 'unused variable' in line
        ])

    return names_per_file

In [None]:
# Instruction for step 1 (identification). It defines what "unused" means,
# restricts the task to variables, and asks for a list of names as the reply.
# The text is sent to the model verbatim, so any edit changes the prompt.
task1 = "A variable is unused if it is assigned using an equals operator but is not referenced after being defined. Identify unused variables in the code delimited by triple backticks. Ignore unused functions, imports, or classes. Output this as a list of variables names."

# Example 1: `time` is assigned and never referenced, `d` is printed, and the
# locals inside calc() are both used by its return statement.
task1_ex1_input = """```python
time = 5
d = 5
print(d)

def calc():
    a = 5
    b = 4
    return a + b
```"""

# Expected reply for example 1. Note it uses double quotes while the other
# expected replies use single quotes.
task1_ex1_output = """["time"]"""

# Example 2: two assignments, neither of which is referenced afterwards.
task1_ex2_input = """```python
val1 = 'hello'
val2 = 'world'
```"""

# Expected reply for example 2.
task1_ex2_output = """['val1', 'val2']"""

# Example 3: an unpacking assignment where only `c` is never referenced.
task1_ex3_input = """```python
z = [1, 2, 3]
a, b, c = z
print(a + b)
```"""

# Expected reply for example 3.
task1_ex3_output = """['c']"""

# Example 4: `path` is assigned from a call and never referenced; the function
# `associate` itself is not listed in the expected reply.
task1_ex4_input = """```python
def associate():
    return center.path()

path = associate()
```"""

# Expected reply for example 4.
task1_ex4_output = """['path']"""

In [None]:
# identify unused
import openai
openai.api_key = my_key

# GPT
def identify_unused(cell_src, max_retries=6):
    """Ask the chat model which variables in ``cell_src`` are unused.

    Sends the ``task1`` instruction, the four worked examples and finally
    ``cell_src`` wrapped in a ```` ```python ```` fence.

    Args:
        cell_src: Source code to analyse.
        max_retries: Maximum number of request attempts (at least one is
            always made). Previously the request was retried forever with no
            delay, which hangs the notebook on permanent failures such as an
            invalid API key.

    Returns:
        A ``(finish_reason, content)`` tuple taken from the first choice of
        the completion, or ``('length', '')`` when the API reports that the
        prompt exceeds the maximum context length.

    Raises:
        Exception: The last API error, re-raised once ``max_retries`` attempts
            have failed.
    """
    import time  # local import: only needed for the retry back-off

    messages = [
        {"role": "user", "content": task1},
        {"role": "user", "content": task1_ex1_input},
        {"role": "assistant", "content": task1_ex1_output},
        {"role": "user", "content": task1_ex2_input},
        {"role": "assistant", "content": task1_ex2_output},
        {"role": "user", "content": task1_ex3_input},
        {"role": "assistant", "content": task1_ex3_output},
        {"role": "user", "content": task1_ex4_input},
        {"role": "assistant", "content": task1_ex4_output},
        {"role": "user", "content": f"```python\n{cell_src}\n```"},
    ]

    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                temperature=0,
                messages=messages,
            )
        except Exception as e:
            if 'maximum context length' in str(e):
                # Retrying cannot help: the prompt itself is too long.
                print('...Error.. too long...' + str(e))
                return 'length', ''
            if attempt == attempts:
                print('...Error.. giving up...' + str(e))
                raise
            print('...Error.. trying again...' + str(e))
            # Exponential back-off (2s, 4s, 8s, ... capped at 60s) so transient
            # rate-limit or network errors get time to clear.
            time.sleep(min(2 ** attempt, 60))
        else:
            return completion.choices[0].finish_reason, completion.choices[0].message["content"]

# Run the identification step on every loaded cell, keeping the finish reason
# next to the raw reply.
gpt_results = []
for i, source in enumerate(random_cells):
    print(f'Processing file {i}')
    finish_reason, reply = identify_unused(source)
    print(f'File {i} - {finish_reason}')
    gpt_results.append(dict(reason=finish_reason, result=reply))

# Persist the responses as the textual form of the list so they can be
# reloaded without calling the API again.
with open(GPT_SAVED_FILE_NAME, 'w') as out:
    out.write(repr(gpt_results))

In [None]:
# read in gpt result from file
import ast

# The file holds the textual form of a list of dicts of strings, so
# ast.literal_eval can rebuild it. Unlike eval, it only accepts Python
# literals and never executes code found in the file.
with open(GPT_SAVED_FILE_NAME, 'r') as f:
    gpt_results = ast.literal_eval(f.read())

In [None]:
# Summarise how the identification requests ended.
# Tally the 'reason' of every entry in gpt_results.
finish_reasons = {}
for entry in gpt_results:
    finish_reasons[entry['reason']] = finish_reasons.get(entry['reason'], 0) + 1

# One line per distinct reason, in first-seen order.
for reason, count in finish_reasons.items():
    print(f'{reason}: {count}')

# Indices of the files whose request ended with reason 'length'.
finish_reason_length = [
    index for index, entry in enumerate(gpt_results) if entry['reason'] == 'length'
]

print(finish_reason_length)

In [None]:
# save the results to a variable
import ast

# gpt_unused_names[i] is the list of variable names the model reported for
# file i. Entries that did not finish with 'stop', or whose reply is not a
# literal list of strings, become an empty list.
gpt_unused_names = []
for var in gpt_results:
    names = []
    if var['reason'] == 'stop':
        try:
            # literal_eval only accepts Python literals, so model output is
            # parsed without ever being executed (eval would run it).
            parsed = ast.literal_eval(var['result'])
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            parsed = []
        # Guard against replies that parse to something other than a list of
        # names (e.g. a bare string), which would corrupt the later counts.
        if isinstance(parsed, (list, tuple)):
            names = [name for name in parsed if isinstance(name, str)]
    gpt_unused_names.append(names)

In [None]:
# Instruction for step 2 (removal). It asks the model to delete only the
# listed variables, to replace removed names in an unpacking statement with an
# underscore, and to reply with the updated code. Sent to the model verbatim.
task2 = """Remove the variables specified from the code snippet enclosed by triple backticks. Do not add, modify, or remove anything else. If removing variables that are in an unpacking statement, change it to an underscore. Output the updated code with the specified variables removed."""

# Example 1 input: fenced code followed by the list of names to remove.
task2_ex1_input = """Code:
```python
time = 5
d = 5
print(d)

def calc():
    a = 5
    b = 4
    return a + b
```

Variables to remove:
['time']"""

# Example 1 expected reply: the `time = 5` line is gone, the rest is unchanged.
task2_ex1_output = """```python
d = 5
print(d)

def calc():
    a = 5
    b = 4
    return a + b
```"""

# Example 2 input: every line of the snippet is a variable to remove.
task2_ex2_input = """Code:
```python
val1 = 'hello'
val2 = 'world'
```

Variables to remove:
['val1', 'val2']"""

# Example 2 expected reply: an empty fenced block.
task2_ex2_output = """```python
```"""

# Example 3 input: the variable to remove is one target of an unpacking.
task2_ex3_input = """Code:
```python
z = [1, 2, 3]
a, b, c = z
print(a + b)
```

Variables to remove:
['c']"""

# Example 3 expected reply: `c` becomes `_` so the unpacking still matches.
task2_ex3_output = """```python
z = [1, 2, 3]
a, b, _ = z
print(a + b)
```"""

# Example 4 input: the variable to remove is assigned from a function call.
# The literal must open with exactly three quotes so the message starts with
# "Code:" like the other examples; a fourth quote would put a stray '"'
# character at the start of the prompt.
task2_ex4_input = """Code:
```python
def associate():
    return center.path()

path = associate()
```

Variables to remove:
['path']"""

# Example 4 expected reply: the assignment is dropped, the function is kept.
task2_ex4_output = """```python
def associate():
    return center.path()
```"""

In [None]:
# remove unused
import openai
openai.api_key = my_key

# GPT
def remove_unused(cell_src, function_names, max_retries=6):
    """Ask the chat model to remove the given variables from ``cell_src``.

    Sends the ``task2`` instruction, the four worked examples and finally
    ``cell_src`` in a ```` ```python ```` fence followed by the names to remove.

    Args:
        cell_src: Source code to rewrite.
        function_names: The names to remove; they are inserted after
            "Variables to remove:" in the final user message.
        max_retries: Maximum number of request attempts (at least one is
            always made). Previously the request was retried forever with no
            delay, which hangs the notebook on permanent failures such as an
            invalid API key.

    Returns:
        A ``(finish_reason, content)`` tuple taken from the first choice of
        the completion, or ``('length', '')`` when the API reports that the
        prompt exceeds the maximum context length.

    Raises:
        Exception: The last API error, re-raised once ``max_retries`` attempts
            have failed.
    """
    import time  # local import: only needed for the retry back-off

    messages = [
        {"role": "user", "content": task2},
        {"role": "user", "content": task2_ex1_input},
        {"role": "assistant", "content": task2_ex1_output},
        {"role": "user", "content": task2_ex2_input},
        {"role": "assistant", "content": task2_ex2_output},
        {"role": "user", "content": task2_ex3_input},
        {"role": "assistant", "content": task2_ex3_output},
        {"role": "user", "content": task2_ex4_input},
        {"role": "assistant", "content": task2_ex4_output},
        {"role": "user", "content": f"Code:\n```python\n{cell_src}\n```\n\nVariables to remove:\n{function_names}"},
    ]

    attempts = max(1, max_retries)
    for attempt in range(1, attempts + 1):
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                temperature=0,
                messages=messages,
            )
        except Exception as e:
            if 'maximum context length' in str(e):
                # Retrying cannot help: the prompt itself is too long.
                print('...Error.. too long...' + str(e))
                return 'length', ''
            if attempt == attempts:
                print('...Error.. giving up...' + str(e))
                raise
            print('...Error.. trying again...' + str(e))
            # Exponential back-off (2s, 4s, 8s, ... capped at 60s) so transient
            # rate-limit or network errors get time to clear.
            time.sleep(min(2 ** attempt, 60))
        else:
            return completion.choices[0].finish_reason, completion.choices[0].message["content"]

# Run the removal step on every cell for which at least one name was identified.
gpt_results_code = []
for i, cell_src in enumerate(random_cells):
    print(f'Processing file {i}')
    names_to_remove = gpt_unused_names[i]
    if names_to_remove == []:
        # Nothing to remove: keep the source unchanged and make no API call.
        finish_reason, result = 'skipped', random_cells[i]
        print('...skipping due to no identified...')
    else:
        finish_reason, result = remove_unused(cell_src, names_to_remove)
    print(f'File {i} - {finish_reason}')
    gpt_results_code.append(dict(reason=finish_reason, result=result))

# Persist the responses as the textual form of the list so they can be
# reloaded without calling the API again.
with open(GPT_SAVED_FILE_NAME + "_code", 'w') as out:
    out.write(repr(gpt_results_code))

In [None]:
# read in gpt result from file
import ast

# The file holds the textual form of a list of dicts of strings, so
# ast.literal_eval can rebuild it. Unlike eval, it only accepts Python
# literals and never executes code found in the file.
with open(GPT_SAVED_FILE_NAME + "_code", 'r') as f:
    gpt_results_code = ast.literal_eval(f.read())

In [None]:
# Summarise how the removal requests ended.
# Tally the 'reason' of every entry in gpt_results_code.
finish_reasons = {}
for entry in gpt_results_code:
    finish_reasons[entry['reason']] = finish_reasons.get(entry['reason'], 0) + 1

# One line per distinct reason, in first-seen order.
for reason, count in finish_reasons.items():
    print(f'{reason}: {count}')

# Indices of the files whose request ended with reason 'length'.
finish_reason_length = [
    index for index, entry in enumerate(gpt_results_code) if entry['reason'] == 'length'
]

print(finish_reason_length)

In [None]:
# save the updated code to files

import os


def _extract_fenced_code(response, fallback):
    """Return the contents of the first ``` fence in ``response``.

    Returns ``fallback`` unchanged when ``response`` contains no fence. A
    leading ``python`` language tag and the newlines around the code are
    stripped, but only for text that really came from inside a fence.
    """
    parts = response.split("```")
    if len(parts) == 1:
        return fallback
    code = parts[1]
    if code.startswith('python'):
        code = code[6:].strip("\n")
    return code


gpt_code = []

# get all the code from the results
for i, result in enumerate(gpt_results_code):
    if result['reason'] == 'skipped':
        # Skipped entries hold the untouched source rather than a model reply,
        # so any backticks in them are part of the code and must not be parsed.
        gpt_code.append(random_cells[i])
    else:
        gpt_code.append(_extract_fenced_code(result['result'], random_cells[i]))

# write every snippet to the gpt_code folder as <index>.py
os.makedirs('gpt_code', exist_ok=True)
for i, code in enumerate(gpt_code):
    with open(f'gpt_code/{i}.py', 'w') as f:
        f.write(code)

In [None]:
# print random_cells to new folder
import os
# Write each loaded cell to random_cells/<index>.py, creating the folder on
# first use.
folder_missing = not os.path.exists('random_cells')
if folder_missing:
    os.makedirs('random_cells')
for i, source in enumerate(random_cells):
    with open(f'random_cells/{i}.py', 'w') as target:
        target.write(source)

In [None]:
# Baseline: names vulture reports for the files in the 'random_cells' folder.
before = get_unused_data('random_cells')

total_before = sum(map(len, before))
print(f'Total before: {total_before}')

In [None]:
# Same measurement on the files in the 'gpt_code' folder.
after = get_unused_data('gpt_code')

total_after = sum(map(len, after))
print(f'Total after: {total_after}')

In [None]:
# List percentage difference between before and after for total
if total_before == 0:
    # With no baseline findings a relative change is undefined; report that
    # instead of failing with ZeroDivisionError.
    print('Total percentage difference: n/a (no unused variables before)')
else:
    print(f'Total percentage difference: {(total_after - total_before) / total_before * 100}%')

In [None]:
# Compare what GPT identified with what vulture reported on the original files.
gpt_before_count = sum(len(names) for names in gpt_unused_names)
vulture_before_count = sum(len(names) for names in before)
print(f'GPT before count: {gpt_before_count}')
print(f'Vulture before count: {vulture_before_count}')

print("------------")

# Score GPT's names per file, treating vulture's list (before) as the reference.
true_positives = 0
false_positives = 0
false_negatives = 0

# A GPT name that vulture also reported for the same file is a true positive;
# any other GPT name is a false positive.
for index, predicted in enumerate(gpt_unused_names):
    reference = before[index]
    matched = sum(1 for candidate in predicted if candidate in reference)
    true_positives += matched
    false_positives += len(predicted) - matched

# A vulture name that GPT did not list for the same file is a false negative.
for index, reference in enumerate(before):
    predicted = gpt_unused_names[index]
    false_negatives += sum(1 for candidate in reference if candidate not in predicted)

# Report the three counts.
print(f'True positives: {true_positives}')
print(f'False positives: {false_positives}')
print(f'False negatives: {false_negatives}')