In [23]:
"""
Tree-of-Thought prompting
"""

'\nTree-of-Thought prompting\n'

In [24]:
NUM_FILES = 350
FOLDER_NAME = '../random_samples_formatting'
GPT_SAVED_FILE_NAME = 'formatted_code_gpt'
GPT_SAVED_FOLDER_NAME = 'reformatted_gpt'

In [25]:
# read in files from folder random_cells
random_cells = []

for i in range(NUM_FILES):
    file_name = f'{FOLDER_NAME}/{i}.py'
    with open(file_name, 'r') as f:
        random_cells.append(f.read())

In [26]:
main_prompt = """Format the code delimited by triple backticks according to PEP 8 conventions. Do not add, remove, or change anything else. First, identify formatting issues in the code. Then, fix these issues in the code. Structure your response under the following headings: 'Identified formatting issues' (a short paragraph describing the formatting issues) and 'Formatted code' (the full code with all formatting issues fixed)."""

shots_input = []
shots_output = []
for i in range(1, 5):
    with open(f'../shots/shot{i}.py', 'r') as f:
        shots_input.append(f.read())
    with open(f'../shots/shot{i}_after.py', 'r') as f:
        shots_output.append(f.read())

ex1_input = f"""```python
{shots_input[0]}
```"""

ex1_output = f"""First, we identify formatting issues in the code that violate PEP 8 conventions.

Identified formatting issues:
- Imports 'utils' and 'rain' should not be on the same line
- The indentation in the function 'evaluate' contains tabs
- The function 'evaluate' should have two tabs after its definition, not one

Now, we fix these issues in the code. For the first issue, we separate the two import statements onto separate lines. For the second issue, we replace the tab with four spaces. For the third issue, we add two blank lines after the function 'evaluate'.

Formatted code:
```python
{shots_output[0]}
```"""

ex2_input = f"""```python
{shots_input[1]}
```"""

ex2_output = f"""First, we identify formatting issues in the code that violate PEP 8 conventions.

Identified formatting issues:
- Arithmetic operators should have whitespace around them
- There is a line break after the binary operator but the line break should be before the binary operator
- No statements should end with a semicolon

Now, we fix these issues in the code. For the first issue, we add whitespace around the '*' operator. For second issue, we line break before the '*' operator, bringing the operator down to the next line. For the third issue, we remove the semicolon at the end of the line.

Formatted code:
```python
{shots_output[1]}
```"""

ex3_input = f"""```python
{shots_input[2]}
```"""

ex3_output = f"""First, we identify formatting issues in the code that violate PEP 8 conventions.

Identified formatting issues:
- Blank line contains whitespace but it should not
- The if statement is too long and should be split into multiple lines
- The code block inside the if-statement is not indented with 4 spaces

Now, we fix these issues in the code. For the first issue, we remove the whitespace in the blank line. For the second issue, we split the if statement into two lines. For the third issue, we add an extra space to the indent of the variable assignment in the if statement so it is a multiple of 4.

Formatted code:
```python
{shots_output[2]}
```"""

ex4_input = f"""```python
{shots_input[3]}
```"""

ex4_output = f"""First, we identify formatting issues in the code that violate PEP 8 conventions.

Identified formatting issues:
- There should be two blank lines between the imports and the first function not one
- The return statement in the function 'find' is too long and should be split into multiple lines
- No blank lines should contain whitespace
- The continuation line for the print statement is under-indented
- The closing bracket of the variable assignment 'updated_offset' does not match the indentation of the opening bracket's line

Now, we fix these issues in the code. For the first issue, we add an extra blank line after the import statement. For second issue, we split the return statement into two lines. For the third issue, we remove the whitespace in the blank line. For the fourth issue, we indent the second line of the print statement so that 'count' is aligned with 'new_prompt' in the line above. For the fifth issue, we remove indentation from the closing bracket of the variable assignment for 'updated_offset' so it is indented at the same level as the first line.

Formatted code:
```python
{shots_output[3]}
```"""

identify_vote_prompt = """Given an original task and multiple choices, choose the best answer for the original task. Analyze each choice in detail, then conclude in the last line 'The best choice is {s}', where s is the integer id of the choice. If all choices are equally good, return the smallest id. If no choice is good, return 0.
Original task: Identify formatting issues in the code delimited by triple backticks according to PEP 8 conventions."""

identify_vote_ex1_input = f"""Code:
```python
{shots_input[0]}
```

Choice 1:
- Imports 'utils' and 'rain' should not be on the same line
- The indentation in the function 'evaluate' contains tabs
- The function 'evaluate' should have two tabs after its definition, not one
Choice 2:
- Arithmetic operators should have whitespace around them
- There is a line break after the binary operator but the line break should be before the binary operator
- No statements should end with a semicolon
Choice 3:
- Variables are not in camelCase"""

identify_vote_ex1_output = """Choice 1 is the best choice. It correctly identifies each formatting issue according PEP 8 conventions. Each issue identified is present in the code. No issues are missing and no extra issues are identified.
Choice 2 is not the best choice. None of the issues identified are actually present in the code and none of the issues present in the code are correctly identified.
Choice 3 is not the best choice. Camel case is not a PEP 8 convention and is not a formatting issue.

The best choice is 1."""

code_vote_prompt = """Given an original task and multiple choices, choose the best answer for the original task. Analyze each choice in detail, then conclude in the last line 'The best choice is {s}', where s is the integer id of the choice. If all choices are equally good, return the smallest id. If no choice is good, return 0.
Original task: Fix the following formatting issues in the code delimited by triple backticks. Do not add, remove, or change anything else. Output the formatted code with the identified issues rectified."""

code_vote_ex1_input = f"""Formatting issues to fix:
- Imports 'utils' and 'rain' should not be on the same line
- The indentation in the function 'evaluate' contains tabs
- The function 'evaluate' should have two tabs after its definition, not one

Code:
```python
{shots_input[0]}
```

Choice 1:
```python
{shots_input[0]}
```
Choice 2:
```python
{shots_output[0]}
```
Choice 3:
```python
import utils, rain


def evaluate(x, y):
    return rain.proccess(x, y)


utils.print(evaluate(3, 4))
```"""

code_vote_ex1_output = """Choice 1 is incorrect. It does not change any of the issues identified and simply outputs the original code.
Choice 2 is correct. It correctly fixes each and every formatting issue identified without changing anything else.
Choice 3 is incorrect. While it fixes the indentation issues and missing blank lines, it does not fix the multiple imports on one line.

The best choice is 2."""

def get_cot_prompt(cell_src):
    return [
        {"role": "user", "content": main_prompt},
        {"role": "user", "content": ex1_input},
        {"role": "assistant", "content": ex1_output},
        {"role": "user", "content": ex2_input},
        {"role": "assistant", "content": ex2_output},
        {"role": "user", "content": ex3_input},
        {"role": "assistant", "content": ex3_output},
        {"role": "user", "content": ex4_input},
        {"role": "assistant", "content": ex4_output},
        {"role" : "user", "content" : f"```python\n{cell_src}\n```"},
    ]

def get_identified_names(identify_trials, identify_completions):
    identified_names = []
    for i in range(identify_trials):
        if identify_completions.choices[i].finish_reason == 'stop':
            try:
                issues = identify_completions.choices[i]['message']['content'].split('Identified formatting issues:')[1].strip("\n").split("\n")[0]
            except:
                print("unexpected format for issue", identify_completions.choices[i]['message']['content'])
                issues = None
        else:
            issues = None
        identified_names.append(issues)
    return identified_names

def get_identify_vote_msgs(cell_src):
    def func(choices):
        final_msg = f"Code:\n```python\n{cell_src}\n```\n\n"

        for i, choice in enumerate(choices):
            final_msg += f"Choice {i + 1}:\n{choice}\n"
        
        return [
        {"role": "user", "content": identify_vote_prompt},
        {"role": "user", "content": identify_vote_ex1_input},
        {"role": "assistant", "content": identify_vote_ex1_output},
        {"role": "user", "content": final_msg}
        ]
    
    return func

def get_code_vote_msgs(cell_src):
    def func(new_issues, choices):
        final_msg = f"Formatting issues to fix:\n{new_issues}\n\nCode:\n```python\n{cell_src}\n```\n\n"
        
        for i, choice in enumerate(choices):
            final_msg += f"Choice {i + 1}:\n{choice}\n"
        
        return [
        {"role": "user", "content": code_vote_prompt},
        {"role": "user", "content": code_vote_ex1_input},
        {"role": "assistant", "content": code_vote_ex1_output},
        {"role" : "user", "content" : final_msg}
        ]

    return func

In [27]:
identify_trials = 5
code_trials = 3
identify_vote_trials = 6
code_vote_trials = 4
identify_stop = "Formatted code"

In [28]:
# Estimate cost
import sys
sys.path.append("../..")
import utils

def estimate_tokens():
    in_tok = ''
    out_tok = ''
    for i, cell_src in enumerate(random_cells):
        # trial
        in_tok += main_prompt + ex1_input + ex1_output + ex2_input + ex2_output + ex3_input + ex3_output + ex4_input + ex4_output
        in_tok += f"```python\n{cell_src}\n```"
        out_tok += (ex1_output[:int(len(ex1_input)/2)] * identify_trials)
        # vote trial
        in_tok += identify_vote_prompt + identify_vote_ex1_input + identify_vote_ex1_output
        in_tok += f"```python\n{cell_src}\n```\n\n"
        out_tok += identify_vote_ex1_output * identify_vote_trials
        # code
        in_tok += main_prompt + ex1_input + ex1_output + ex2_input + ex2_output + ex3_input + ex3_output + ex4_input + ex4_output
        in_tok += f"```python\n{cell_src}\n```"
        in_tok += ex1_output[:int(len(ex1_input)/2)]
        out_tok += (ex1_output[int(len(ex1_input)/2):] * code_trials)
        # vote code
        in_tok += code_vote_prompt + code_vote_ex1_input + code_vote_ex1_output
        in_tok += f"Original code:\n```python\n{cell_src}\n```\n\nFormatting issues to fix: lalalalallaal\n\n"
        out_tok += code_vote_ex1_output * code_vote_trials
    return in_tok, out_tok

in_tok, out_tok = estimate_tokens()

utils.gpt_35_turbo_token_dollar_cost(in_tok, out_tok)

2.855013

In [29]:
# # GPT Tree of Thought
# import sys
# sys.path.append('../../')
# from tree_of_thought import solve_toc

# # identify and remove unused using GPT
# gpt_results = []
# for i in range(NUM_FILES):
#     input_msgs_cot = get_cot_prompt(random_cells[i])
#     get_identified_names_func = get_identified_names
#     get_identify_votes_msgs_func = get_identify_vote_msgs(random_cells[i])
#     get_code_votes_msgs_func = get_code_vote_msgs(random_cells[i])

#     print(f'Processing file {i}')
#     identified, updated_code = solve_toc(input_msgs_cot, identify_trials, code_trials, identify_vote_trials, code_vote_trials, identify_stop, get_identified_names_func, get_identify_votes_msgs_func, get_code_votes_msgs_func)
#     print(f'File {i} - {identified}')
#     gpt_results.append({'identified': identified, 'updated_code': updated_code})

# # save the results to a file
# with open(GPT_SAVED_FILE_NAME, 'w') as f:
#     f.write(str(gpt_results))

In [30]:
# read in gpt result from file
with open(GPT_SAVED_FILE_NAME, 'r') as f:
    gpt_results = eval(f.read())

In [31]:
# save the results to a variable
gpt_issues = [var['identified'] for var in gpt_results]
gpt_new_code = [var['updated_code'] for var in gpt_results]

In [32]:
# print all to new folder
import os
if not os.path.exists(GPT_SAVED_FOLDER_NAME):
    os.makedirs(GPT_SAVED_FOLDER_NAME)
for i, code in enumerate(gpt_new_code):
    with open(f'{GPT_SAVED_FOLDER_NAME}/{i}.py', 'w') as f:
        if gpt_new_code[i] is None or gpt_issues[i] is None:
            f.write(random_cells[i])
        else:
            f.write(code)

In [33]:
sys.path.append('../')
from common import pycodestyle, group_by_error, print_num_reductions, print_percentage_difference, IGNORE_TYPES

In [34]:
# store error counts in a hash
error_counts_before = pycodestyle(FOLDER_NAME, NUM_FILES, IGNORE_TYPES)

# print the error counts
total_errors_before = sum(error_counts_before.values())
print(f'Total before: {total_errors_before}')

Total before: 3062


In [35]:
# store error counts in a hash
error_counts_after = pycodestyle(GPT_SAVED_FOLDER_NAME, NUM_FILES, IGNORE_TYPES)

# print the error counts
total_errors_after = sum(error_counts_after.values())
print(f'Total after: {total_errors_after}')

Total after: 1437


In [36]:
error_counts_before

{'E501': 447,
 'E226': 181,
 'E231': 592,
 'E266': 80,
 'E303': 8,
 'W293': 213,
 'E251': 359,
 'E265': 130,
 'W291': 142,
 'E202': 29,
 'E203': 47,
 'E241': 16,
 'E225': 154,
 'E117': 2,
 'E128': 33,
 'E302': 48,
 'E401': 2,
 'E703': 52,
 'E221': 76,
 'E305': 27,
 'E228': 5,
 'E402': 32,
 'E111': 141,
 'E114': 22,
 'E261': 46,
 'E722': 2,
 'E201': 29,
 'E275': 9,
 'E126': 4,
 'E127': 3,
 'E211': 13,
 'E271': 2,
 'E262': 27,
 'E113': 2,
 'E116': 8,
 'W504': 6,
 'E123': 2,
 'E101': 1,
 'W191': 2,
 'E701': 3,
 'E121': 33,
 'E131': 3,
 'E702': 9,
 'E741': 2,
 'E115': 3,
 'E124': 1,
 'E222': 8,
 'E301': 2,
 'E712': 1,
 'E122': 3}

In [37]:
error_counts_after

{'E501': 437,
 'W293': 129,
 'E251': 43,
 'E226': 74,
 'E231': 162,
 'E265': 50,
 'W291': 67,
 'E225': 82,
 'E266': 30,
 'E302': 33,
 'E128': 8,
 'E305': 18,
 'E228': 1,
 'E722': 1,
 'E202': 6,
 'E201': 9,
 'E275': 6,
 'E401': 1,
 'E203': 6,
 'E113': 1,
 'E271': 1,
 'E261': 19,
 'E402': 11,
 'W504': 9,
 'E303': 5,
 'E111': 106,
 'E123': 1,
 'E126': 2,
 'E124': 3,
 'E703': 20,
 'E101': 1,
 'W191': 2,
 'E701': 3,
 'E221': 16,
 'E741': 1,
 'E121': 31,
 'E241': 3,
 'E262': 3,
 'E114': 20,
 'E116': 4,
 'E222': 3,
 'E301': 2,
 'E712': 1,
 'E127': 6}

In [38]:
# List percentage difference between before and after for total
print(f'Total percentage difference: {(total_errors_after - total_errors_before) / total_errors_before * 100}%')

Total percentage difference: -53.06988896146309%


In [39]:
error_counts_grouped_before = group_by_error(error_counts_before)
error_counts_grouped_before

{'E1': 261,
 'E2': 1803,
 'E3': 85,
 'E4': 34,
 'E5': 447,
 'E7': 69,
 'E9': 0,
 'W1': 2,
 'W2': 355,
 'W3': 0,
 'W5': 6,
 'W6': 0}

In [40]:
error_counts_grouped_after = group_by_error(error_counts_after)
error_counts_grouped_after

{'E1': 183,
 'E2': 514,
 'E3': 58,
 'E4': 12,
 'E5': 437,
 'E7': 26,
 'E9': 0,
 'W1': 2,
 'W2': 196,
 'W3': 0,
 'W5': 9,
 'W6': 0}

In [41]:
print_num_reductions(error_counts_grouped_before, error_counts_grouped_after)

('E1', 'Indentation'): 261 -> 183
('E2', 'Whitespace'): 1803 -> 514
('E3', 'Blank line'): 85 -> 58
('E4', 'Import'): 34 -> 12
('E5', 'Line length'): 447 -> 437
('E7', 'Statement'): 69 -> 26
('E9', 'Runtime'): 0 -> 0


In [42]:
print_percentage_difference(error_counts_grouped_before, error_counts_grouped_after)

('E1', 'Indentation'): -29.89%
('E2', 'Whitespace'): -71.49%
('E3', 'Blank line'): -31.76%
('E4', 'Import'): -64.71%
('E5', 'Line length'): -2.24%
('E7', 'Statement'): -62.32%
('E9', 'Runtime'): Undefined
