In [None]:
import os
import json
import pandas as pd
import numpy as np
import time
from tqdm import tqdm  # Import tqdm for the progress bar
from evaluator import evaluate
from eslint import EslintProcessor
from google_closure_compiler import GoogleClosureCompiler
from chatgpt_api import ChatGPT  # Import ChatGPT
from gemini_api import gemini_response,gemini_json_response # Import Gemini API
from collections import defaultdict
import enum
from typing_extensions import TypedDict

In [None]:
dataset_folder = './our_dataset'
# Sub-folders of the dataset, one per difficulty level, in processing order.
difficulty_levels = ['easy', 'medium', 'hard']

# Ground-truth JavaScript sources:
#   gt_code_lst -- flat list of file contents, grouped by level in the order above
#   gt_dict     -- the same contents keyed by difficulty level
gt_code_lst = []
gt_dict = defaultdict(list)

# Load dataset: read every .js file found directly inside each level folder.
for level in difficulty_levels:
    level_folder = os.path.join(dataset_folder, level)

    # os.listdir() returns entries in arbitrary order; sort so that the row
    # order of everything derived from these lists is reproducible.
    for file_name in sorted(os.listdir(level_folder)):
        if not file_name.endswith('.js'):
            continue
        full_file_path = os.path.join(level_folder, file_name)

        # Read the original code with an explicit encoding so the result does
        # not depend on the platform's default locale.
        with open(full_file_path, 'r', encoding='utf-8') as file:
            original_code = file.read()

        gt_code_lst.append(original_code)
        gt_dict[level].append(original_code)

# Number of files actually loaded (usable as a progress-bar total).
total_files = len(gt_code_lst)

In [None]:
# Prompt templates. Each is filled with str.format(); the placeholders in
# braces are the keyword arguments the template expects.

# Code generation from a problem statement. Placeholder: {leetcode_desc}.
PROMPT_CODEGEN="""
Task: Generate JavaScript code based on the following description.

Problem Description: {leetcode_desc}

Requirements:
1. Use modern JavaScript syntax (ES6+)
2. Implement error handling where appropriate
3. Follow best practices for code organization and readability
"""

# Refactoring with the problem statement as context.
# Placeholders: {leetcode_desc}, {gt_code}.
PROMPT_REFACTOR="""
Task: Refactor the following JavaScript code to improve its quality, readability, and maintainability.

Problem Description: {leetcode_desc}

Original Code: {gt_code}

Requirements:
1. Improve the run-time of the algorithm
2. Improve the readability and quality of algorithm
"""

# Refactoring from the code alone (no problem statement). Placeholder: {gt_code}.
PROMPT_REFACTOR_V2="""
Task: Refactor the following JavaScript code to improve its quality, readability, and maintainability.

Original Code: {gt_code}

Requirements:
1. Improve the run-time of the algorithm
2. Improve the readability and quality of algorithm
"""

# Example of the JSON object the model is asked to return. The values are
# human-readable descriptions of each field, not real data; the dict is
# serialized with json.dumps() and appended to the prompt in the Gemini cell.
out_schema = {
    "code": "generated javascript code",
    "rationale": "explain your approach keep it under 200 words"
}


# Experiment 3: Refactor with metadata from LeetCode added
# Placeholders: {leetcode_desc}, {examples}, {constraint}, {gt_code}.
PROMPT_REFACTOR_V3="""
Task: Refactor the following JavaScript code to improve its quality, readability, and maintainability.

Problem Description: {leetcode_desc}

Examples: {examples}

Constraint: {constraint}

Original Code: {gt_code}

Requirements:
1. Improve the run-time of the algorithm
2. Improve the readability and quality of algorithm
"""

# Experiment 4: Refactor with linter output given in the prompt. (need linter output from Jacob)
# Placeholders: {leetcode_desc}, {gt_code}, {linter_output}.
PROMPT_REFACTOR_V4="""
Task: Refactor the following JavaScript code to improve its quality, readability, and maintainability.

Problem Description: {leetcode_desc}

Original Code: {gt_code}

Linter output of the original code: {linter_output}

Requirements:
1. Improve the run-time of the algorithm
2. Improve the readability and quality of algorithm
"""

# Experiment 5: Refactor with more specific requirements (e.g., make the solution 10 lines or less) (not trivial, the above cases first)
# NOTE: the template text below is currently identical to PROMPT_REFACTOR; the
# more specific requirements have not been added yet.
# Placeholders: {leetcode_desc}, {gt_code}.
PROMPT_REFACTOR_V5="""
Task: Refactor the following JavaScript code to improve its quality, readability, and maintainability.

Problem Description: {leetcode_desc}

Original Code: {gt_code}

Requirements:
1. Improve the run-time of the algorithm
2. Improve the readability and quality of algorithm
"""

In [None]:
# Gemini General: ask the model to refactor every ground-truth file using
# PROMPT_REFACTOR_V2 and collect the parsed answers.
model_name = 'gemini-1.5-flash'
MAX_TOKENS_ANSWER = 2000
# Pause between consecutive API calls, in seconds.
REQUEST_DELAY_SECONDS = 15

# Parallel result lists: one entry per processed file, in the same order as
# the files are visited (level by level).
ans = []      # refactored code, or None when the response could not be parsed
lvl_lst = []  # difficulty level of the file
rat = []      # model's rationale, or None when the response could not be parsed

# The output-format instruction is the same for every request, so build it once.
schema_instruction = "\n" + "Provide output in valid JSON. The data schema should be like this: " + json.dumps(out_schema)

for level in difficulty_levels:
    for code in gt_dict[level]:
        code_prompt = PROMPT_REFACTOR_V2.format(gt_code=code) + schema_instruction
        response_txt = gemini_response(code_prompt, model_name, MAX_TOKENS_ANSWER)
        print(response_txt)

        # A malformed or truncated response must not abort the whole run (and
        # discard everything collected so far). Extract both fields before
        # appending anything so the three lists always stay the same length.
        try:
            response_json = json.loads(response_txt)
            refactored_code = response_json['code']
            rationale = response_json['rationale']
        except (json.JSONDecodeError, KeyError, TypeError) as err:
            print(f"Could not parse model response ({err!r}); recording an empty result.")
            refactored_code = None
            rationale = None

        ans.append(refactored_code)
        rat.append(rationale)
        lvl_lst.append(level)
        print("-------")
        time.sleep(REQUEST_DELAY_SECONDS)

{"code": "/**\n * @param {string} word1\n * @param {string} word2\n * @return {string}\n */\nconst mergeAlternately = (word1, word2) => {\n  let merged = '';\n  let i = 0;\n  let j = 0;\n\n  while (i < word1.length || j < word2.length) {\n    if (i < word1.length) {\n      merged += word1[i];\n      i++;\n    }\n    if (j < word2.length) {\n      merged += word2[j];\n      j++;\n    }\n  }\n\n  return merged;\n};", "rationale": "The original code iterates up to the length of the longer string, checking each time if the index is within bounds for both strings. This leads to unnecessary checks. The improved code uses two pointers, `i` and `j`, to iterate through `word1` and `word2` respectively. The `while` loop continues as long as there are characters remaining in either string.  This approach avoids redundant checks and improves runtime efficiency by only accessing each character once. The code is also more readable with clearer variable names and a more structured loop."}

-------
{"

{"code": "const createCounter = (init) => {\n  let count = init;\n\n  return {\n    increment: () => ++count,\n    decrement: () => --count,\n    reset: () => count = init,\n  };\n};", "rationale": "The refactored code uses arrow functions for conciseness.  The variable name `presentCount` is changed to the shorter, more descriptive `count`.  There's no change to the runtime complexity; it remains O(1) for all operations. Readability is improved by using more concise syntax and clearer variable names.  The structure is simplified, making it easier to understand and maintain. The core functionality remains unchanged."}

-------
{"code": "const createHelloWorld = () => () => \"Hello World\";", "rationale": "The original code creates a closure to return the function that returns \"Hello World\". The refactored code uses arrow functions which are more concise and modern JavaScript syntax.  There's no change in runtime performance because the core functionality remains the same. Readability

{"code": "/**\n * Find Peak Element\n * @param {number[]} nums\n * @return {number}\n */\nconst findPeakElement = (nums) => {\n  let left = 0;\n  let right = nums.length - 1;\n\n  while (left < right) {\n    const mid = Math.floor((left + right) / 2);\n    if (nums[mid] < nums[mid + 1]) {\n      left = mid + 1;\n    } else {\n      right = mid;\n    }\n  }\n\n  return left;\n};", "rationale": "The original code uses a while loop and checks multiple conditions in each iteration. This approach can be improved by using binary search. The improved code uses binary search to find the peak element in O(log n) time. The binary search algorithm is more efficient than the original algorithm because it reduces the search space by half in each iteration. The improved code is also more readable and maintainable because it is shorter and easier to understand."}

-------
{"code": "function lowestCommonAncestor(root, p, q) {\n  if (!root || root === p || root === q) {\n    return root;\n  }\n\n  cons

{"code": "const reverseWords = (s) => s.split(' ').filter(word => word.length > 0).reverse().join(' ');", "rationale": "The improved code leverages built-in JavaScript array methods for conciseness and efficiency.  `split(' ')` separates the string into words. `filter(word => word.length > 0)` removes empty strings resulting from multiple spaces. `reverse()` reverses the array of words, and `join(' ')` joins them back into a string. This approach avoids manual loops, enhancing readability and performance by utilizing optimized native functions. The runtime complexity is improved to O(n), where n is the number of words, as each built-in method operates in linear time."}

-------
{"code": "const predictPartyVictory = (senate) => {\n  const senateArray = senate.split('');\n  let radiantCount = senateArray.filter(party => party === 'R').length;\n  let direCount = senateArray.filter(party => party === 'D').length;\n\n  return radiantCount > direCount ? 'Radiant' : 'Dire';\n};", "rationale":

In [None]:
# Collect the experiment results into one table: the original source, its
# difficulty level, the model's refactored version and the model's rationale.
# Columns are added left to right in the order given here.
output_df = pd.DataFrame().assign(
    gt_code=gt_code_lst,
    level=lvl_lst,
    refactor_code=ans,
    rationale=rat,
)

# Preview the first rows.
output_df.head()

Unnamed: 0,gt_code,level,refactor_code,rationale
0,/**\n * @param {string} word1\n * @param {stri...,easy,/**\n * @param {string} word1\n * @param {stri...,The original code iterates up to the length of...
1,"var filter = function(arr, fn) {\n return a...",easy,"const filter = (arr, fn) => arr.filter(fn);",The original code uses flatMap to filter the a...
2,class Calculator {\n constructor(value) {\n...,easy,class Calculator {\n constructor(value) {\n ...,The original code is already quite efficient a...
3,/**\n * @param {number[]} nums\n * @param {Fun...,easy,/**\n * @param {number[]} nums\n * @param {Fun...,The original code iterated through the array u...
4,"var filter = function(arr, fn) {\n const re...",easy,"const filter = (arr, fn) => arr.filter(fn);",The original code iterated through the array u...


In [None]:
# Persist the results; the file name records the prompt version and the model.
# Create the target directory first so the export does not fail on a fresh
# checkout where it does not exist yet.
os.makedirs("output_files", exist_ok=True)
output_df.to_csv(f"output_files/CODE_REFACTOR_V2_{model_name}.csv")