In [11]:
import os
import json
import pandas as pd
import numpy as np
import time
from tqdm import tqdm  # Import tqdm for the progress bar
from evaluator import evaluate
from eslint import EslintProcessor
from google_closure_compiler import GoogleClosureCompiler
from chatgpt_api import ChatGPT  # Import ChatGPT
from gemini_api import gemini_response  # Import Gemini API
from collections import defaultdict

In [2]:
dataset_folder = './our_dataset'
# Difficulty sub-folders, listed in the order they are processed below.
difficulty_levels = ['easy', 'medium', 'hard']

# Number of files (any extension) found recursively under the difficulty
# folders. os.walk yields nothing for a missing folder, so this is 0 then.
total_files = sum(
    len(files)
    for level in difficulty_levels
    for _, _, files in os.walk(os.path.join(dataset_folder, level))
)

# Ground-truth JavaScript sources:
#   gt_code_lst - flat list, ordered by difficulty level then file name
#   gt_dict     - the same sources grouped by difficulty level
gt_code_lst = []
gt_dict = defaultdict(list)

# Load dataset
for level in difficulty_levels:
    level_folder = os.path.join(dataset_folder, level)

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order stable across machines and runs.
    for file_name in sorted(os.listdir(level_folder)):
        if not file_name.endswith('.js'):
            continue
        full_file_path = os.path.join(level_folder, file_name)

        # Explicit encoding so the result does not depend on the platform default.
        with open(full_file_path, 'r', encoding='utf-8') as file:
            original_code = file.read()

        gt_code_lst.append(original_code)
        gt_dict[level].append(original_code)

In [3]:
# Prompt templates; placeholders in braces are filled with str.format().
# Every template starts and ends with a newline.

# Code generation from a problem description. Placeholder: {leetcode_desc}.
PROMPT_CODEGEN = (
    "\n"
    "Task: Generate JavaScript code based on the following description.\n"
    "\n"
    "Problem Description: {leetcode_desc}\n"
    "\n"
    "Requirements:\n"
    "1. Use modern JavaScript syntax (ES6+)\n"
    "2. Implement error handling where appropriate\n"
    "3. Follow best practices for code organization and readability\n"
)

# Text shared by both refactoring templates.
_REFACTOR_TASK = (
    "Task: Refactor the following JavaScript code to improve its quality, "
    "readability, and maintainability.\n"
)
_REFACTOR_REQUIREMENTS = (
    # Heading spelling is reproduced exactly: the prompt text is runtime data.
    "Requirments:\n"
    "1. Improve the run-time of the algorithm\n"
    "2. Improve the readability and quality of algorithm\n"
)

# Refactoring with the problem statement. Placeholders: {leetcode_desc}, {gt_code}.
PROMPT_REFACTOR = (
    "\n"
    + _REFACTOR_TASK
    + "\n"
    + "Problem Description: {leetcode_desc}\n"
    + "\n"
    + "Original Code: {gt_code}\n"
    + "\n"
    + _REFACTOR_REQUIREMENTS
)

# Refactoring from the code alone. Placeholder: {gt_code}.
PROMPT_REFACTOR_V2 = (
    "\n"
    + _REFACTOR_TASK
    + "\n"
    + "Original Code: {gt_code}\n"
    + "\n"
    + _REFACTOR_REQUIREMENTS
)

# Maps each key expected in the model's JSON answer to a short description.
out_schema = dict(
    code="generated javascript code",
    rationale="explain your approach",
)

In [9]:
# GPT refactoring: send every ground-truth snippet to the model with
# PROMPT_REFACTOR_V2 and collect the refactored code and the rationale.
model_name = "gpt-4-turbo"
# Passed to get_gpt_json_response; presumably caps the answer length, so a long
# answer may come back truncated and fail to parse -- TODO confirm in chatgpt_api.
MAX_TOKENS_ANSWER = 1000
CG = ChatGPT(MODEL = model_name)

# Parallel lists, one entry per snippet, in the same order as gt_code_lst.
ans = []      # refactored code (None when the response was unusable)
lvl_lst = []  # difficulty level of the snippet
rat = []      # model rationale (None when the response was unusable)

for level in difficulty_levels:
    for code in gt_dict[level]:
        code_prompt = PROMPT_REFACTOR_V2.format(gt_code = code)
        response_txt = CG.get_gpt_json_response(code_prompt, out_schema ,MAX_TOKENS_ANSWER)
        print(response_txt)

        # One malformed reply must not abort the run and discard earlier
        # results. Record placeholders so the lists stay aligned with gt_code_lst.
        try:
            response_json = json.loads(response_txt)
            refactored_code = response_json['code']
            rationale = response_json['rationale']
        except (json.JSONDecodeError, KeyError, TypeError) as err:
            print(f"Unusable response for a '{level}' snippet, storing None: {err!r}")
            refactored_code, rationale = None, None

        ans.append(refactored_code)
        rat.append(rationale)
        lvl_lst.append(level)
        print("-------")

{
  "code": "/**\n * Merges two strings alternately.\n * @param {string} word1 The first word.\n * @param {string} word2 The second word.\n * @return {string} The merged string.\n */\nconst mergeAlternately = (word1, word2) => {\n  const maxLength = Math.max(word1.length, word2.length);\n  let mergedString = [];\n\n  for (let i = 0; i < maxLength; i++) {\n    if (i < word1.length) mergedString.push(word1[i]);\n    if (i < word2.length) mergedString.push(word2[i]);\n  }\n\n  return mergedString.join('');\n};",
  "rationale": "The refactored code improves readability by using modern JavaScript syntax (ES6+), such as const for immutable variables and arrow functions for concise function declaration. The use of an array to collect characters and then joining them at the end (mergedString.join('')) is generally more efficient than concatenating strings in a loop, especially for larger strings, as it avoids creating multiple intermediate string objects. The variable names are clear and descr

{
  "code": "/**\n * Finds unique elements in each of two arrays.\n * @param {number[]} nums1 - First array of numbers.\n * @param {number[]} nums2 - Second array of numbers.\n * @return {number[][]} - An array containing two arrays: unique elements to nums1 and nums2 respectively.\n */\nconst findDifference = (nums1, nums2) => {\n    const set1 = new Set(nums1);\n    const set2 = new Set(nums2);\n    const uniqueToNums1 = nums1.filter(x => !set2.has(x));\n    const uniqueToNums2 = nums2.filter(x => !set1.has(x));\n\n    return [uniqueToNums1, uniqueToNums2];\n};",
  "rationale": "The refactored code improves readability by using concise ES6 arrow functions and better documentation. It also enhances performance by eliminating the need to convert sets back to arrays before filtering, which reduces unnecessary array creation and processing. The use of direct filtering on the original arrays with checks against the sets ensures that each element is processed only once, optimizing the runt

{
  "code": "function reduceArray(elements, reducer, initialValue) {\n    return elements.reduce(reducer, initialValue);\n}",
  "rationale": "The refactored code utilizes JavaScript's built-in Array.prototype.reduce method, which simplifies the function and improves readability by removing the explicit loop and manual accumulation. This approach leverages the native implementation of the reduce method, which is typically optimized for performance in JavaScript engines. The parameter names 'elements', 'reducer', and 'initialValue' are more descriptive, enhancing the understandability of the function's purpose and usage."
}
-------
{
  "code": "function filter(arr, fn) {\n  const result = [];\n  for (let i = 0; i < arr.length; i++) {\n    if (fn(arr[i], i)) {\n      result.push(arr[i]);\n    }\n  }\n  return result;\n}",
  "rationale": "The refactored code replaces the 'reduce' method with a simple 'for' loop, which improves both readability and performance. The 'for' loop is generally f

{
  "code": "const debounce = (func, delay) => {\n  let timeoutId = null;\n  return (...args) => {\n    clearTimeout(timeoutId);\n    timeoutId = setTimeout(() => func(...args), delay);\n  };\n};",
  "rationale": "The refactored code improves readability and maintainability by using more descriptive variable names and ES6 syntax. The variable 'func' clearly indicates that it is a function, and 'delay' specifies the delay time, making the code more intuitive. The use of 'const' for the debounce function and 'let' for the timeoutId ensures that these variables are not re-assigned unexpectedly. The arrow function syntax is used for conciseness and clarity. The runtime efficiency remains the same as the original, as the core logic of debouncing is unchanged."
}
-------
{
  "code": "/**\n * Finds the maximum length of a subarray with at most k 0's.\n * @param {number[]} nums - The input array of 0s and 1s.\n * @param {number} k - The maximum number of 0's allowed in the subarray.\n * @retur

{
  "code": "const promiseAll = async (functions) => {\n    return new Promise((resolve, reject) => {\n        if (functions.length === 0) {\n            resolve([]);\n            return;\n        }\n\n        const results = new Array(functions.length);\n        let resolvedCount = 0;\n\n        functions.forEach((fn, index) => {\n            fn().then(value => {\n                results[index] = value;\n                resolvedCount++;\n                if (resolvedCount === functions.length) {\n                    resolve(results);\n                }\n            }).catch(error => {\n                reject(error);\n            });\n        });\n    });\n};",
  "rationale": "The refactored code improves readability by using more descriptive variable names such as 'results' and 'resolvedCount' instead of 'arr' and 'count'. It also maintains the use of 'forEach' to iterate over the functions, but instead of using 'async/await' inside 'forEach', it uses the '.then' and '.catch' methods t

In [12]:
# One row per ground-truth snippet: the original code, its difficulty level,
# the model's refactored version, and the model's rationale.
# All four lists must have the same length.
output_df = pd.DataFrame(
    {
        'gt_code': gt_code_lst,
        'level': lvl_lst,
        'refactor_code': ans,
        'rationale': rat,
    }
)

output_df.head()

Unnamed: 0,gt_code,level,refactor_code,rationale
0,/**\n * @param {string} word1\n * @param {stri...,easy,/**\n * Merges two strings alternately.\n * @p...,The refactored code improves readability by us...
1,"var filter = function(arr, fn) {\n return a...",easy,"function filterArray(array, predicate) {\n ...",The refactored code replaces the use of 'flatM...
2,class Calculator {\n constructor(value) {\n...,easy,class Calculator {\n constructor(initialVal...,1. **Initialization**: The constructor now def...
3,/**\n * @param {number[]} nums\n * @param {Fun...,easy,"const reduceArray = (nums, reducer, initialVal...",The refactored code uses the built-in JavaScri...
4,"var filter = function(arr, fn) {\n const re...",easy,"const filter = (arr, fn) => arr.filter(fn);",The refactored code utilizes the built-in Java...


In [14]:
# Save the results, one file per model. to_csv does not create missing parent
# directories, so make sure the output folder exists first.
os.makedirs("output_files", exist_ok=True)
output_df.to_csv(f"output_files/CODE_REFACTOR_V2_{model_name}.csv")