In [1]:
import os
import json
import pandas as pd
import numpy as np
import time
from tqdm import tqdm  # Import tqdm for the progress bar
from evaluator import evaluate
from eslint import EslintProcessor
from google_closure_compiler import GoogleClosureCompiler
from chatgpt_api import ChatGPT  # Import ChatGPT
from gemini_api import gemini_response  # Import Gemini API
from collections import defaultdict

In [2]:
dataset_folder = './our_dataset'
# Define subfolder levels for processing
difficulty_levels = ['easy', 'medium', 'hard']

# Count total files to be processed for progress bar.
# os.walk yields nothing for a missing folder, so this never raises.
total_files = sum(
    len(files)
    for level in difficulty_levels
    for _, _, files in os.walk(os.path.join(dataset_folder, level))
)

# gt_code_lst: every ground-truth .js source, in (level, file name) order.
# gt_dict:     the same sources grouped by difficulty level.
gt_code_lst = []
gt_dict = defaultdict(list)

# Load dataset: read each .js file found directly inside every level folder.
for level in difficulty_levels:
    level_folder = os.path.join(dataset_folder, level)

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # resulting lists reproducible across machines and runs.
    for file_name in sorted(os.listdir(level_folder)):
        if not file_name.endswith('.js'):
            continue
        full_file_path = os.path.join(level_folder, file_name)

        # Decode explicitly as UTF-8 instead of the locale-dependent default.
        with open(full_file_path, 'r', encoding='utf-8') as file:
            original_code = file.read()

        gt_code_lst.append(original_code)
        gt_dict[level].append(original_code)

In [2]:
# Problem table used by the refactoring experiments; later cells read its
# 'Solution', 'Description', 'Examples', 'Constraints', 'Difficulty' and
# 'Function Header' columns.
code_gen = pd.read_csv(filepath_or_buffer="code_gen_dataset.csv")
# Preview the first five rows as this cell's displayed result.
code_gen.head(n=5)

Unnamed: 0,Problem Number,Name,Description,Examples,Solution,Constraints,Difficulty,Link,Function Header
0,1,Create Hello World Function,Write a function createHelloWorld. It should r...,"Example 1:\n\nInput: args = []\nOutput: ""Hello...",var createHelloWorld = function() { \n retu...,0 <= args.length <= 10,Easy,https://leetcode.com/problems/create-hello-wor...,/**\n * @return {Function}\n */\nvar createHel...
1,2,Counter 2,Write a function createCounter. It should acce...,"Example 1:\n\nInput: init = 5, calls = [""incre...",var createCounter = function(init) {\n let pr...,-1000 <= init <= 1000\n0 <= calls.length <= 10...,Easy,https://leetcode.com/problems/counter-ii/descr...,/**\n * @param {integer} init\n * @return { in...
2,3,Apply Transform Over Each Element in Array,Given an integer array arr and a mapping funct...,"Example 1:\n\nInput: arr = [1,2,3], fn = funct...","var map = function(arr, fn) {\r\n const tra...",0 <= arr.length <= 1000\n-10^9 <= arr[i] <= 10...,Easy,https://leetcode.com/problems/apply-transform-...,/**\n * @param {number[]} arr\n * @param {Func...
3,4,Filter Elements from Array,Given an integer array arr and a filtering fun...,"Example 1:\n\nInput: arr = [0,10,20,30], fn = ...","var filter = function(arr, fn) {\n const resu...",0 <= arr.length <= 1000\n-10^9 <= arr[i] <= 10^9,Easy,https://leetcode.com/problems/filter-elements-...,/**\n * @param {number[]} arr\n * @param {Func...
4,5,Array Reduce Transformation,"Given an integer array nums, a reducer functio...","Example 1:\n\nInput: \nnums = [1,2,3,4]\nfn = ...",/**\n * @param {number[]} nums\n * @param {Fun...,0 <= nums.length <= 1000\n0 <= nums[i] <= 1000...,Easy,https://leetcode.com/problems/array-reduce-tra...,/**\n * @param {number[]} nums\n * @param {Fun...


In [21]:
# Code-generation prompt: asks the model to write JavaScript from a problem
# description alone (no existing solution is supplied).
# str.format-style placeholders: {leetcode_desc}, {schema}.
PROMPT_CODEGEN_V1="""
Task: Generate JavaScript code based on the following description.

Problem Description: {leetcode_desc}

Give your output in this particular schema: {schema}

Requirements:
1. Use modern JavaScript syntax (ES6+)
2. Implement error handling where appropriate
3. Follow best practices for code organization and readability
"""

# Refactoring prompt, variant 1: supplies the problem description together
# with the original solution. This is the template the GPT refactoring loop
# formats for every row of `code_gen`.
# str.format-style placeholders: {leetcode_desc}, {gt_code}, {schema}.
PROMPT_REFACTOR_V1="""
Task: Refactor the following JavaScript code to improve its quality, readability, and maintainability.

Problem Description: {leetcode_desc}

Original Code: {gt_code}

Give your output in this particular schema: {schema}

Requirements:
1. Improve the run-time of the algorithm
2. Improve the readability and quality of algorithm
"""

# Refactoring prompt, variant 2: same wording as PROMPT_REFACTOR_V1 but
# without the "Problem Description" section, so the model only sees the code.
# str.format-style placeholders: {gt_code}, {schema}.
PROMPT_REFACTOR_V2="""
Task: Refactor the following JavaScript code to improve its quality, readability, and maintainability.

Original Code: {gt_code}

Give your output in this particular schema: {schema}

Requirements:
1. Improve the run-time of the algorithm
2. Improve the readability and quality of algorithm
"""

# Shape of the JSON object requested from the model: the keys are the fields
# later read from each parsed response, the values are short instructions
# describing what each field should contain.
out_schema = dict(
    code="generated javascript code",
    rationale="explain your approach keep it under 200 words",
)


# Experiment 3: Refactor with metadata from LeetCode added
# Extends PROMPT_REFACTOR_V1 with "Examples" and "Constraint" sections.
# str.format-style placeholders: {leetcode_desc}, {examples}, {constraint},
# {gt_code}, {schema}. Note the placeholder is the singular {constraint}.
PROMPT_REFACTOR_V3="""
Task: Refactor the following JavaScript code to improve its quality, readability, and maintainability.

Problem Description: {leetcode_desc}

Examples: {examples}

Constraint: {constraint}

Original Code: {gt_code}

Give your output in this particular schema: {schema}

Requirements:
1. Improve the run-time of the algorithm
2. Improve the readability and quality of algorithm
"""

# Experiment 4: Refactor with linter output given in the prompt. (need linter output from Jacob)
# str.format-style placeholders: {leetcode_desc}, {gt_code}, {schema},
# {linter_output}.
# The refactoring loop's .format(...) call does not pass `linter_output`, so
# formatting this template with those arguments raises KeyError until a
# linter_output value is supplied.
# NOTE(review): requirement 3 reads "provided the Linter" -- probably meant
# "provided by the Linter"; left unchanged because editing the prompt text
# changes what is sent to the model.
PROMPT_REFACTOR_V4="""
Task: Refactor the following JavaScript code to improve its quality, readability, and maintainability.

Problem Description: {leetcode_desc}

Original Code: {gt_code}

Give your output in this particular schema: {schema}

Requirements:
1. Improve the run-time of the algorithm
2. Improve the readability and quality of algorithm
3. Use the suggestions provided the Linter to optimize the code

Linter output of the original code: {linter_output}
"""

# Experiment 5: Refactor with more specific requirements (e.g., make the solution 10 lines or less) (not trivial, the above cases first)
# str.format-style placeholders: {leetcode_desc}, {gt_code}, {schema}.
# NOTE(review): the text below currently matches PROMPT_REFACTOR_V1 exactly;
# the more specific requirements described above have not been added yet.
PROMPT_REFACTOR_V5="""
Task: Refactor the following JavaScript code to improve its quality, readability, and maintainability.

Problem Description: {leetcode_desc}

Original Code: {gt_code}

Give your output in this particular schema: {schema}

Requirements:
1. Improve the run-time of the algorithm
2. Improve the readability and quality of algorithm
"""

In [36]:
# GPT Refactoring
# For every problem in `code_gen`, format PROMPT_REFACTOR_V1, send it to the
# model and collect the returned code / rationale.
#
# Results:
#   ans         -- refactored code per row (None when the reply was unusable)
#   rat         -- rationale per row       (None when the reply was unusable)
#   failed_rows -- index labels of rows whose reply could not be parsed
# `ans` and `rat` always receive exactly one entry per row, so they stay
# aligned with `code_gen` when later attached to a DataFrame.
model_name = "gpt-4-turbo"
MAX_TOKENS_ANSWER = 1000
CG = ChatGPT(MODEL = model_name)
ans = []
rat = []
failed_rows = []
for index, row in code_gen.iterrows():
    # The prompt's {schema} slot receives the row's function header, while
    # `out_schema` (the JSON field description) is passed to the client
    # separately. str.format ignores keyword arguments the template does not
    # reference, so `constraint` / `examples` are harmless for V1 and keep
    # this call usable with templates that do reference them.
    code_prompt = PROMPT_REFACTOR_V1.format(
        schema=row['Function Header'],
        gt_code=row['Solution'],
        leetcode_desc=row['Description'],
        constraint=row['Constraints'],
        examples=row['Examples'],
    )
    response_txt = CG.get_gpt_json_response(code_prompt, out_schema, MAX_TOKENS_ANSWER)
    print(response_txt)

    # A reply cut off at MAX_TOKENS_ANSWER (or otherwise malformed) must not
    # abort the run and throw away the responses already collected.
    try:
        response_json = json.loads(response_txt)
        refactored_code = response_json['code']
        rationale = response_json['rationale']
    except (json.JSONDecodeError, KeyError, TypeError) as err:
        print(f"[row {index}] could not parse model response: {err!r}")
        failed_rows.append(index)
        refactored_code = None
        rationale = None

    ans.append(refactored_code)
    rat.append(rationale)
    print("-------")

{
  "code": "/**\n * Creates a function that returns 'Hello World'.\n * @return {Function} A function that when called, returns 'Hello World'.\n */\nconst createHelloWorld = () => () => 'Hello World';\n\n/**\n * Example usage:\n * const helloFunc = createHelloWorld();\n * console.log(helloFunc()); // Outputs: 'Hello World'\n */",
  "rationale": "The refactored code uses ES6 arrow functions for conciseness and improved readability. By using arrow functions, the code becomes less verbose and easier to understand. The function createHelloWorld is defined as a constant to prevent reassignment, enhancing maintainability. The inner function is simplified to directly return the string 'Hello World', making the code cleaner and more straightforward. This refactoring does not change the runtime complexity as the operations remain constant time, O(1), but it significantly improves the readability and maintainability of the code."
}
-------
{
  "code": "/**\n * @param {integer} init\n * @return {

{
  "code": "/**\n * @param {Array<Function>} functions\n * @return {Promise<any>}\n */\nvar promiseAll = function(functions) {\n    return new Promise((resolve, reject) => {\n        if (functions.length === 0) {\n            resolve([]);\n            return;\n        }\n\n        let results = new Array(functions.length);\n        let completed = 0;\n\n        functions.forEach((fn, index) => {\n            fn().then(value => {\n                results[index] = value;\n                completed++;\n                if (completed === functions.length) {\n                    resolve(results);\n                }\n            }).catch(error => {\n                reject(error);\n            });\n        });\n    });\n};",
  "rationale": "The refactored code improves readability by using clear variable names such as 'results' and 'completed' to track the resolved values and the count of completed promises, respectively. It ensures that the promises are executed in parallel and the results a

{
  "code": "/**\n * @param {string} str1\n * @param {string} str2\n * @return {string}\n */\nvar gcdOfStrings = function(str1, str2) {\n    // Check if concatenation of str1 and str2 is the same as str2 and str1\n    if (str1 + str2 !== str2 + str1) {\n        return '';\n    }\n\n    // Function to calculate the greatest common divisor using Euclidean algorithm\n    function gcd(a, b) {\n        return b === 0 ? a : gcd(b, a % b);\n    }\n\n    // Calculate the length of the greatest common divisor of lengths\n    const maxLength = gcd(str1.length, str2.length);\n\n    // Return the substring from 0 to maxLength of str1, which is the greatest common divisor string\n    return str1.substring(0, maxLength);\n};",
  "rationale": "The refactored code improves readability by adding comments that explain each step, making it easier to understand the logic behind the function. The gcd function is defined separately to encapsulate its functionality, enhancing maintainability. The use of desc

{
  "code": "/**\n * @param {string} senate\n * @return {string}\n */\nvar predictPartyVictory = function(senate) {\n    let radiantCount = 0, direCount = 0;\n    let radiantBan = 0, direBan = 0;\n\n    for (let char of senate) {\n        if (char === 'R') radiantCount++;\n        else direCount++;\n    }\n\n    const queue = senate.split('');\n\n    while (radiantCount > 0 && direCount > 0) {\n        let senator = queue.shift();\n        if (senator === 'R') {\n            if (radiantBan > 0) {\n                radiantBan--;\n                radiantCount--;\n            } else {\n                direBan++;\n                queue.push(senator);\n            }\n        } else {\n            if (direBan > 0) {\n                direBan--;\n                direCount--;\n            } else {\n                radiantBan++;\n                queue.push(senator);\n            }\n        }\n    }\n\n    return radiantCount > 0 ? 'Radiant' : 'Dire';\n};",
  "rationale": "The refactored code uses

{
  "code": "/**\n * Converts a Roman numeral string to an integer.\n * @param {string} s - A string representing the Roman numeral.\n * @return {number} - The integer value of the Roman numeral.\n */\nvar romanToInt = function(s) {\n    const romanValues = {\n        'I': 1,\n        'V': 5,\n        'X': 10,\n        'L': 50,\n        'C': 100,\n        'D': 500,\n        'M': 1000\n    };\n\n    let total = 0;\n    let previousValue = 0;\n\n    for (let i = s.length - 1; i >= 0; i--) {\n        const currentValue = romanValues[s[i]];\n        if (currentValue < previousValue) {\n            total -= currentValue;\n        } else {\n            total += currentValue;\n        }\n        previousValue = currentValue;\n    }\n\n    return total;\n};",
  "rationale": "The refactored code improves readability by using more descriptive variable names and iterating from right to left, which aligns with the way Roman numerals are evaluated (considering subtraction rules). This approach elim

{
  "code": "/**\n * @param {number} n\n * @return {boolean}\n */\nvar isPowerOfThree = function(n) {\n    if (n < 1) return false;\n    while (n % 3 === 0) {\n        n /= 3;\n    }\n    return n === 1;\n};",
  "rationale": "The refactored code improves readability by using a while loop instead of recursion, which makes it easier to understand the process of dividing the number by 3 repeatedly. This approach also enhances the run-time efficiency by avoiding the overhead associated with recursive calls. The initial check (n < 1) quickly handles cases where n is zero or negative, ensuring the function only proceeds with positive integers. The while loop then continuously divides n by 3 as long as n is divisible by 3, and finally, n should be exactly 1 if it's a power of three. This method is straightforward and avoids the potential stack overflow risk of recursion in the original code."
}
-------
{
  "code": "/**\n * @param {number[]} nums\n * @return {number[]}\n */\nvar findDisappeare

{
  "code": "/**\n * @param {string} word1\n * @param {string} word2\n * @return {number}\n */\nvar minDistance = function(word1, word2) {\n    const m = word1.length, n = word2.length;\n    const dp = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0));\n\n    for (let i = 0; i <= m; i++) dp[i][0] = i;\n    for (let j = 0; j <= n; j++) dp[0][j] = j;\n\n    for (let i = 1; i <= m; i++) {\n        for (let j = 1; j <= n; j++) {\n            if (word1[i - 1] === word2[j - 1]) {\n                dp[i][j] = dp[i - 1][j - 1];\n            } else {\n                dp[i][j] = 1 + Math.min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]);\n            }\n        }\n    }\n    return dp[m][n];\n};",
  "rationale": "The refactored code uses a dynamic programming table (dp) to store the minimum number of operations required to convert substrings of word1 to substrings of word2. This approach eliminates the need for recursion and memoization, which improves the runtime complexity from exponent

{
  "code": "/**\n * @param {number[]} nums\n * @return {number}\n */\nvar maxCoins = function(nums) {\n    const n = nums.length;\n    const paddedNums = [1, ...nums, 1];\n    const dp = Array.from({ length: n + 2 }, () => Array(n + 2).fill(0));\n\n    for (let left = n; left > 0; left--) {\n        for (let right = left; right <= n; right++) {\n            for (let i = left; i <= right; i++) {\n                const coins = paddedNums[left - 1] * paddedNums[i] * paddedNums[right + 1] + dp[left][i - 1] + dp[i + 1][right];\n                dp[left][right] = Math.max(dp[left][right], coins);\n            }\n        }\n    }\n\n    return dp[1][n];\n};",
  "rationale": "The refactored code improves readability by using more descriptive variable names and simplifying the array padding process. The original code's logic is maintained, but the refactoring focuses on making the code more understandable and maintainable. The use of 'paddedNums' avoids modifying the input array directly and cl

In [37]:
# Results table: ground-truth solution and difficulty taken from `code_gen`,
# plus the model's refactored code and rationale collected in `ans` / `rat`
# (both lists must have one entry per row of `code_gen`).
output_df = (
    code_gen[['Solution', 'Difficulty']]
    .rename(columns={'Solution': 'gt_code', 'Difficulty': 'level'})
    .assign(refactor_code=ans, rationale=rat)
)

output_df.head()

Unnamed: 0,gt_code,level,refactor_code,rationale
0,var createHelloWorld = function() { \n retu...,Easy,/**\n * Creates a function that returns 'Hello...,The refactored code uses ES6 arrow functions f...
1,var createCounter = function(init) {\n let pr...,Easy,/**\n * @param {integer} init\n * @return { in...,The refactored code improves readability by us...
2,"var map = function(arr, fn) {\r\n const tra...",Easy,/**\n * @param {number[]} arr\n * @param {Func...,The refactored code replaces the forEach loop ...
3,"var filter = function(arr, fn) {\n const resu...",Easy,/**\n * Filters an array based on a provided f...,The refactored code improves readability by us...
4,/**\n * @param {number[]} nums\n * @param {Fun...,Easy,/**\n * @param {number[]} nums\n * @param {Fun...,The original code manually implements the redu...


In [38]:
# Persist the results. DataFrame.to_csv does not create missing parent
# directories, so make sure the target folder exists first (no-op if it does).
# The file name embeds the prompt variant and model; keep "PROMPT_REFACTOR_V1"
# in sync with the template used to produce `output_df`.
output_dir = "output_files/openai_results_temp_0"
os.makedirs(output_dir, exist_ok=True)
output_df.to_csv(f"{output_dir}/PROMPT_REFACTOR_V1_{model_name}.csv")