# Colab Initialization

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!git clone https://github.com/radistoubalidis/JSRepair.git

!python -m pip install lightning
!pip install datasets
!pip install python-dotenv
!pip install rouge-score
!pip install diff-match-patch
!pip install gspread google-auth

Cloning into 'JSRepair'...
remote: Enumerating objects: 573, done.[K
remote: Counting objects: 100% (89/89), done.[K
remote: Compressing objects: 100% (66/66), done.[K
remote: Total 573 (delta 60), reused 45 (delta 23), pack-reused 484 (from 1)[K
Receiving objects: 100% (573/573), 2.17 MiB | 24.92 MiB/s, done.
Resolving deltas: 100% (392/392), done.
Collecting lightning
  Downloading lightning-2.5.0.post0-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.4/40.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities<2.0,>=0.10.0 (from lightning)
  Downloading lightning_utilities-0.11.9-py3-none-any.whl.metadata (5.2 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)
Collecting pytorch-lightning (from lightning)
  Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl.metadata (21 kB)
Downloading lightning-2.5.0.post0-py3-none-any.whl (

In [1]:
%cd ./JSRepair

/content/JSRepair


# Dependencies

In [2]:
from modules.models import CodeT5, CodeBertJS
from transformers import RobertaTokenizer
from difflib import unified_diff
from difflib import SequenceMatcher
import pandas as pd
import torch
import os

# Load Model and Tokenizer

In [16]:
CPKT_PATH = input('Paste model checkpoint path: ')
if not os.path.exists(CPKT_PATH):
    raise FileNotFoundError(CPKT_PATH)
MODEL_NAME = CPKT_PATH.split('/')[-1].split('.')[0].split('_')[0]

if 'CodeT5' in MODEL_NAME:
    HF_DIR = 'Salesforce/codet5-base'
    model = CodeT5.load_from_checkpoint(
        CPKT_PATH,
        num_classes=5,
        model_dir=HF_DIR,
        with_activation=True,
        with_layer_norm=True
    )
else:
    HF_DIR = 'microsoft/codebert-base-mlm'
    model = CodeBertJS.load_from_checkpoint(
        CPKT_PATH,
        num_classes=5,
        model_dir=HF_DIR,
        with_activation=True,
        with_layer_norm=True
    )

model.eval()
model.to('cpu')

Paste model checkpoint path: /content/drive/MyDrive/Thesis/checkpoints/CodeT5_base_JS_5classes_512MaxL_v906-v1.ckpt


CodeT5(
  (model): T5ForConditionalGeneration(
    (shared): Embedding(32100, 768)
    (encoder): T5Stack(
      (embed_tokens): Embedding(32100, 768)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=768, out_features=768, bias=False)
                (k): Linear(in_features=768, out_features=768, bias=False)
                (v): Linear(in_features=768, out_features=768, bias=False)
                (o): Linear(in_features=768, out_features=768, bias=False)
                (relative_attention_bias): Embedding(32, 12)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=768, out_features=3072, bias=False)
                (wo): Linear(in_features=307

# Run inference on buggy code

In [17]:
import sqlite3
from modules.filters import add_labels

classLabels = {
        "functionality" : 0.,
        "ui-ux" : 0.,
        "compatibility-performance" : 0.,
        "network-security" : 0.,
        "general": 0.
    }

all_bug_types = ['functionality', 'network-security', 'ui-ux', 'compatibility-performance', 'general']
all_bug_types_str = " ".join(all_bug_types)
bug_type = input(f"Select a bug type to run inference on ({all_bug_types_str}): ")
if bug_type not in all_bug_types:
    raise Exception('Invalid Bug Type Selected')


DB_TABLE = 'inference_examples.json'
df = pd.read_json(DB_TABLE)
df['class_labels'] = df['bug_type'].apply(lambda bT: add_labels(bT.split(','), classLabels))
sample = df[df['bug_type'].str.contains(bug_type)].sample(1).iloc[0].to_dict()
buggy_code, correct_code, bug_type, labels = sample['buggy_code'], sample['correct_code'], sample['bug_type'], torch.tensor(sample['class_labels'])

print(f"Bug type: {bug_type}")
print('--------------------- Buggy Code ---------------------')
print(buggy_code)
print('-------------------- Correct Code --------------------')
print(correct_code)

Select a bug type to run inference on (functionality network-security ui-ux compatibility-performance general): general
Bug type: general
--------------------- Buggy Code ---------------------
// Write a function to display the Fibonacci sequence using recursion
function fibonacci(n) {
  if (n <= 1) {
    return n;
  } else {
    return fibonacci(n + 1) + fibonacci(n + 2);
  }
}
-------------------- Correct Code --------------------
// Write a function to display the Fibonacci sequence using recursion
function fibonacci(n) {
  if (n <= 1) {
    return n;
  } else {
    return fibonacci(n - 1) + fibonacci(n - 2);
  }
}


In [18]:
tokenizer = RobertaTokenizer.from_pretrained(HF_DIR)
encoded_buggy_code = tokenizer(buggy_code, padding=True, truncation=True, return_tensors='pt')
encoded_correct_code = tokenizer(correct_code, padding=True, truncation=True, return_tensors='pt')

if 'CodeT5' in MODEL_NAME:
    batch = {
        'input_ids': encoded_buggy_code['input_ids'],
        'attention_mask': encoded_buggy_code['attention_mask'],
        'labels': encoded_correct_code['input_ids'],
        'class_labels': labels
    }
else:
    batch = {
        'input_ids': encoded_buggy_code['input_ids'],
        'attention_mask': encoded_buggy_code['attention_mask'],
        'gt_input_ids': encoded_correct_code['input_ids'],
        'class_labels': labels
    }


with torch.no_grad():
    _, out, bug_class = model.forward(batch)
    preds = torch.sigmoid(bug_class)
    probs = (preds > 0.5).float().tolist()[0]
    pred_classes = []
    for i, p in enumerate(probs):
        if p == 1:
            pred_classes.append(model.classes[i])

print(pred_classes)
generated_code = model.decode_output(out)
print(generated_code)

['functionality', 'compatibility-performance']
// Write a function to display the Fibonacci sequence using recursion
function fibonacci(n) {
  if (n <= 1) {
    return f;
  } else if
    return fibonacci(n + 1) + fibonacci(n + 2);
  }
}


# Συγκρίσεις

#### Διαφορές : Κώδικας με σφάλματα - Διορθωμένος κώδικας (ground truth)

In [12]:
real_codeDiff = unified_diff(buggy_code.splitlines(), correct_code.splitlines())
print("\n".join(real_codeDiff))

--- 

+++ 

@@ -3,6 +3,6 @@

   if (n <= 1) {
     return n;
   } else {
-    return fibonacci(n + 1) + fibonacci(n + 2);
+    return fibonacci(n - 1) + fibonacci(n - 2);
   }
 }


#### Διαφορες : Κώδικας με σφάλματα - Κώδικας που παρήγαγε το μοντέλο

In [13]:
model_codeDiff = unified_diff(buggy_code.splitlines(), generated_code.splitlines())
print("\n".join(model_codeDiff))

--- 

+++ 

@@ -1,8 +0,0 @@

-// Write a function to display the Fibonacci sequence using recursion
-function fibonacci(n) {
-  if (n <= 1) {
-    return n;
-  } else {
-    return fibonacci(n + 1) + fibonacci(n + 2);
-  }
-}


#### Διαφορές : Κώδικας που παρήγαγε το μοντέλο - Διορθωμένος κώδικας

In [14]:
codeDiff = unified_diff(generated_code.splitlines(), correct_code.splitlines())
print("\n".join(codeDiff))

--- 

+++ 

@@ -0,0 +1,8 @@

+// Write a function to display the Fibonacci sequence using recursion
+function fibonacci(n) {
+  if (n <= 1) {
+    return n;
+  } else {
+    return fibonacci(n - 1) + fibonacci(n - 2);
+  }
+}


### Σύγκριση χαρακτήρων:

#### Σύγκριση χαρακτήρα προς χαρακτήρα μεταξύ του κώδικα με σφάλματα (ακολουθία εισόδου) με τον διορθωμένο κώδικα (ground truth)

In [22]:
sm = SequenceMatcher(None, buggy_code, correct_code)

for opcode, i1,i2,j1,j2 in sm.get_opcodes():
    if opcode != 'equal':
        print(opcode)
        if opcode == 'insert':
            print(generated_code[j1:j2])
        elif opcode == 'replace':
            print(buggy_code[i1:i2])
            print(generated_code[j1:j2])
        elif opcode == 'delete':
            print(buggy_code[i1:i2])

replace
+

replace
+



### Σύκγριση Χαρακτήρων:

#### Σύγκριση χαρακτήρα προς χαρακτήρα μεταξύ του κώδικα που παρήγαγε το μοντέλο με τον διορθωμένο κώδικα (ground truth)

In [23]:
sm = SequenceMatcher(None, buggy_code, generated_code)

for opcode, i1,i2,j1,j2 in sm.get_opcodes():
    if opcode != 'equal':
        print(opcode)
        if opcode == 'insert':
            print(generated_code[j1:j2])
        elif opcode == 'replace':
            print(buggy_code[i1:i2])
            print(generated_code[j1:j2])
        elif opcode == 'delete':
            print(buggy_code[i1:i2])

delete
// Write a function to display the Fibonacci sequence using recursion
function fibonacci(n) {
  if (n <= 1) {
    return n;
  } else {
    return fibonacci(n + 1) + fibonacci(n + 2);
  }
}
