# Run inference on a single code snippet

In [1]:
from modules.models import CodeT5, CodeBertJS
from transformers import RobertaTokenizer
from difflib import unified_diff
from difflib import SequenceMatcher
import pandas as pd
import torch
import os

# Load Model and Tokenizer

In [2]:
CPKT_PATH = 'checkpoints/CodeT5_large_JS_5classes_1024MaxL_v4.ckpt' if os.path.exists('checkpoints/CodeT5_large_JS_5classes_1024MaxL_v5.ckpt') else ''
MODEL_NAME = CPKT_PATH.split('/')[-1].split('.')[0].split('_')[0]
if 'CodeT5' in MODEL_NAME:
    HF_DIR = 'Salesforce/codet5-base'
    model = CodeT5.load_from_checkpoint(
        CPKT_PATH,
        num_classes=5,
        model_dir=HF_DIR,
        with_activation=True,
        with_layer_norm=True
    )
else:
    HF_DIR = 'microsoft/codebert-base-mlm'
    model = CodeBertJS.load_from_checkpoint(CPKT_PATH)
    
model.eval()
model.to('cpu')

CodeT5(
  (model): T5ForConditionalGeneration(
    (shared): Embedding(32100, 768)
    (encoder): T5Stack(
      (embed_tokens): Embedding(32100, 768)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=768, out_features=768, bias=False)
                (k): Linear(in_features=768, out_features=768, bias=False)
                (v): Linear(in_features=768, out_features=768, bias=False)
                (o): Linear(in_features=768, out_features=768, bias=False)
                (relative_attention_bias): Embedding(32, 12)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=768, out_features=3072, bias=False)
                (wo): Linear(in_features=307

# Run inference on buggy code

In [11]:
all_bug_types = ['functionality', 'network-security', 'ui-ux', 'compatibility-performance', 'general']
all_bug_types_str = " ".join(all_bug_types)
bug_type = input(f"Select a bug type to run inference on ({all_bug_types_str})")
if bug_type not in all_bug_types:
    raise Exception('Invalid Bug Type Selected')

def read_sample(bugType: str) -> tuple :
    inf_samples = pd.read_json('inference_examples.json')
    sample = inf_samples[inf_samples['bug_type'] == bugType].iloc[0].to_dict()
    return sample['buggy_code'], sample['correct_code'], sample['bug_type']

buggy_code, correct_code, bug_type = read_sample(bug_type)
print(f"Bug type: {bug_type}")
print('--------------------- Buggy Code ---------------------')
print(buggy_code)
print('-------------------- Correct Code --------------------')
print(correct_code)

Bug type: compatibility-performance
--------------------- Buggy Code ---------------------
// Given a list of dom element ids with arbitrary length, write a function that changes their background color to yellow
function highlightItems(ids) {
  ids.forEach(id => {
    const element = document.getElementById(id);
    if (element) {
      element.style.backgroundColor = "yellow";
    }
  });
}
-------------------- Correct Code --------------------
// Given a list of dom element ids with arbitrary length, write a function that changes their background color to yellow
function highlightItems(ids) {
  const elements = ids.map(id => document.getElementById(id));
  elements.forEach(element => {
    if (element) {
      element.style.backgroundColor = "yellow";
    }
  });
}


In [12]:
tokenizer = RobertaTokenizer.from_pretrained(HF_DIR)
encoded_buggy_code = tokenizer(buggy_code, padding=True, truncation=True, return_tensors='pt')
encoded_correct_code = tokenizer(correct_code, padding=True, truncation=True, return_tensors='pt')
batch = {
    'input_ids': encoded_buggy_code['input_ids'],
    'attention_mask': encoded_buggy_code['attention_mask'],
    'labels': encoded_correct_code['input_ids'],
}

with torch.no_grad():
    _, out, bug_class = model.forward(batch)
    probs = torch.softmax(bug_class, dim=1)
    pred_class = model.classes[torch.argmax(probs, dim=1).item()]

generated_code = tokenizer.batch_decode(torch.argmax(out, dim=-1), skip_special_tokens=True)[0]

In [13]:
print(generated_code)

// Given a list of dom element ids with arbitrary length, write a function to changes their background color to yellow
function highlightItems(ids) {
  ids element = document.map(id => {.getElementById(id));
    if.forEach(id => {
    const (element) {
      element.style.backgroundColor = "yellow";
    }
  });



# Συγκρίσεις

#### Διαφορές : Κώδικας με σφάλματα - Διορθωμένος κώδικας (ground truth)

In [14]:
real_codeDiff = unified_diff(buggy_code.splitlines(), correct_code.splitlines())
print("\n".join(real_codeDiff))

--- 

+++ 

@@ -1,7 +1,7 @@

 // Given a list of dom element ids with arbitrary length, write a function that changes their background color to yellow
 function highlightItems(ids) {
-  ids.forEach(id => {
-    const element = document.getElementById(id);
+  const elements = ids.map(id => document.getElementById(id));
+  elements.forEach(element => {
     if (element) {
       element.style.backgroundColor = "yellow";
     }


#### Διαφορες : Κώδικας με σφάλματα - Κώδικας που παρήγαγε το μοντέλο

In [15]:
model_codeDiff = unified_diff(buggy_code.splitlines(), generated_code.splitlines())
print("\n".join(model_codeDiff))

--- 

+++ 

@@ -1,9 +1,8 @@

-// Given a list of dom element ids with arbitrary length, write a function that changes their background color to yellow
+// Given a list of dom element ids with arbitrary length, write a function to changes their background color to yellow
 function highlightItems(ids) {
-  ids.forEach(id => {
-    const element = document.getElementById(id);
-    if (element) {
+  ids element = document.map(id => {.getElementById(id));
+    if.forEach(id => {
+    const (element) {
       element.style.backgroundColor = "yellow";
     }
   });
-}


#### Διαφορές : Κώδικας που παρήγαγε το μοντέλο - Διορθωμένος κώδικας

In [16]:
codeDiff = unified_diff(generated_code.splitlines(), correct_code.splitlines())
print("\n".join(codeDiff))

--- 

+++ 

@@ -1,8 +1,9 @@

-// Given a list of dom element ids with arbitrary length, write a function to changes their background color to yellow
+// Given a list of dom element ids with arbitrary length, write a function that changes their background color to yellow
 function highlightItems(ids) {
-  ids element = document.map(id => {.getElementById(id));
-    if.forEach(id => {
-    const (element) {
+  const elements = ids.map(id => document.getElementById(id));
+  elements.forEach(element => {
+    if (element) {
       element.style.backgroundColor = "yellow";
     }
   });
+}


### Σύγκριση χαρακτήρων:

#### Σύγκριση χαρακτήρα προς χαρακτήρα μεταξύ του κώδικα με σφάλματα (ακολουθία εισόδου) με τον διορθωμένο κώδικα (ground truth)

In [17]:
sm = SequenceMatcher(None, buggy_code, correct_code)

for opcode, i1,i2,j1,j2 in sm.get_opcodes():
    if opcode != 'equal':
        print(opcode)
        if opcode == 'insert':
            print(generated_code[j1:j2])
        elif opcode == 'replace': 
            print(buggy_code[i1:i2])
            print(generated_code[j1:j2])
        elif opcode == 'delete':
            print(buggy_code[i1:i2])

replace
ids.forEach
s element = document.map
insert
{.getElementById(id));
    if.forEach(id => {
    const (ele
delete

    const element = document.getElementById(id);


### Σύκγριση Χαρακτήρων:

#### Σύγκριση χαρακτήρα προς χαρακτήρα μεταξύ του κώδικα που παρήγαγε το μοντέλο με τον διορθωμένο κώδικα (ground truth)

In [18]:
sm = SequenceMatcher(None, buggy_code, generated_code)

for opcode, i1,i2,j1,j2 in sm.get_opcodes():
    if opcode != 'equal':
        print(opcode)
        if opcode == 'insert':
            print(generated_code[j1:j2])
        elif opcode == 'replace': 
            print(buggy_code[i1:i2])
            print(generated_code[j1:j2])
        elif opcode == 'delete':
            print(buggy_code[i1:i2])

replace
hat
o
replace
.forEach
 element = document.map
replace

    const element
.getElementById(id));
    if.forEach(id
replace
 document.getElementById(id);
    if
> {
    const
delete
}
