In [1]:
import os
from datasets import load_dataset
import sqlite3
import pandas as pd
import torch
import json
from transformers import RobertaTokenizer
from modules.models import CodeT5
from modules.datasets import CodeT5Dataset
from modules.metrics import CodeRouge
from torch.utils.data import DataLoader

# Run configuration, collected interactively from the user.
# Pasted values are stripped so stray whitespace/newlines from the clipboard
# do not end up inside paths or output file names.
SQLITE_PATH = input("Paste sqlite3 path: ").strip()        # SQLite file used as a dataset cache
MODEL_NAME = input("Paste Model Name: ").strip()           # used to name the metric output files
VERSION = int(input("Paste model version: "))              # used to name the metric output files
CPKT_PATH = input('Paste ckpt path: ').strip()             # checkpoint passed to CodeT5.load_from_checkpoint
METRICS_PATH = input('Paste metrics dir path: ').strip()   # directory that receives the .json/.csv metrics
HF_DIR = input('Paste hf model dir: ').strip()             # passed to RobertaTokenizer.from_pretrained
MAX_LENGTH = int(input("Paste tokenizer max length: "))    # pad/truncate length for tokenization

In [2]:
# Load the 'js' configuration of bigcode/humanevalpack into `ds_df`, using the
# SQLite file at SQLITE_PATH as a local cache of the 'test' split.
if os.path.exists(SQLITE_PATH):
    # Cache hit: read the table back and restore the DataFrame index that
    # `to_sql` stored in the 'index' column.
    con = sqlite3.connect(SQLITE_PATH)
    try:
        ds_df = pd.read_sql_query("SELECT * FROM humanevalpack", con).set_index('index')
    finally:
        # Always release the database handle, even if the query fails.
        con.close()
else:
    # Cache miss: fetch the dataset and materialise the 'test' split.
    ds = load_dataset(
        'bigcode/humanevalpack',
        'js',
    )
    # Index with ['test'] so a missing split fails with a clear KeyError
    # rather than a TypeError from iterating over None.
    ds_df = pd.DataFrame(list(ds['test']))

    # Persist the frame (including its index) so later runs can skip the download.
    con = sqlite3.connect(SQLITE_PATH)
    try:
        ds_df.to_sql('humanevalpack', con)
    finally:
        con.close()


In [3]:
# Restore the model from the checkpoint and prepare it for CPU evaluation.
model = CodeT5.load_from_checkpoint(CPKT_PATH)

# Move unconditionally: `.to('cpu')` does nothing when the model is already on
# the CPU, and the previous guard compared `model.device` (presumably a
# torch.device) with the string 'cpu', which is not a reliable comparison.
model.to('cpu')

# Switch off training-only behaviour (e.g. dropout) so evaluation is deterministic.
# NOTE(review): assumes CodeT5 is an nn.Module/LightningModule - confirm.
model.eval()

# Project-specific switch read by CodeT5; set last so it has the final word.
model.mode = 'test'

In [4]:
# Tokenizer loaded from the local Hugging Face directory.
tokenizer = RobertaTokenizer.from_pretrained(HF_DIR)


def _encode_column(column):
    """Tokenize one text column of `ds_df`.

    Every sequence is padded/truncated to MAX_LENGTH and the result is
    requested as PyTorch tensors (`return_tensors='pt'`).
    """
    texts = ds_df[column].tolist()
    return tokenizer(
        texts,
        max_length=MAX_LENGTH,
        padding='max_length',
        truncation=True,
        return_tensors='pt',
    )


# Model inputs come from the buggy code, targets from the canonical fix.
encoded_samples = _encode_column('buggy_solution')
encoded_labels = _encode_column('canonical_solution')

In [5]:
# Run the model over every sample (one per batch) and decode its predictions.
torch_ds = CodeT5Dataset(encoded_samples, encoded_labels)
loader = DataLoader(torch_ds, batch_size=1)

generated_codes = []
# Gradients are not needed for evaluation; disabling them avoids building the
# autograd graph for every batch and keeps memory usage flat.
with torch.no_grad():
    for batch in loader:
        # Call the module itself rather than `.forward` so any hooks still run.
        out = model(batch)
        # NOTE(review): assumes out[1] holds per-token logits over the
        # vocabulary (argmax over the last axis yields token ids) - confirm
        # against CodeT5.forward.
        predicted_ids = torch.argmax(out[1], dim=-1)
        # batch_size is 1, so only the first decoded string is relevant.
        decoded = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)
        generated_codes.append(decoded[0])

references = ds_df['canonical_solution'].tolist()

# Each reference must have exactly one prediction before scoring.
if len(generated_codes) != len(references):
    raise ValueError("Invalid model output.")

In [6]:
# Score the predictions with ROUGE and export both the averages and the
# per-score table.
rouge = CodeRouge(['rouge6', 'rouge7','rouge8','rouge9','rougeL','rougeLsum'])

rouge.compute(predictions=generated_codes, references=references)
rouge.calc_averages()

# Create the output directory up front so a fresh METRICS_PATH does not make
# the writes below fail.
if METRICS_PATH:
    os.makedirs(METRICS_PATH, exist_ok=True)

avgs_path = f"{METRICS_PATH}/{MODEL_NAME}_{VERSION}.json"
all_path = f"{METRICS_PATH}/{MODEL_NAME}_{VERSION}.csv"

# Averages go to JSON.
with open(avgs_path, 'w') as f:
    json.dump(rouge.avgs, f)

# Collect the score records of every ROUGE type into one flat list.
all_scores = []
for rouge_type in rouge.rouge_types:
    all_scores.extend(rouge.rouge_type_to_list(rouge_type))

metrics_df = pd.DataFrame(all_scores)

# Round the three score columns to 3 decimals for a compact CSV.
score_columns = ['precision', 'recall', 'fmeasure']
metrics_df[score_columns] = metrics_df[score_columns].round(3)

# `index` is a boolean flag; the previous value 'table' was only truthy, so
# `True` produces the same file (index written, no index label).
metrics_df.to_csv(all_path, index=True)