In [None]:
! pip install sklearn nltk rouge

# Imports

In [None]:
from fastai.text import *
from statistics import mean, median, stdev

import sentencepiece as spm

In [None]:
import sys

sys.path.append("../../")
from eval.exp.nb_evaluation import *

sys.path.append("../../../")
from src.prep.exp.nb_prep import *
from src.proc.exp.nb_proc import *

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Locations of the datasets and saved models, plus the task this run trains on.
data_path = Path("/tf/data") / "datasets"
model_path = Path("/tf/data") / "models"

# Used below to pick the dataset sub-directory and to name saved artefacts.
task_type = "buggy"

In [None]:
# Load the SentencePiece model stored under <data_path>/merged; `sp` is later handed
# to the generation / evaluation helpers.
sp_model_file = data_path / "merged/model.model"
sp = spm.SentencePieceProcessor()
sp.Load(str(sp_model_file))

# Load Data

In [None]:
# Read the three splits for the current task from <data_path>/<task_type>.
# read_data is project code not shown here; presumably it returns the
# train / validation / test DataFrames in that order — confirm.
df_trn, df_val, df_tst = read_data(data_path/task_type)

In [None]:
# Number of rows in each of the three splits, in (trn, val, tst) order.
tuple(len(split) for split in (df_trn, df_val, df_tst))

In [None]:
# Batch size, forwarded as bs= both when the language-model data is generated
# and when it is re-loaded from disk below.
bs = 8

Percentage of data to be used: 100% (the full sample; this matches the `100pct` suffix of the data file saved below).

In [None]:
# Build the language-model data from the train/validation splits and persist it.
# The file name is relative; where it lands depends on the path held by `data`.
lm_data_file = f"{task_type}/data_lm_100pct.pkl"
data = gen_lm_data(df_trn, df_val, task_type, data_path, bs=bs)
data.save(lm_data_file)

In [None]:
# Re-load the language-model data from <data_path>/<task_type>/data_lm_100pct.pkl.
# Presumably this is the file written by data.save in the previous cell — confirm
# that the generated data object saves relative to data_path.
data = load_data(data_path/task_type, 'data_lm_100pct.pkl', bs = bs)

In [None]:
# Sizes of the training and validation datasets held by the loaded data object.
tuple(len(ds) for ds in (data.train_ds, data.valid_ds))

In [None]:
# Display a batch from the loaded data as a visual sanity check.
data.show_batch()

# Model Setup

In [None]:
# Forwarded as pretrained= to language_model_learner in the next cell.
pretrained = False

In [None]:
# amit experiments
# Transformer language-model learner over `data`, reporting accuracy as its metric.
learner_kwargs = dict(pretrained=pretrained, metrics=[accuracy])
learn = language_model_learner(data, Transformer, **learner_kwargs)

In [None]:
# Run the learning-rate finder, then plot what the recorder captured;
# the plot is what informs the max_lr chosen in the training section.
learn.lr_find()
learn.recorder.plot()

# Model Training

In [None]:
# Hyper-parameters consumed by learn.fit_one_cycle below.
a_epochs = 15          # first positional argument (number of epochs/cycle length)
max_lr = 5e-4          # second positional argument
moms = (0.75, 0.825)   # passed as moms=
pct_strt = 0.02        # passed as pct_start=

In [None]:
# Checkpoint the model under a task-specific name whenever valid_loss improves.
save_best_cb = callbacks.SaveModelCallback(
    learn,
    every='improvement',
    monitor='valid_loss',
    name=f'transformer_{task_type}_save_model',
)

# Stop training once valid_loss has not improved by at least 0.01 for 3 epochs.
early_stop_cb = callbacks.EarlyStoppingCallback(
    learn,
    monitor='valid_loss',
    min_delta=0.01,
    patience=3,
)

# Despite the name these are callback *instances*; they are passed as callbacks= below.
callback_fns = [save_best_cb, early_stop_cb]

In [None]:
#amit experiments
# Train with the one-cycle policy using the hyper-parameters and callbacks defined above.
fit_kwargs = dict(moms=moms, pct_start=pct_strt, callbacks=callback_fns)
learn.fit_one_cycle(a_epochs, max_lr, **fit_kwargs)

In [None]:
# Notify Slack that training has finished.
# The incoming-webhook URL is a credential, so it is read from the SLACK_WEBHOOK_URL
# environment variable instead of being hard-coded in the notebook. The URL that was
# previously committed in this cell should be revoked and re-issued.
import json
import os
import urllib.request

slack_webhook_url = os.environ.get("SLACK_WEBHOOK_URL")
if not slack_webhook_url:
    print("SLACK_WEBHOOK_URL is not set; skipping the Slack notification.")
else:
    slack_payload = json.dumps(
        {"text": "from: semeru tower 1\nstatus: model finished training"}
    ).encode("utf-8")
    try:
        # Supplying data= makes urllib issue a POST, matching the original `curl -X POST`.
        slack_request = urllib.request.Request(
            slack_webhook_url,
            data=slack_payload,
            headers={"Content-type": "application/json"},
        )
        with urllib.request.urlopen(slack_request, timeout=10) as slack_response:
            print(slack_response.read().decode("utf-8", errors="replace"))
    except (OSError, ValueError) as err:
        # Best effort: a failed notification must not abort the notebook run.
        print(f"Slack notification failed: {err}")

In [None]:
# Restore the checkpoint written by SaveModelCallback (same task-specific name).
best_model_name = f'transformer_{task_type}_save_model'
learn.load(best_model_name)

In [None]:
# Plot the metrics recorded during training (accuracy is the only metric
# registered when the learner was created).
learn.recorder.plot_metrics()

In [None]:
# learn.recorder.plot_losses()
# Plot the recorded losses and write the figure, as PNG, to a task-specific file
# relative to the current working directory.
losses_png = f"transformer_{task_type}_plot_losses.png"
figure_plot = learn.recorder.plot_losses(return_fig=True)
figure_plot.savefig(fname=losses_png, format='png')

In [None]:
from PIL import Image

# Open the loss plot from the same relative file name the previous cell passed to
# savefig. The earlier hard-coded absolute directory only worked when the kernel's
# working directory happened to be that exact location.
Image.open(f"transformer_{task_type}_plot_losses.png")

# Model Evaluation

### Vulnerability Classification

In [None]:
# Smoke-test the model on one hand-written Java method followed by the <$bug$> tag.
# n_toks = 100 is forwarded to get_res (project code not shown); presumably it caps
# the number of generated tokens — confirm.
get_res(learn, "public static void main(String[] args) {}<$bug$>", sp, n_toks = 100)

In [None]:
# Select the vulnerability task and read its splits.
# Note: this rebinds the notebook-wide `task_type` (same value as the training task).
task_type = "buggy"
vuln_splits = read_data(data_path / task_type)
vuln_trn, vuln_val, vuln_tst = vuln_splits

In [None]:
# Replace the validation split with its tagged version for the current task.
# tag_task is project code not shown here; re-running this cell tags the
# already-tagged data again, so re-run the read_data cell first.
vuln_val = tag_task(vuln_val, task_type)

In [None]:
# Evaluate on the first 10 validation rows only (a quick check, not a full evaluation).
# The three results are presumably accuracy, precision and recall, judging by the
# names they are unpacked into — confirm against eval_vuln.
acc, prec, recal = eval_vuln(learn, vuln_val[:10], sp = sp)

In [None]:
# Display the three scores returned by eval_vuln above.
acc, prec, recal

### Comment Generation

In [None]:
# Switch to the comment-generation task and read its splits.
# Note: this rebinds the notebook-wide `task_type`; earlier cells that format it into
# checkpoint / plot file names will resolve to different names if re-run afterwards.
task_type = "mthds_cmts"
cmt_splits = read_data(data_path / task_type)
cmt_trn, cmt_val, cmt_tst = cmt_splits

In [None]:
# Replace the validation split with its tagged version for the current task.
# tag_task is project code not shown here; re-running this cell tags the
# already-tagged data again, so re-run the read_data cell first.
cmt_val = tag_task(cmt_val, task_type)

In [None]:
# Evaluate on the first 10 validation rows only (a quick check, not a full evaluation).
# eval_txt's six results are unpacked as four BLEU score collections (presumably
# BLEU-1..4 — confirm), the METEOR scores, and the generated predictions.
b1, b2, b3, b4, meteor, preds = eval_txt(learn, cmt_val[:10], sp = sp)

In [None]:
# Mean of each of the four BLEU score collections, in (b1, b2, b3, b4) order.
tuple(mean(scores) for scores in (b1, b2, b3, b4))

In [None]:
# Mean of the METEOR scores returned by eval_txt above.
mean(meteor)

In [None]:
# Inspect the prediction at index 9 — the last one, since only 10 rows were evaluated.
preds[9]

In [None]:
# Show the query and the reference result stored under key 9, for comparison with preds[9].
tuple(cmt_val[column][9] for column in ('query', 'res'))

In [None]:
# eval_txt above is unpacked into (b1, b2, b3, b4, meteor, preds) only, and no visible
# cell assigns `rouge_l`, so a bare `mean(rouge_l)` raises NameError on Restart & Run All.
# Report the mean only when `rouge_l` is actually available in the kernel namespace.
rouge_l_mean = mean(rouge_l) if "rouge_l" in globals() else None
if rouge_l_mean is None:
    print("ROUGE-L scores were not computed for this task; skipping mean(rouge_l).")
rouge_l_mean

### StackOverflow QA

In [None]:
# Switch to the StackOverflow QA task and read its splits.
# Note: this rebinds the notebook-wide `task_type`; earlier cells that format it into
# checkpoint / plot file names will resolve to different names if re-run afterwards.
task_type = "so_posts"
so_splits = read_data(data_path / task_type)
so_trn, so_val, so_tst = so_splits

In [None]:
# Replace the validation split with its tagged version for the current task.
# tag_task is project code not shown here; re-running this cell tags the
# already-tagged data again, so re-run the read_data cell first.
so_val = tag_task(so_val, task_type)

In [None]:
# Evaluate on the first 10 validation rows only (a quick check, not a full evaluation).
# This overwrites b1..b4, meteor and preds from the comment-generation section.
b1, b2, b3, b4, meteor, preds = eval_txt(learn, so_val[:10], sp = sp)

In [None]:
# Mean of each of the four BLEU score collections, in (b1, b2, b3, b4) order.
tuple(mean(scores) for scores in (b1, b2, b3, b4))

In [None]:
# Mean of the METEOR scores returned by eval_txt above.
mean(meteor)

In [None]:
# Inspect the prediction at index 9 — the last one, since only 10 rows were evaluated.
preds[9]

In [None]:
# Show the query and the reference result stored under key 9, for comparison with preds[9].
tuple(so_val[column][9] for column in ('query', 'res'))

In [None]:
# eval_txt above is unpacked into (b1, b2, b3, b4, meteor, preds) only, and no visible
# cell assigns `rouge_l`, so a bare `mean(rouge_l)` raises NameError on Restart & Run All.
# Report the mean only when `rouge_l` is actually available in the kernel namespace.
rouge_l_mean = mean(rouge_l) if "rouge_l" in globals() else None
if rouge_l_mean is None:
    print("ROUGE-L scores were not computed for this task; skipping mean(rouge_l).")
rouge_l_mean