In [1]:
from project_dataset import load_dataset

In [2]:
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class Args:
    """Configuration values for this notebook.

    Every attribute is annotated so that ``@dataclass`` actually registers it
    as a field: defaults can then be overridden per instance
    (``Args(task="...")``) and show up in the generated ``repr``/``eq``.
    Without annotations the decorator sees no fields at all.

    NOTE(review): only ``task`` is read by the cells visible in this notebook;
    the remaining values look like training hyper-parameters shared with a
    training script -- confirm before removing any of them.
    """

    model_name: str = "Salesforce/codet5p-770m"
    num_proc: int = 4
    batch_size: int = 2
    max_src_length: int = 1200
    max_des_length: int = 153
    # default_factory gives each instance its own list; a plain list default
    # would be shared (and mutated) across every Args instance.
    data_cols: List[str] = field(
        default_factory=lambda: ["CVE ID", "explain", "func_before", "processed_func"]
    )
    save_dir: str = 'tf_board'
    epochs: int = 11
    grad_acc_steps: int = 4
    lr: float = 5e-5
    log_freq: int = 10
    local_rank: int = -1
    # presumably a DeepSpeed config path when set -- TODO confirm the expected type
    deepspeed: Optional[str] = None
    fp16: bool = True
    lr_warmup_steps: int = 200
    weight_decay: float = 0.05
    task: str = "root_cause"
    prefix: str = "770m"


args = Args()

In [3]:
# Load the dataset for the configured task through the project-local
# `project_dataset.load_dataset` helper (not the Hugging Face function of the
# same name).
ds = load_dataset(args.task)

In [4]:
# Display the loaded object: its splits, feature names and row counts.
ds

DatasetDict({
    train: Dataset({
        features: ['CVE ID', 'explain', 'func_before', 'processed_func'],
        num_rows: 3431
    })
    validation: Dataset({
        features: ['CVE ID', 'explain', 'func_before', 'processed_func'],
        num_rows: 382
    })
    test: Dataset({
        features: ['CVE ID', 'explain', 'func_before', 'processed_func'],
        num_rows: 954
    })
})

In [5]:
import pandas as pd

In [6]:
import evaluate

# Load the ROUGE metric through the `evaluate` library; reused for every
# per-example score below.
rouge = evaluate.load("rouge")

In [7]:
# Take the test split and convert it to a pandas DataFrame in one step, so
# `df_test` is only ever bound to the DataFrame.
df_test = ds['test'].to_pandas()

In [8]:
import csv

# The predictions file is assumed to hold one generated text per line (TODO
# confirm against the script that writes it). pandas' CSV defaults are unsafe
# for such free text: '"' is treated as a quote character (an unbalanced quote
# swallows the following lines), blank lines are skipped, and strings such as
# "NA" or "null" become NaN. Any of these shifts or corrupts predictions
# relative to the references, so read every line as literal text instead.
df_1 = pd.read_csv(
    'results/root_cause/t5p_script_770m/generated_predictions.txt',
    sep='\t',
    header=None,
    quoting=csv.QUOTE_NONE,   # do not interpret quote characters
    skip_blank_lines=False,   # an empty prediction still occupies its row
    keep_default_na=False,    # keep "NA"/"null"/"" as plain strings
    dtype=str,                # never coerce numeric-looking predictions
)

In [9]:
# Column 0 of the predictions frame as a plain Python list (one entry per row).
preds = df_1[0].values.tolist()

In [10]:
# The 'explain' column of the test split is used as the reference text for
# each prediction.
references = df_test['explain'].values.tolist()

In [11]:
# Score each (prediction, reference) pair on its own so the scores can later
# be grouped per example.
# zip() stops at the shorter input, which would silently pair predictions with
# the wrong references or drop examples; fail loudly on a mismatch instead.
if len(preds) != len(references):
    raise ValueError(
        f"Got {len(preds)} predictions but {len(references)} references; "
        "they must line up one-to-one."
    )

df = []  # one (row position, rouge1, rouge2, rougeL) tuple per example
for i, (pred, ref) in enumerate(zip(preds, references)):
    r_ = rouge.compute(predictions=[pred], references=[ref])
    df.append((i, r_['rouge1'], r_['rouge2'], r_['rougeL']))

In [12]:
# Turn the list of per-example score tuples into a DataFrame; 'id' is the
# position of the example in the prediction/reference lists.
df_ = pd.DataFrame(df, columns=['id', 'rouge1', 'rouge2', 'rougeL'])

In [14]:
# Persist the per-example scores as a TSV named after the task.
# NOTE(review): to_csv writes the index by default, so the file gets an
# unnamed first column in addition to 'id'; the "770m" suffix is hard-coded
# here rather than taken from args.prefix.
df_.to_csv(f"{args.task}_t5p_770m.tsv", sep='\t')

## Analytics

In [13]:
import pyarrow.parquet as pq

# Read the parquet file into pandas and keep only the first row seen for each
# 'CVE ID', so the frame has at most one row per CVE.
table = pq.read_table('data/MSR_data_cleaned.parquet')
df_bigvul = table.to_pandas().drop_duplicates(subset='CVE ID', keep='first')

In [14]:
# Attach a CWE ID to every test row. Only the two columns that are actually
# used are taken from the right-hand frame (the original merged every column
# and then discarded them), and validate='many_to_one' makes pandas raise if
# 'CVE ID' is not unique on the right -- a duplicate would add rows and break
# the positional concat below.
df_foo = df_test.merge(
    df_bigvul[['CVE ID', 'CWE ID']],
    on='CVE ID',
    how='left',
    validate='many_to_one',
)[['CVE ID', 'CWE ID']]
CWE_IDs = df_foo['CWE ID'].unique()

# concat(axis=1) aligns on the index, so row i of the score frame and row i of
# the CWE frame must describe the same test example.
if len(df_) != len(df_foo):
    raise ValueError(
        f"Score frame has {len(df_)} rows but the test split has {len(df_foo)}; "
        "cannot align scores with CWE IDs."
    )
df_foo_final = pd.concat([df_, df_foo], axis=1)

# Mean ROUGE-L and sample count per CWE (groupby drops rows without a CWE ID).
average_rougeL = df_foo_final.groupby('CWE ID')['rougeL'].agg(['mean', 'count'])
# Show only CWEs with at least 10 test samples, lowest mean ROUGE-L first.
average_rougeL[average_rougeL['count'] >= 10].sort_values('mean', ascending=True).reset_index()

Unnamed: 0,CWE ID,mean,count
0,CWE-476,0.543013,10
1,CWE-200,0.651683,45
2,CWE-416,0.728199,20
3,CWE-189,0.77538,30
4,CWE-362,0.785367,35
5,CWE-17,0.808012,12
6,CWE-264,0.812219,56
7,CWE-20,0.820392,129
8,CWE-119,0.823079,232
9,CWE-399,0.847303,64


In [15]:
# Number of distinct 'CWE ID' values among the test rows.
# NOTE(review): Series.unique() keeps NaN as a value, so test rows without a
# matching CWE would count as one extra entry here -- confirm if that matters.
len(CWE_IDs)

48