In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [42]:
import asyncio
from elasticsearch import OrjsonSerializer
import torch
from rag.loc_doc import *
from transformers import AutoModel, AutoTokenizer
from project_dataset import load_dataset
import elasticsearch_dsl as dsl
import evaluate
import numpy as np

In [20]:
# Load the ROUGE and BLEU metric implementations through the `evaluate`
# library; both are used for every task evaluated below.
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

In [4]:
# Open the async Elasticsearch connection (node at localhost:9200) using the
# orjson-based serializer.
# NOTE(review): the host is hard-coded; confirm before running against another cluster.
dsl.async_connections.create_connection(hosts=['http://localhost:9200'], serializer=OrjsonSerializer())


<AsyncElasticsearch(['http://localhost:9200'])>

In [5]:
# Embedding checkpoint used to encode code snippets for retrieval.
checkpoint = "Salesforce/codet5p-110m-embedding"
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at `.to(device)`.
device = "cuda" if torch.cuda.is_available() else "cpu"

# trust_remote_code=True lets the checkpoint supply its own tokenizer/model code.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True).to(device)

In [6]:
def get_loc(sample, linevul_ranking, linevul_top=10):
    """Return the lines of ``sample`` selected by the head of a ranking.

    Args:
        sample: Source text; it is split into lines on ``'\\n'``.
        linevul_ranking: Array-like exposing ``.tolist()`` that yields line
            indices into ``sample`` in ranked order.
        linevul_top: How many leading entries of the ranking to keep.

    Returns:
        The selected lines, in ranking order, joined with newlines and with
        surrounding whitespace stripped from the joined text.
    """
    top_indices = linevul_ranking.tolist()[:linevul_top]
    source_lines = sample.split('\n')
    picked = []
    for line_index in top_indices:
        picked.append(source_lines[line_index])
    return '\n'.join(picked).strip()

# Attack vector

In [7]:
# Select the task and load its dataset; `task` is also reused later to name
# the results file.
task = 'attack_vector'
ds = load_dataset(task)

In [8]:
# Convert the train and test splits to pandas so they can be iterated row by row.
train_ds = ds['train'].to_pandas()
test_ds = ds['test'].to_pandas()

In [9]:
# Embed the top-ranked lines of every training function and pair each
# embedding with its reference explanation.
# NOTE: inputs are not truncated; the tokenizer warns when a sequence is
# longer than the model's 512-token maximum.
train_doc = []
with torch.no_grad():
    for _, record in train_ds.iterrows():
        snippet = get_loc(record['processed_func'], record['linevul_ranking'])
        reference = record["explain"]
        token_ids = tokenizer.encode(snippet, return_tensors="pt").to(device)
        vector = model(token_ids)[0].tolist()
        train_doc.append((vector, reference))

Token indices sequence length is longer than the specified maximum sequence length for this model (1435 > 512). Running this sequence through the model will result in indexing errors


In [10]:
async def ingest_attack_vector():
    if await AttackVecor._index.exists():
        await AttackVecor._index.delete()
    await AttackVecor.init()
    for row in train_doc:
        q = AttackVecor(explain=row[1], embedding=row[0])
        await q.save()

await ingest_attack_vector()

In [11]:
# Embed the top-ranked lines of every test function and pair each embedding
# with its reference explanation (same procedure as for the training split).
test_doc = []
with torch.no_grad():
    for _, record in test_ds.iterrows():
        snippet = get_loc(record['processed_func'], record['linevul_ranking'])
        reference = record["explain"]
        token_ids = tokenizer.encode(snippet, return_tensors="pt").to(device)
        vector = model(token_ids)[0].tolist()
        test_doc.append((vector, reference))

In [12]:
results = []
for row in test_doc:
    q = row[0]
    s = AttackVecor.search()
    s = s.query(dsl.query.Knn(field=AttackVecor.embedding, query_vector=q))
    r = await s[:1].execute()
    results.append( (r.hits[0].explain, row[1]) )

In [13]:
def store_results_to_file(results, file_path):
    """Write result pairs to ``file_path`` as tab-separated lines.

    Args:
        results: Iterable of indexable pairs; element 0 and element 1 of each
            pair are written as the two columns of one line.
        file_path: Destination path. An existing file is overwritten.

    The file is always written as UTF-8 so the output does not depend on the
    platform's default encoding and non-ASCII text cannot fail to encode.
    Tabs or newlines inside the values are written as-is.
    """
    with open(file_path, 'w', encoding='utf-8') as file:
        for result in results:
            file.write(f'{result[0]}\t{result[1]}\n')

In [14]:
# Persist the (retrieved, reference) pairs for the current task to `<task>_rag.txt`.
store_results_to_file(results, f'{task}_rag.txt')

In [21]:
# Score every (retrieved, reference) pair on its own with ROUGE and BLEU.
rouge_results, bleu_results = [], []
for retrieved, reference in results:
    rouge_scores = rouge.compute(predictions=[retrieved], references=[reference])
    bleu_scores = bleu.compute(predictions=[retrieved], references=[[reference]])
    rouge_results.append(
        tuple(rouge_scores[key] for key in ('rouge1', 'rouge2', 'rougeL'))
    )
    bleu_results.append(bleu_scores)

In [25]:
# Stack the per-sample scores so each metric can be averaged over the test set.
rouge_results_array = np.array(rouge_results)
bleu_results_array = np.array([scores['bleu'] for scores in bleu_results])

# Columns 0, 1, 2 hold rouge1, rouge2, rougeL respectively.
avg_rouge1, avg_rouge2, avg_rougeL = (
    np.mean(rouge_results_array[:, column]) for column in range(3)
)
avg_bleu = np.mean(bleu_results_array)

# Print the average values
print("Average Rouge-1:", avg_rouge1)
print("Average Rouge-2:", avg_rouge2)
print("Average Rouge-L:", avg_rougeL)

print("Average BLEU:", avg_bleu)

Average Rouge-1: 0.703136168282482
Average Rouge-2: 0.6431229793234713
Average Rouge-L: 0.7025186341559587
Average BLEU: 0.5987995240469469


# Root cause

In [26]:
# Switch to the root-cause task and convert both splits to pandas.
task = 'root_cause'
ds = load_dataset(task)
train_ds, test_ds = (ds[split].to_pandas() for split in ('train', 'test'))

In [28]:
# Embed the top-ranked lines of every training function and pair each
# embedding with its reference explanation.
train_doc = []
with torch.no_grad():
    for _, record in train_ds.iterrows():
        snippet = get_loc(record['processed_func'], record['linevul_ranking'])
        reference = record["explain"]
        token_ids = tokenizer.encode(snippet, return_tensors="pt").to(device)
        vector = model(token_ids)[0].tolist()
        train_doc.append((vector, reference))

In [29]:
# Embed the top-ranked lines of every test function and pair each embedding
# with its reference explanation.
test_doc = []
with torch.no_grad():
    for _, record in test_ds.iterrows():
        snippet = get_loc(record['processed_func'], record['linevul_ranking'])
        reference = record["explain"]
        token_ids = tokenizer.encode(snippet, return_tensors="pt").to(device)
        vector = model(token_ids)[0].tolist()
        test_doc.append((vector, reference))

In [32]:
async def ingest_root_cause():
    if await RootCause._index.exists():
        await RootCause._index.delete()
    await RootCause.init()
    for row in train_doc:
        q = RootCause(explain=row[1], embedding=row[0])
        await q.save()

await ingest_root_cause()

In [33]:
results = []
for row in test_doc:
    q = row[0]
    s = RootCause.search()
    s = s.query(dsl.query.Knn(field=RootCause.embedding, query_vector=q))
    r = await s[:1].execute()
    results.append( (r.hits[0].explain, row[1]) )

In [34]:
# Persist the (retrieved, reference) pairs for the current task to `<task>_rag.txt`.
store_results_to_file(results, f'{task}_rag.txt')

In [35]:
# Score every (retrieved, reference) pair on its own with ROUGE and BLEU.
rouge_results, bleu_results = [], []
for retrieved, reference in results:
    rouge_scores = rouge.compute(predictions=[retrieved], references=[reference])
    bleu_scores = bleu.compute(predictions=[retrieved], references=[[reference]])
    rouge_results.append(
        tuple(rouge_scores[key] for key in ('rouge1', 'rouge2', 'rougeL'))
    )
    bleu_results.append(bleu_scores)

In [36]:
# Stack the per-sample scores so each metric can be averaged over the test set.
rouge_results_array = np.array(rouge_results)
bleu_results_array = np.array([scores['bleu'] for scores in bleu_results])

# Columns 0, 1, 2 hold rouge1, rouge2, rougeL respectively.
avg_rouge1, avg_rouge2, avg_rougeL = (
    np.mean(rouge_results_array[:, column]) for column in range(3)
)
avg_bleu = np.mean(bleu_results_array)

# Print the average values
print("Average Rouge-1:", avg_rouge1)
print("Average Rouge-2:", avg_rouge2)
print("Average Rouge-L:", avg_rougeL)

print("Average BLEU:", avg_bleu)

Average Rouge-1: 0.6004329108516657
Average Rouge-2: 0.5731268817621876
Average Rouge-L: 0.5995116104775612
Average BLEU: 0.4187846760123499


# Impact

In [38]:
# Switch to the impact task and convert both splits to pandas.
task = 'impact'
ds = load_dataset(task)
train_ds = ds['train'].to_pandas()
test_ds = ds['test'].to_pandas()

# Embed the top-ranked lines of every function in each split and pair each
# embedding with its reference explanation. The training split is processed
# first, then the test split.
train_doc, test_doc = [], []
with torch.no_grad():
    for frame, bucket in ((train_ds, train_doc), (test_ds, test_doc)):
        for _, record in frame.iterrows():
            snippet = get_loc(record['processed_func'], record['linevul_ranking'])
            reference = record["explain"]
            token_ids = tokenizer.encode(snippet, return_tensors="pt").to(device)
            vector = model(token_ids)[0].tolist()
            bucket.append((vector, reference))

In [39]:
async def ingest_impact():
    if await Impact._index.exists():
        await Impact._index.delete()
    await Impact.init()
    for row in train_doc:
        q = Impact(explain=row[1], embedding=row[0])
        await q.save()

await ingest_impact()

In [40]:
results = []
for row in test_doc:
    q = row[0]
    s = RootCause.search()
    s = s.query(dsl.query.Knn(field=RootCause.embedding, query_vector=q))
    r = await s[:1].execute()
    results.append( (r.hits[0].explain, row[1]) )

store_results_to_file(results, f'{task}_rag.txt')

In [41]:
# Score every (retrieved, reference) pair on its own with ROUGE and BLEU.
rouge_results, bleu_results = [], []
for retrieved, reference in results:
    rouge_scores = rouge.compute(predictions=[retrieved], references=[reference])
    bleu_scores = bleu.compute(predictions=[retrieved], references=[[reference]])
    rouge_results.append(
        tuple(rouge_scores[key] for key in ('rouge1', 'rouge2', 'rougeL'))
    )
    bleu_results.append(bleu_scores)

# Stack the per-sample scores so each metric can be averaged over the test set.
rouge_results_array = np.array(rouge_results)
bleu_results_array = np.array([scores['bleu'] for scores in bleu_results])

# Columns 0, 1, 2 hold rouge1, rouge2, rougeL respectively.
avg_rouge1, avg_rouge2, avg_rougeL = (
    np.mean(rouge_results_array[:, column]) for column in range(3)
)
avg_bleu = np.mean(bleu_results_array)

# Print the average values
print("Average Rouge-1:", avg_rouge1)
print("Average Rouge-2:", avg_rouge2)
print("Average Rouge-L:", avg_rougeL)

print("Average BLEU:", avg_bleu)

Average Rouge-1: 0.054793657927483315
Average Rouge-2: 0.005817571017177101
Average Rouge-L: 0.04996393263301194
Average BLEU: 0.003770665015952632


# Vulnerability type

In [43]:
# Switch to the vulnerability-type task and convert both splits to pandas.
task = 'vulnerability_type'
ds = load_dataset(task)
train_ds = ds['train'].to_pandas()
test_ds = ds['test'].to_pandas()

# Embed the top-ranked lines of every function in each split and pair each
# embedding with its reference explanation. The training split is processed
# first, then the test split.
train_doc, test_doc = [], []
with torch.no_grad():
    for frame, bucket in ((train_ds, train_doc), (test_ds, test_doc)):
        for _, record in frame.iterrows():
            snippet = get_loc(record['processed_func'], record['linevul_ranking'])
            reference = record["explain"]
            token_ids = tokenizer.encode(snippet, return_tensors="pt").to(device)
            vector = model(token_ids)[0].tolist()
            bucket.append((vector, reference))

In [44]:
async def ingest_vulnerability_type():
    if await VulnerabilityType._index.exists():
        await VulnerabilityType._index.delete()
    await VulnerabilityType.init()
    for row in train_doc:
        q = VulnerabilityType(explain=row[1], embedding=row[0])
        await q.save()

await ingest_vulnerability_type()

In [45]:
results = []
for row in test_doc:
    q = row[0]
    s = RootCause.search()
    s = s.query(dsl.query.Knn(field=RootCause.embedding, query_vector=q))
    r = await s[:1].execute()
    results.append( (r.hits[0].explain, row[1]) )

store_results_to_file(results, f'{task}_rag.txt')

In [46]:
# Score every (retrieved, reference) pair on its own with ROUGE and BLEU.
rouge_results, bleu_results = [], []
for retrieved, reference in results:
    rouge_scores = rouge.compute(predictions=[retrieved], references=[reference])
    bleu_scores = bleu.compute(predictions=[retrieved], references=[[reference]])
    rouge_results.append(
        tuple(rouge_scores[key] for key in ('rouge1', 'rouge2', 'rougeL'))
    )
    bleu_results.append(bleu_scores)

# Stack the per-sample scores so each metric can be averaged over the test set.
rouge_results_array = np.array(rouge_results)
bleu_results_array = np.array([scores['bleu'] for scores in bleu_results])

# Columns 0, 1, 2 hold rouge1, rouge2, rougeL respectively.
avg_rouge1, avg_rouge2, avg_rougeL = (
    np.mean(rouge_results_array[:, column]) for column in range(3)
)
avg_bleu = np.mean(bleu_results_array)

# Print the average values
print("Average Rouge-1:", avg_rouge1)
print("Average Rouge-2:", avg_rouge2)
print("Average Rouge-L:", avg_rougeL)

print("Average BLEU:", avg_bleu)

Average Rouge-1: 0.09700474754688473
Average Rouge-2: 0.06274229838025343
Average Rouge-L: 0.09681887394093679
Average BLEU: 0.0
