In [8]:
# Enable IPython's autoreload extension in mode 2 so that imported project
# modules are reloaded before each cell execution.
%load_ext autoreload
%autoreload 2

In [9]:
import asyncio
from elasticsearch import OrjsonSerializer
import torch
from rag.loc_doc import *
from transformers import AutoModel, AutoTokenizer
from project_dataset import load_dataset
import elasticsearch_dsl as dsl
import evaluate
import numpy as np

In [48]:
# Load the evaluation metrics through `evaluate.load`.
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")
# NOTE(review): no visible cell calls bertscore.compute — confirm this metric is still needed.
bertscore = evaluate.load("bertscore")

In [19]:
# Create the async Elasticsearch connection (orjson-based serializer).
# NOTE(review): the host is hard-coded to a local node — confirm before running elsewhere.
dsl.async_connections.create_connection(hosts=['http://localhost:9200'], serializer=OrjsonSerializer())


<AsyncElasticsearch(['http://localhost:9200'])>

In [5]:
# Model id of the sentence-embedding checkpoint used to embed code snippets.
checkpoint = "flax-sentence-embeddings/st-codesearch-distilroberta-base"
# Prefer the GPU, but fall back to the CPU so the notebook also runs on
# machines without CUDA instead of failing when the model is moved to "cuda".
device = "cuda" if torch.cuda.is_available() else "cpu"

In [54]:
from sentence_transformers import SentenceTransformer

# Reuse the `checkpoint` constant rather than repeating the model id, and let
# SentenceTransformer place the model on `device` through its constructor.
model = SentenceTransformer(checkpoint, device=device)

In [6]:
def get_loc(sample, linevul_ranking, linevul_top=10):
    """Pick lines of ``sample`` by ranked line index.

    Args:
        sample: Source text; it is split on ``'\\n'`` into lines.
        linevul_ranking: Array-like exposing ``.tolist()`` that yields line
            indices into ``sample``, in ranking order.
        linevul_top: Number of leading ranking entries to keep.

    Returns:
        The selected lines joined with ``'\\n'`` in ranking order, with
        leading and trailing whitespace stripped from the joined text.
    """
    ranked_indices = linevul_ranking.tolist()
    source_lines = sample.split('\n')
    selected = []
    for line_index in ranked_indices[:linevul_top]:
        selected.append(source_lines[line_index])
    return '\n'.join(selected).strip()

# Attack vector

In [12]:
# Select the task name and load the dataset for it; `task` is also used to
# build the output file name later on.
task = 'attack_vector'
ds = load_dataset(task)

In [13]:
# Convert the 'train' and 'test' splits to pandas objects via to_pandas().
train_ds = ds['train'].to_pandas()
test_ds = ds['test'].to_pandas()

In [61]:
# Build the retrieval corpus: one (embedding, explanation) pair per training row.
# get_loc reduces each 'processed_func' to the lines selected by 'linevul_ranking';
# the reduced snippets are then embedded in a single model.encode call.
code = [
    get_loc(func, ranking)
    for func, ranking in zip(train_ds['processed_func'], train_ds['linevul_ranking'])
]
target = train_ds["explain"].tolist()
# Convert every embedding to a plain list before pairing it with its explanation.
embeddings = [vector.tolist() for vector in model.encode(code)]
train_doc = list(zip(embeddings, target))

In [63]:
async def ingest_attack_vector():
    if await AttackVecor._index.exists():
        await AttackVecor._index.delete()
    await AttackVecor.init()
    for row in train_doc:
        q = AttackVecor(explain=row[1], embedding=row[0])
        await q.save()

await ingest_attack_vector()

In [64]:
# Build the query set: one (embedding, reference explanation) pair per test row.
# get_loc reduces each 'processed_func' to the lines selected by 'linevul_ranking';
# the reduced snippets are then embedded in a single model.encode call.
code = [
    get_loc(func, ranking)
    for func, ranking in zip(test_ds['processed_func'], test_ds['linevul_ranking'])
]
target = test_ds["explain"].tolist()
# Convert every embedding to a plain list before pairing it with its explanation.
embeddings = [vector.tolist() for vector in model.encode(code)]
test_doc = list(zip(embeddings, target))

In [65]:
results = []
for row in test_doc:
    q = row[0]
    s = AttackVecor.search()
    s = s.query(dsl.query.Knn(field=AttackVecor.embedding, query_vector=q))
    r = await s[:1].execute()
    results.append( (r.hits[0].explain, row[1]) )

In [66]:
def store_results_to_file(results, file_path):
    """Write the first element of each entry in ``results`` to ``file_path``.

    Args:
        results: Iterable of indexable entries; only ``entry[0]`` is written.
        file_path: Destination path; the file is opened in ``'w'`` mode, so
            existing content is overwritten.

    Each written value is followed by a newline character.
    """
    with open(file_path, 'w') as out:
        out.writelines(f'{entry[0]}\n' for entry in results)

In [67]:
# Write the retrieved explanations (first element of each result) to a per-task file.
# NOTE(review): the file name says "bm25" although the retrieval in this notebook
# is a kNN vector query — confirm the intended name.
store_results_to_file(results, f'{task}_rag_bm25.txt')

In [69]:
# Score every (prediction, reference) pair on its own with ROUGE and BLEU.
rouge_results = []
bleu_results = []
for prediction, reference in results:
    r_ = rouge.compute(predictions=[prediction], references=[reference])
    # BLEU accepts a list of references per prediction; use the same nested
    # form as the other task sections of this notebook.
    b_ = bleu.compute(predictions=[prediction], references=[[reference]])
    # Keep only the three ROUGE variants that are reported.
    rouge_results.append((r_['rouge1'], r_['rouge2'], r_['rougeL']))
    bleu_results.append(b_)

In [70]:
# Aggregate the per-sample scores into task-level averages.
rouge_results_array = np.array(rouge_results)
bleu_results_array = np.array([entry['bleu'] for entry in bleu_results])

# Columns 0, 1, 2 hold rouge1, rouge2 and rougeL respectively.
avg_rouge1, avg_rouge2, avg_rougeL = (
    np.mean(rouge_results_array[:, column]) for column in range(3)
)
avg_bleu = np.mean(bleu_results_array)

# Print the average values
for label, value in (
    ("Average Rouge-1:", avg_rouge1),
    ("Average Rouge-2:", avg_rouge2),
    ("Average Rouge-L:", avg_rougeL),
    ("Average BLEU:", avg_bleu),
):
    print(label, value)

Average Rouge-1: 0.6367213100379829
Average Rouge-2: 0.5654240413579364
Average Rouge-L: 0.6358387547990585
Average BLEU: 0.5128855464918658


# Root cause

In [71]:
# Switch to the root-cause task: reload the dataset and rebuild both frames.
# These names overwrite the ones from the previous task section.
task = 'root_cause'
ds = load_dataset(task)
train_ds = ds['train'].to_pandas()
test_ds = ds['test'].to_pandas()

In [72]:
# Build the retrieval corpus: one (embedding, explanation) pair per training row.
# get_loc reduces each 'processed_func' to the lines selected by 'linevul_ranking';
# the reduced snippets are then embedded in a single model.encode call.
code = [
    get_loc(func, ranking)
    for func, ranking in zip(train_ds['processed_func'], train_ds['linevul_ranking'])
]
target = train_ds["explain"].tolist()
# Convert every embedding to a plain list before pairing it with its explanation.
embeddings = [vector.tolist() for vector in model.encode(code)]
train_doc = list(zip(embeddings, target))

In [73]:
# Build the query set: one (embedding, reference explanation) pair per test row.
# get_loc reduces each 'processed_func' to the lines selected by 'linevul_ranking';
# the reduced snippets are then embedded in a single model.encode call.
code = [
    get_loc(func, ranking)
    for func, ranking in zip(test_ds['processed_func'], test_ds['linevul_ranking'])
]
target = test_ds["explain"].tolist()
# Convert every embedding to a plain list before pairing it with its explanation.
embeddings = [vector.tolist() for vector in model.encode(code)]
test_doc = list(zip(embeddings, target))

In [74]:
async def ingest_root_cause():
    if await RootCause._index.exists():
        await RootCause._index.delete()
    await RootCause.init()
    for row in train_doc:
        q = RootCause(explain=row[1], embedding=row[0])
        await q.save()

await ingest_root_cause()

In [75]:
results = []
for row in test_doc:
    q = row[0]
    s = RootCause.search()
    s = s.query(dsl.query.Knn(field=RootCause.embedding, query_vector=q))
    r = await s[:1].execute()
    results.append( (r.hits[0].explain, row[1]) )

In [76]:
# Write the retrieved explanations (first element of each result) to a per-task file.
# NOTE(review): the file name says "bm25" although the retrieval in this notebook
# is a kNN vector query — confirm the intended name.
store_results_to_file(results, f'{task}_rag_bm25.txt')

In [77]:
# Score every (prediction, reference) pair on its own with ROUGE and BLEU.
rouge_results, bleu_results = [], []
for prediction, reference in results:
    rouge_scores = rouge.compute(predictions=[prediction], references=[reference])
    bleu_scores = bleu.compute(predictions=[prediction], references=[[reference]])
    # Keep only the three ROUGE variants that are reported.
    rouge_results.append(
        (rouge_scores['rouge1'], rouge_scores['rouge2'], rouge_scores['rougeL'])
    )
    bleu_results.append(bleu_scores)

In [78]:
# Aggregate the per-sample scores into task-level averages.
rouge_results_array = np.array(rouge_results)
bleu_results_array = np.array([entry['bleu'] for entry in bleu_results])

# Columns 0, 1, 2 hold rouge1, rouge2 and rougeL respectively.
avg_rouge1, avg_rouge2, avg_rougeL = (
    np.mean(rouge_results_array[:, column]) for column in range(3)
)
avg_bleu = np.mean(bleu_results_array)

# Print the average values
for label, value in (
    ("Average Rouge-1:", avg_rouge1),
    ("Average Rouge-2:", avg_rouge2),
    ("Average Rouge-L:", avg_rougeL),
    ("Average BLEU:", avg_bleu),
):
    print(label, value)

Average Rouge-1: 0.4879555183270134
Average Rouge-2: 0.4569253397563256
Average Rouge-L: 0.4866442457053589
Average BLEU: 0.3315091576396999


# Impact

In [79]:
# Switch to the impact task: reload the dataset and rebuild both frames.
# These names overwrite the ones from the previous task section.
task = 'impact'
ds = load_dataset(task)
train_ds = ds['train'].to_pandas()
test_ds = ds['test'].to_pandas()

In [80]:
# Build the retrieval corpus: one (embedding, explanation) pair per training row.
# get_loc reduces each 'processed_func' to the lines selected by 'linevul_ranking';
# the reduced snippets are then embedded in a single model.encode call.
code = [
    get_loc(func, ranking)
    for func, ranking in zip(train_ds['processed_func'], train_ds['linevul_ranking'])
]
target = train_ds["explain"].tolist()
embeddings = [vector.tolist() for vector in model.encode(code)]
train_doc = list(zip(embeddings, target))


# Build the query set from the test split with the same steps.
# `code`, `target` and `embeddings` are rebound to the test-split values here.
code = [
    get_loc(func, ranking)
    for func, ranking in zip(test_ds['processed_func'], test_ds['linevul_ranking'])
]
target = test_ds["explain"].tolist()
embeddings = [vector.tolist() for vector in model.encode(code)]
test_doc = list(zip(embeddings, target))

In [81]:
async def ingest_impact():
    if await Impact._index.exists():
        await Impact._index.delete()
    await Impact.init()
    for row in train_doc:
        q = Impact(explain=row[1], embedding=row[0])
        await q.save()

await ingest_impact()

In [82]:
results = []
for row in test_doc:
    q = row[0]
    s = RootCause.search()
    s = s.query(dsl.query.Knn(field=RootCause.embedding, query_vector=q))
    r = await s[:1].execute()
    results.append( (r.hits[0].explain, row[1]) )

store_results_to_file(results, f'{task}_rag_bm25.txt')

In [83]:
# Score every (prediction, reference) pair on its own with ROUGE and BLEU.
rouge_results, bleu_results = [], []
for prediction, reference in results:
    rouge_scores = rouge.compute(predictions=[prediction], references=[reference])
    bleu_scores = bleu.compute(predictions=[prediction], references=[[reference]])
    # Keep only the three ROUGE variants that are reported.
    rouge_results.append(
        (rouge_scores['rouge1'], rouge_scores['rouge2'], rouge_scores['rougeL'])
    )
    bleu_results.append(bleu_scores)

# Aggregate the per-sample scores into task-level averages.
rouge_results_array = np.array(rouge_results)
bleu_results_array = np.array([entry['bleu'] for entry in bleu_results])

# Columns 0, 1, 2 hold rouge1, rouge2 and rougeL respectively.
avg_rouge1, avg_rouge2, avg_rougeL = (
    np.mean(rouge_results_array[:, column]) for column in range(3)
)
avg_bleu = np.mean(bleu_results_array)

# Print the average values
for label, value in (
    ("Average Rouge-1:", avg_rouge1),
    ("Average Rouge-2:", avg_rouge2),
    ("Average Rouge-L:", avg_rougeL),
    ("Average BLEU:", avg_bleu),
):
    print(label, value)

Average Rouge-1: 0.05576602497539942
Average Rouge-2: 0.004810337553798601
Average Rouge-L: 0.05181231829600175
Average BLEU: 0.00255885363357216


# Vulnerability type

In [84]:
# Switch to the vulnerability-type task: reload the dataset and rebuild both frames.
task = 'vulnerability_type'
ds = load_dataset(task)
train_ds = ds['train'].to_pandas()
test_ds = ds['test'].to_pandas()

# Build the retrieval corpus: one (embedding, explanation) pair per training row.
# get_loc reduces each 'processed_func' to the lines selected by 'linevul_ranking';
# the reduced snippets are then embedded in a single model.encode call.
code = [
    get_loc(func, ranking)
    for func, ranking in zip(train_ds['processed_func'], train_ds['linevul_ranking'])
]
target = train_ds["explain"].tolist()
embeddings = [vector.tolist() for vector in model.encode(code)]
train_doc = list(zip(embeddings, target))


# Build the query set from the test split with the same steps.
# `code`, `target` and `embeddings` are rebound to the test-split values here.
code = [
    get_loc(func, ranking)
    for func, ranking in zip(test_ds['processed_func'], test_ds['linevul_ranking'])
]
target = test_ds["explain"].tolist()
embeddings = [vector.tolist() for vector in model.encode(code)]
test_doc = list(zip(embeddings, target))

In [85]:
async def ingest_vulnerability_type():
    if await VulnerabilityType._index.exists():
        await VulnerabilityType._index.delete()
    await VulnerabilityType.init()
    for row in train_doc:
        q = VulnerabilityType(explain=row[1], embedding=row[0])
        await q.save()

await ingest_vulnerability_type()

In [88]:
results = []
for row in test_doc:
    q = row[0]
    s = RootCause.search()
    s = s.query(dsl.query.Knn(field=RootCause.embedding, query_vector=q))
    r = await s[:1].execute()
    results.append( (r.hits[0].explain, row[1]) )

store_results_to_file(results, f'{task}_rag_bm25.txt')

In [87]:
# Score every (prediction, reference) pair on its own with ROUGE and BLEU.
rouge_results, bleu_results = [], []
for prediction, reference in results:
    rouge_scores = rouge.compute(predictions=[prediction], references=[reference])
    bleu_scores = bleu.compute(predictions=[prediction], references=[[reference]])
    # Keep only the three ROUGE variants that are reported.
    rouge_results.append(
        (rouge_scores['rouge1'], rouge_scores['rouge2'], rouge_scores['rougeL'])
    )
    bleu_results.append(bleu_scores)

# Aggregate the per-sample scores into task-level averages.
rouge_results_array = np.array(rouge_results)
bleu_results_array = np.array([entry['bleu'] for entry in bleu_results])

# Columns 0, 1, 2 hold rouge1, rouge2 and rougeL respectively.
avg_rouge1, avg_rouge2, avg_rougeL = (
    np.mean(rouge_results_array[:, column]) for column in range(3)
)
avg_bleu = np.mean(bleu_results_array)

# Print the average values
for label, value in (
    ("Average Rouge-1:", avg_rouge1),
    ("Average Rouge-2:", avg_rouge2),
    ("Average Rouge-L:", avg_rougeL),
    ("Average BLEU:", avg_bleu),
):
    print(label, value)

Average Rouge-1: 0.09399278013654645
Average Rouge-2: 0.06398645127403457
Average Rouge-L: 0.09399278013654645
Average BLEU: 0.0
