In [9]:
import numpy as np
import pandas as pd
import torch
from tqdm.notebook import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, StratifiedKFold
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW, get_linear_schedule_with_warmup
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from sklearn.model_selection import KFold
import datetime

In [10]:
# Number of examples per batch handed to the DataLoader.
batch_size = 16

# Load the labelled dataset and count how often each 'Artifact Id' occurs.
df = pd.read_csv('dataset.csv')
labels = df['Artifact Id']
label_counts = labels.value_counts()

# Keep only labels with between 5 and 200 examples (both bounds inclusive).
# The original note says this removes the "Command" label -- presumably
# because it has more than 200 rows; verify against the data.
# The order of `filtered_labels_list` is the class-index order (position i is
# class id i), so it must match the order used when the model was trained.
filtered_labels = label_counts[label_counts.between(5, 200)]
filtered_labels_list = filtered_labels.index.tolist()
def tokenize_data(texts, tokenizer):
    """Run ``tokenizer`` over ``texts`` and return whatever it produces.

    The tokenizer is called with padding and truncation enabled, a
    ``max_length`` of 512 and ``return_tensors='pt'``.

    Args:
        texts: The text input forwarded as the tokenizer's first argument.
        tokenizer: A callable tokenizer accepting the keyword options above.

    Returns:
        The tokenizer's return value, unchanged.
    """
    tokenizer_options = {
        'padding': True,
        'truncation': True,
        'return_tensors': 'pt',
        'max_length': 512,
    }
    return tokenizer(texts, **tokenizer_options)
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [11]:
# Path of the saved state dict that the inference cell loads with torch.load.
# Other checkpoints tried previously (swap in to compare):
#   "model_2024-12-03_16-37-16.pth", "my_dict.pth", "best_model.pth"
model_path = "model_avr.pth"

In [13]:
# Evaluate the fine-tuned BERT classifier on the CVE dataset: predict an
# 'Artifact Id' for every 'Example Description', write the true/predicted
# labels to a CSV file and report accuracy and weighted F1.
cve_data_csv = "cve_data.csv"
output_csv = "cve_data_predictions.csv"

# --- Load evaluation data ---------------------------------------------------
# `.copy()` makes the filtered frame independent, so adding the
# 'Predicted Label' column below cannot trigger SettingWithCopyWarning.
cve_data_df = pd.read_csv(cve_data_csv)
cve_data_df = cve_data_df[cve_data_df['Artifact Id'] != 'd3f:System Software'].copy()

if cve_data_df.empty:
    raise ValueError(f"{cve_data_csv} has no rows left to evaluate after filtering")

# Class id <-> label mapping: position i in `filtered_labels_list` is class i.
label_to_id = {label: idx for idx, label in enumerate(filtered_labels_list)}

# Fail fast (before the expensive model pass) if the data contains labels the
# classifier has no class for; they could not be scored anyway.
unknown_mask = ~cve_data_df['Artifact Id'].isin(filtered_labels_list)
if unknown_mask.any():
    unknown_labels = sorted(cve_data_df.loc[unknown_mask, 'Artifact Id'].astype(str).unique())
    raise ValueError(
        f"{cve_data_csv} contains 'Artifact Id' values that are not in "
        f"filtered_labels_list: {unknown_labels}"
    )

# --- Load the model ---------------------------------------------------------
# `map_location` lets a checkpoint saved on a GPU be loaded on a CPU-only
# machine; `weights_only=True` restricts unpickling to tensors/plain containers.
saved_dict = torch.load(model_path, map_location=device, weights_only=True)

# from_pretrained() builds the architecture with a fresh classification head
# (hence the "newly initialized" warning); load_state_dict() then replaces
# the weights with the ones stored in the checkpoint.
_model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(filtered_labels_list)
)
_model.load_state_dict(saved_dict)
_model.to(device)
_model.eval()

# --- Tokenize ---------------------------------------------------------------
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# Missing descriptions are read as NaN (a float) by pandas, which the
# tokenizer rejects; treat them as empty text instead of crashing.
descriptions = cve_data_df['Example Description'].fillna('').astype(str).tolist()
encodings = tokenize_data(descriptions, tokenizer)
dataset = TensorDataset(encodings['input_ids'], encodings['attention_mask'])
# Sequential sampling keeps predictions aligned with the DataFrame rows.
data_loader = DataLoader(dataset, sampler=SequentialSampler(dataset), batch_size=batch_size)

# --- Predict ----------------------------------------------------------------
predictions = []
with torch.no_grad():
    for batch in tqdm(data_loader, desc="Making Predictions on New Data"):
        input_ids, attention_mask = [item.to(device) for item in batch]
        outputs = _model(input_ids=input_ids, attention_mask=attention_mask)
        preds = torch.argmax(outputs.logits, dim=-1)
        predictions.extend(preds.cpu().tolist())

# --- Score and save ---------------------------------------------------------
# Map predicted class ids back to label strings.
predicted_labels = [filtered_labels_list[pred] for pred in predictions]
cve_data_df['Predicted Label'] = predicted_labels

# Encode the true labels as class ids (every label was validated above).
true_labels = cve_data_df['Artifact Id'].map(label_to_id).tolist()

# Accuracy in percent.
accuracy = np.mean(np.array(predictions) == np.array(true_labels)) * 100

# Weighted-average F1 for multi-class classification.
f1 = f1_score(true_labels, predictions, average='weighted')

cve_data_df[['Artifact Id', 'Predicted Label']].to_csv(output_csv, index=False)
print(f"Predictions saved to {output_csv}")
print(f"Accuracy: {accuracy:.2f}%")
print(f"F1 Score (Weighted): {f1:.2f}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Making Predictions on New Data:   0%|          | 0/3 [00:00<?, ?it/s]

Predictions saved to cve_data_predictions.csv
Accuracy: 52.78%
F1 Score (Weighted): 0.38
