In [1]:
# imports
from transformers import AutoModelForSequenceClassification, AutoTokenizer


In [2]:
# Define your label mappings
id2label = {0: "None", 1: "User", 2: "Root"}
label2id = {"None": 0, "User": 1, "Root": 2}

# Load SecureBERT using AutoModelForSequenceClassification
model_name = "ehsanaghaei/SecureBERT"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model for sequence classification
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,  # None, User, Root
    id2label=id2label,
    label2id=label2id
)

print(f"Model architecture: {type(model)}")
print(f"Number of labels: {model.num_labels}")
print(f"Label mappings: {model.config.id2label}")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ehsanaghaei/SecureBERT and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model architecture: <class 'transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification'>
Number of labels: 3
Label mappings: {0: 'None', 1: 'User', 2: 'Root'}


## Training ##

In [3]:
from transformers import (
    AutoModelForSequenceClassification, 
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from datasets import Dataset
import evaluate
import numpy as np
import pandas as pd
from peft import LoraConfig, get_peft_model

# 1. Load model and tokenizer
model_name = "ehsanaghaei/SecureBERT"
id2label = {0: "None", 1: "User", 2: "Root"}
label2id = {"None": 0, "User": 1, "Root": 2}

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=3,
    id2label=id2label,
    label2id=label2id
)
# Apply PEFT (LoRA)
lora_config = LoraConfig(
    task_type="SEQ_CLS",
    r=8,                    # Low rank for small dataset
    lora_alpha=32,          # Scaling factor
    lora_dropout=0.2,       # Higher dropout for regularization
    target_modules=["query", "value", "key"],  # Target attention layers
    bias="none"             # Don't train bias terms
)

model = get_peft_model(model, lora_config)

# 2. Load the actual dataset
df = pd.read_csv("../datasets/postcondition-dataset-finished.tsv", sep='\t')

# Clean the data and prepare for training
df.columns = df.columns.str.strip()
df = df.dropna(subset=['DESCRIPTION', 'CVSS', 'POSTCONDITION'])  # Remove rows with missing values in essential columns
df['POSTCONDITION'] = df['POSTCONDITION'].str.strip()  # Remove whitespace

# Combine description and CVSS into text format
df['text'] = "Description: " + df['DESCRIPTION'] + " CVSS: " + df['CVSS']

# Prepare dataset in the format needed for training
dataset_dict = {
    "text": df['text'].tolist(),
    "label": df['POSTCONDITION'].tolist()
}

print("--- Example from dataset_dict (before label conversion) ---")
for i in range(3): # Print the first 3 samples
    print(f"Text: {dataset_dict['text'][i]}")
    print(f"Label: {dataset_dict['label'][i]}\n")


print(f"Dataset size: {len(dataset_dict['text'])}")
print(f"Label distribution:")
print(pd.Series(dataset_dict['label']).value_counts())

# Convert to dataset
dataset = Dataset.from_dict(dataset_dict)

# Convert labels to numeric
def format_labels(example):
    example["label"] = label2id[example["label"]]
    return example

dataset = dataset.map(format_labels)

print("\n--- Example from dataset (after label conversion) ---")
print("The string labels should now be integer IDs.")
print(dataset[0])
print(dataset[1])
print(dataset[2])


# 3. Tokenize dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, max_length=512)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# 4. Split dataset (80% train, 20% test) with consistent seed
train_test_split = tokenized_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]

print(f"Training samples: {len(train_dataset)}")
print(f"Evaluation samples: {len(eval_dataset)}")

# 5. Setup evaluation
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

# 6. Training arguments
training_args = TrainingArguments(
    output_dir="./securebert-privilege-classifier",
    learning_rate=1e-5,  # Lower learning rate
    per_device_train_batch_size=4,  # Smaller batch size
    per_device_eval_batch_size=4,
    num_train_epochs=10,  # More epochs with early stopping
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    logging_dir="./logs",
    logging_steps=5,
    seed=42,
    gradient_accumulation_steps=2,  # Accumulate gradients
    warmup_steps=10,  # Add warmup
    fp16=True,  # Use mixed precision if GPU available
)

# 7. Create Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

# 8. Train
print("Starting training...")
trainer.train()

# 9. Save the trained model and tokenizer
print("Saving trained model...")
trainer.save_model("./securebert-privilege-classifier-final")
tokenizer.save_pretrained("./securebert-privilege-classifier-final")
print("Model saved to ./securebert-privilege-classifier-final")

# 10. Get final evaluation metrics
print("Evaluating final model...")
final_metrics = trainer.evaluate()
print(f"Final evaluation metrics: {final_metrics}")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ehsanaghaei/SecureBERT and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


--- Example from dataset_dict (before label conversion) ---
Text: Description: A local privilege escalation vulnerability in the Intercept X for Windows installer prior version 1.22 can lead to a local user gaining system level privileges, if the installer is run as SYSTEM. CVSS: CVSS:3.1/AV:L/AC:H/PR:L/UI:R/S:C/C:H/I:H/A:H
Label: Root

Text: Description: Notepad++ is a free and open-source source code editor. In versions 8.8.1 and prior, a privilege escalation vulnerability exists in the Notepad++ v8.8.1 installer that allows unprivileged users to gain SYSTEM-level privileges through insecure executable search paths. An attacker could use social engineering or clickjacking to trick users into downloading both the legitimate installer and a malicious executable to the same directory (typically Downloads folder - which is known as Vulnerable directory). Upon running the installer, the attack executes automatically with SYSTEM privileges. This issue has been fixed and will be released in

Map:   0%|          | 0/31 [00:00<?, ? examples/s]


--- Example from dataset (after label conversion) ---
The string labels should now be integer IDs.
{'text': 'Description: A local privilege escalation vulnerability in the Intercept X for Windows installer prior version 1.22 can lead to a local user gaining system level privileges, if the installer is run as SYSTEM. CVSS: CVSS:3.1/AV:L/AC:H/PR:L/UI:R/S:C/C:H/I:H/A:H', 'label': 2}
{'text': 'Description: Notepad++ is a free and open-source source code editor. In versions 8.8.1 and prior, a privilege escalation vulnerability exists in the Notepad++ v8.8.1 installer that allows unprivileged users to gain SYSTEM-level privileges through insecure executable search paths. An attacker could use social engineering or clickjacking to trick users into downloading both the legitimate installer and a malicious executable to the same directory (typically Downloads folder - which is known as Vulnerable directory). Upon running the installer, the attack executes automatically with SYSTEM privileges. 

Map:   0%|          | 0/31 [00:00<?, ? examples/s]

Training samples: 24
Evaluation samples: 7


  trainer = Trainer(
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Starting training...


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,1.104492,0.142857
2,1.095200,1.104771,0.142857
3,1.095200,1.104911,0.142857
4,1.102000,1.104632,0.142857
5,1.103100,1.104632,0.142857
6,1.103100,1.104911,0.142857
7,1.102600,1.10505,0.142857
8,1.102600,1.104911,0.142857
9,1.100900,1.10505,0.142857
10,1.096000,1.10519,0.142857


Saving trained model...
Model saved to ./securebert-privilege-classifier-final
Evaluating final model...
Model saved to ./securebert-privilege-classifier-final
Evaluating final model...


Final evaluation metrics: {'eval_loss': 1.1044921875, 'eval_accuracy': 0.14285714285714285, 'eval_runtime': 0.0471, 'eval_samples_per_second': 148.527, 'eval_steps_per_second': 42.436, 'epoch': 10.0}


## Model Testing and Results ##

In [None]:
import pandas as pd
import torch

def test_on_held_out_set(model, tokenizer, test_dataset, output_file="securebert_test_results.csv"):
    """Test the trained model on the held-out test set and save results"""
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()
    
    results = []
    
    print(f"Testing model on {len(test_dataset)} samples...")
    
    for i, sample in enumerate(test_dataset):
        # Reconstruct the original text
        text = tokenizer.decode(sample['input_ids'], skip_special_tokens=True)
        
        # Get prediction
        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        
        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits
            predictions = torch.nn.functional.softmax(logits, dim=-1)
            predicted_class = torch.argmax(predictions, dim=1).item()
            confidence = predictions[0][predicted_class].item()
        
        # Convert labels back to text
        true_label = model.config.id2label[sample['label']]
        predicted_label = model.config.id2label[predicted_class]
        
        # Get all probabilities
        all_probs = {
            model.config.id2label[i]: predictions[0][i].item() 
            for i in range(len(model.config.id2label))
        }
        
        results.append({
            'text': text,
            'true_label': true_label,
            'predicted_label': predicted_label,
            'confidence': confidence,
            'correct': true_label == predicted_label,
            'prob_none': all_probs['None'],
            'prob_user': all_probs['User'],
            'prob_root': all_probs['Root']
        })
        
        if (i + 1) % 10 == 0:
            print(f"Processed {i + 1}/{len(test_dataset)} test samples")
    
    # Save results
    df_results = pd.DataFrame(results)
    df_results.to_csv(output_file, index=False)
    
    # Calculate metrics
    accuracy = df_results['correct'].mean()
    total_samples = len(df_results)
    correct_predictions = df_results['correct'].sum()
    
    print(f"\n=== TEST SET RESULTS ===")
    print(f"Total samples: {total_samples}")
    print(f"Correct predictions: {correct_predictions}")
    print(f"Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")
    
    # Per-class breakdown
    print(f"\n=== PER-CLASS RESULTS ===")
    for label in ['None', 'User', 'Root']:
        subset = df_results[df_results['true_label'] == label]
        if len(subset) > 0:
            class_accuracy = subset['correct'].mean()
            print(f"{label}: {len(subset)} samples, {class_accuracy:.4f} accuracy ({class_accuracy*100:.1f}%)")
            
            # Show what this class was predicted as
            pred_counts = subset['predicted_label'].value_counts()
            for pred_label, count in pred_counts.items():
                percentage = (count / len(subset)) * 100
                print(f"  -> Predicted as {pred_label}: {count} ({percentage:.1f}%)")
    
    # Confidence analysis
    print(f"\n=== CONFIDENCE ANALYSIS ===")
    high_conf = df_results[df_results['confidence'] >= 0.9]
    med_conf = df_results[(df_results['confidence'] >= 0.7) & (df_results['confidence'] < 0.9)]
    low_conf = df_results[df_results['confidence'] < 0.7]
    
    print(f"High confidence (≥90%): {len(high_conf)} samples, {high_conf['correct'].mean():.4f} accuracy")
    print(f"Medium confidence (70-90%): {len(med_conf)} samples, {med_conf['correct'].mean():.4f} accuracy")
    print(f"Low confidence (<70%): {len(low_conf)} samples, {low_conf['correct'].mean():.4f} accuracy")
    
    print(f"\nResults saved to: {output_file}")
    
    return df_results

# Test the model on the held-out test set
print("Testing trained model on held-out test set...")
test_results = test_on_held_out_set(model, tokenizer, eval_dataset, "securebert_test_results.csv")

## Inference ## 

In [None]:
def predict_privilege(description, cvss_vector, model, tokenizer):
    """Predict privilege level using the fine-tuned model"""
    # Prepare input
    text = f"Description: {description} CVSS: {cvss_vector}"
    
    # Tokenize
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    
    # Move to device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    
    # Predict
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        predictions = torch.nn.functional.softmax(logits, dim=-1)
        predicted_class = torch.argmax(predictions, dim=1).item()
        confidence = predictions[0][predicted_class].item()
    
    return {
        "predicted_privilege": model.config.id2label[predicted_class],
        "confidence": confidence,
        "all_probabilities": {
            model.config.id2label[i]: predictions[0][i].item() 
            for i in range(len(model.config.id2label))
        }
    }

# Example usage
description = "Buffer overflow allowing arbitrary code execution"
cvss_vector = "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H"

result = predict_privilege(description, cvss_vector, model, tokenizer)
print(f"Predicted privilege: {result['predicted_privilege']}")
print(f"Confidence: {result['confidence']:.2%}")
print(f"All probabilities: {result['all_probabilities']}")