In [1]:
import os
import gzip
import json
import random
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score
import numpy as np

In [2]:
random.seed(42)
np.random.seed(42)

models_dir = "/home/knordby/Documents/labeling/models"
os.makedirs(models_dir, exist_ok=True)
print(f"\nüìÅ Models will be saved to: {models_dir}")


üìÅ Models will be saved to: /home/knordby/Documents/labeling/models


### Saving Embeddings...
Here we load our embeddings and save them to this device. In the future (after this is done), we wont run these chunks.

In [3]:
print("\n[1/4] Loading embeddings...")

# Load 200K general embeddings
print("   Loading general_sample_200K embeddings...")
with gzip.open('data/general_sample_200K_gemma_embedding.jsonl.gz', 'rt') as f:
    _200k_embeddings = json.load(f)
_200k_embeddings = {k.replace('.json', ''): v for k, v in _200k_embeddings.items()}
print(f"   Loaded {len(_200k_embeddings)} embeddings from 200K dataset")

# Load 70K cyber-biased embeddings
print("   Loading cyber_biased_sample_70K embeddings...")
with gzip.open('data/cyber_biased_sample_70K_gemma_embedding.jsonl.gz', 'rt') as f:
    _70k_embeddings = json.load(f)
_70k_embeddings = {k.replace('.json', ''): v for k, v in _70k_embeddings.items()}
print(f"   Loaded {len(_70k_embeddings)} embeddings from 70K dataset")

# Merge embeddings
gemma_embeddings = _70k_embeddings | _200k_embeddings
print(f"   Total embeddings after merge: {len(gemma_embeddings)}")


[1/4] Loading embeddings...
   Loading general_sample_200K embeddings...
   Loaded 199793 embeddings from 200K dataset
   Loading cyber_biased_sample_70K embeddings...
   Loaded 62605 embeddings from 70K dataset
   Total embeddings after merge: 262398


In [4]:
print("   Loading cyber_biased_sample_70K labels...")
with open('data/gemma_cyber_biased_sample_70K.jsonl', 'r') as f:
    cyber_biased_70k = json.load(f)
print(f"   Loaded {len(cyber_biased_70k)} labels from 70K dataset")

# Load 200K general labels
print("   Loading general_sample_200K labels...")
with open("data/gemma-cyber-general_sample_200K.jsonl", 'r') as f:
    cyber_general_200k = json.load(f)
print(f"   Loaded {len(cyber_general_200k)} labels from 200K dataset")

   Loading cyber_biased_sample_70K labels...
   Loaded 47562 labels from 70K dataset
   Loading general_sample_200K labels...
   Loaded 160428 labels from 200K dataset


In [5]:
# Calculate statistics for general labels
general_labels = [1 if v == 'true' else 0 for v in cyber_general_200k.values()]
pct_cyber = sum(general_labels) / len(general_labels)
print(f"   Population portion cyber-classified: {round(pct_cyber, 2) * 100}%")

# Merge labels
cyber_labels = cyber_biased_70k | cyber_general_200k
print(f"   Total labels after merge: {len(cyber_labels)}")

   Population portion cyber-classified: 5.0%
   Total labels after merge: 207990


In [7]:
cyber_embeddings_clean = {k:v for k,v in gemma_embeddings.items() if k in cyber_labels.keys()}

In [8]:
cyber_embeddings = np.array([v for v in cyber_embeddings_clean.values()])
ids = np.array([idx for idx in cyber_embeddings_clean.keys()])
labels = np.array([1 if cyber_labels[str(idx)] == 'true' else 0 for idx in ids])

In [9]:
np.savez_compressed(
    'cyber_gemma_embeddings_with_ids.npz',
    embeddings=cyber_embeddings,
    ids=ids,
    labels=labels
)

### Load the embeddings

In [3]:
%%time
data = np.load('datasets/cyber_gemma_embeddings_with_ids.npz')
embeddings = data['embeddings']  # Shape: (N, embedding_dim)
ids = data['ids']                 # Shape: (N,)
labels = data['labels'] 
embeddings

CPU times: user 5.86 s, sys: 443 ms, total: 6.31 s
Wall time: 6.31 s


array([[-0.06934851, -0.03356409,  0.01137817, ...,  0.01780601,
        -0.01562081, -0.02061358],
       [-0.08027899,  0.03303464,  0.01234967, ...,  0.03866593,
         0.06492707,  0.00011773],
       [-0.03076481,  0.00835974,  0.01186978, ...,  0.02294729,
        -0.00049745, -0.02872812],
       ...,
       [-0.09449892,  0.0213499 , -0.01632331, ..., -0.0328131 ,
        -0.01384491,  0.04825141],
       [-0.04347664,  0.03659043, -0.00044882, ..., -0.0002375 ,
        -0.0075912 , -0.02220969],
       [-0.04685944, -0.04274602,  0.00882755, ..., -0.01556782,
         0.02710383, -0.05799818]])

In [6]:
with open("english_ids.txt", "r") as f:
    english_ids = f.read().splitlines()
english_ids_set = set(english_ids)
with open("nonenglish_ids.txt", "r") as f:
    nonenglish_ids = f.read().splitlines()
nonenglish_ids_set = set(nonenglish_ids)

In [4]:
ids

array(['3c0972278d25b612e44337041daa8ec059116f41c02362730f8915bee659e618',
       '0c7d596d47e125486e1d6150a7d7e66fc7bee5403b3cfc96dde2040ab3522bed',
       'e13dc78430de551d9f82f5fa3a9129751e73d12a1761e4f3a701709c02e632ea',
       ...,
       '956f8a1f72f68848fea3a4381d2f6d6b65eea1e48473842efb13eeff48b99c08',
       '23c946b5ff817da7c9ad896870c70795b06ada3bc8a97a8f1ef908250b766adc',
       '75c31b57bbe658c85e3edbaa08cc592f28c608cafe9d12bf48fb39da7780f40e'],
      dtype='<U64')

In [5]:
labels

array([1, 1, 1, ..., 0, 0, 0])

In [7]:
english_mask = np.array([id_ in english_ids_set for id_ in ids])
nonenglish_mask = np.array([id_ in nonenglish_ids_set for id_ in ids])

english_embeddings = embeddings[english_mask]
nonenglish_embeddings = embeddings[nonenglish_mask]
english_labels = labels[english_mask]
nonenglish_labels = labels[nonenglish_mask]

### Prepare Data

In [8]:
from sklearn.model_selection import train_test_split
print("\n[3/4] Preparing train/test split...")

# x_train_ids,x_test_ids, y_train,y_test = train_test_split(ids, labels, train_size = 0.8, stratify = labels)
english_xtrain, english_xtest, english_ytrain, english_ytest = train_test_split(
    english_embeddings, english_labels, train_size=0.8, stratify=english_labels, random_state=42)
nonenglish_xtrain, nonenglish_xtest, nonenglish_ytrain, nonenglish_ytest = train_test_split(
    nonenglish_embeddings, nonenglish_labels, train_size=0.8, stratify=nonenglish_labels, random_state=42)

x_train = np.concatenate([english_xtrain, nonenglish_xtrain], axis=0)
y_train =  np.concatenate([english_ytrain, nonenglish_ytrain], axis=0)
x_test = np.concatenate([english_xtest, nonenglish_xtest], axis=0)
y_test =  np.concatenate([english_ytest, nonenglish_ytest], axis=0)

print("x_train: ", len(x_train)/(len(x_train)+len(x_test)))
print("test size: ", len(x_test)/(len(x_train)+len(x_test)))


val_split_idx = int(len(x_train)*.85)
x_val, y_val = x_train[val_split_idx:], y_train[val_split_idx:]
x_train, y_train = x_train[:val_split_idx], y_train[:val_split_idx]


[3/4] Preparing train/test split...
x_train:  0.7999912733438054
test size:  0.20000872665619468


In [9]:
# Create WeightedRandomSampler for balanced training
print("\n[3.5/4] Setting up WeightedRandomSampler for balanced training...")

# Calculate class weights
class_counts = np.bincount(y_train)
print(f"Training class distribution:")
print(f"  Class 0: {class_counts[0]} samples ({class_counts[0]/len(y_train)*100:.1f}%)")
print(f"  Class 1: {class_counts[1]} samples ({class_counts[1]/len(y_train)*100:.1f}%)")

# Calculate sample weights (inverse frequency)
sample_weights = 1.0 / class_counts[y_train]

# Create sampler
train_sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True
)

print(f"‚úì WeightedRandomSampler configured for ~50/50 class balance")
print(f"  Training will probabilistically sample to achieve balanced batches\n")


[3.5/4] Setting up WeightedRandomSampler for balanced training...
Training class distribution:
  Class 0: 103731 samples (95.1%)
  Class 1: 5358 samples (4.9%)
‚úì WeightedRandomSampler configured for ~50/50 class balance
  Training will probabilistically sample to achieve balanced batches



#### Dataset Stats

In [10]:
print(f"\nüìä Dataset Statistics:")
print(f"   Training set shape: {x_train.shape}")
print(f"   Test set shape: {x_test.shape}")
print(f"   Embedding dimension: {x_train.shape[1]}")
print(f"\n   Label Distribution:")
print(f"   ‚Ä¢ Training - Cyber: {sum(y_train)} ({sum(y_train)/len(y_train)*100:.1f}%)")
print(f"   ‚Ä¢ Training - Non-cyber: {len(y_train)-sum(y_train)} ({(len(y_train)-sum(y_train))/len(y_train)*100:.1f}%)")
print(f"   ‚Ä¢ Test - Cyber: {sum(y_test)} ({sum(y_test)/len(y_test)*100:.1f}%)")
print(f"   ‚Ä¢ Test - Non-cyber: {len(y_test)-sum(y_test)} ({(len(y_test)-sum(y_test))/len(y_test)*100:.1f}%)")


üìä Dataset Statistics:
   Training set shape: (109089, 768)
   Test set shape: (32087, 768)
   Embedding dimension: 768

   Label Distribution:
   ‚Ä¢ Training - Cyber: 5358 (4.9%)
   ‚Ä¢ Training - Non-cyber: 103731 (95.1%)
   ‚Ä¢ Test - Cyber: 1517 (4.7%)
   ‚Ä¢ Test - Non-cyber: 30570 (95.3%)


### Build the Model

In [11]:
from torch_models import *

# Check GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Build model
model, optimizer, criterion = build_model(
    input_dim=x_train.shape[1],  # Auto-detect from your data
    device=device
)

Using device: cuda
MODEL BUILT
Architecture: CyberClassifier
Input dimension: 768
Hidden layers: 512 -> 256 -> 128
Output: 1 (binary classification)
Total parameters: 561,409
Trainable parameters: 561,409
Device: cuda



In [12]:
# Set save path
model_path = '/home/knordby/Documents/labeling/models/cyber_gemmaEmbeddings.pt'

# Train
model, history = train_model(
    model, optimizer, criterion,
    x_train, y_train, x_val, y_val,
    train_sampler=train_sampler,
    device=device,
    epochs=80,
    batch_size=512,
    model_path=model_path
)

‚úì Using WeightedRandomSampler for balanced training
TRAINING
Epochs: 80
Batch size: 512
Training samples: 109089
Validation samples: 19252
Early stopping patience: 15

Epoch 1/80 - Time: 3.43s
  Train - Loss: 0.1517, Acc: 0.9445, AUC: 0.9851
  Val   - Loss: 0.1288, Acc: 0.9561, AUC: 0.9434, Precision: 0.4390, Recall: 0.6963
  ‚úì Best model saved (AUC: 0.9434)

Epoch 2/80 - Time: 2.69s
  Train - Loss: 0.0475, Acc: 0.9864, AUC: 0.9975
  Val   - Loss: 0.1276, Acc: 0.9646, AUC: 0.9400, Precision: 0.5156, Recall: 0.6285
  No improvement (patience: 1/15)

Epoch 3/80 - Time: 2.68s
  Train - Loss: 0.0270, Acc: 0.9929, AUC: 0.9989
  Val   - Loss: 0.1404, Acc: 0.9658, AUC: 0.9370, Precision: 0.5340, Recall: 0.5551
  No improvement (patience: 2/15)

Epoch 4/80 - Time: 2.68s
  Train - Loss: 0.0189, Acc: 0.9950, AUC: 0.9994
  Val   - Loss: 0.1566, Acc: 0.9679, AUC: 0.9369, Precision: 0.5710, Recall: 0.5113
  No improvement (patience: 3/15)

Epoch 5/80 - Time: 2.88s
  Train - Loss: 0.0167, Acc: 0

### Evaluate the Model's Performance Against the Test Set

In [13]:
# Evaluate with detailed metrics
y_pred_probs, metrics = evaluate_model(
    model, x_test, y_test,
    device=device
)

# Access individual metrics if needed
print(f"Test AUC: {metrics['auc']:.4f}")

üìà CYBERSECURITY CLASSIFIER - FINAL TEST RESULTS
   Loss:      0.1536
   Accuracy:  0.9456 (94.56%)
   Precision: 0.4513
   Recall:    0.6961
   AUC:       0.9385
   F1 Score:  0.5476

Confusion Matrix:
                 Predicted
                 Negative  Positive
Actual Negative     29286      1284
       Positive       461      1056

Detailed Metrics:
   True Positives:  1056
   True Negatives:  29286
   False Positives: 1284
   False Negatives: 461
   Specificity:     0.9580
   NPV:             0.9845

Classification Report:
              precision    recall  f1-score   support

   Non-Cyber     0.9845    0.9580    0.9711     30570
       Cyber     0.4513    0.6961    0.5476      1517

    accuracy                         0.9456     32087
   macro avg     0.7179    0.8271    0.7593     32087
weighted avg     0.9593    0.9456    0.9510     32087


Test AUC: 0.9385


#### English Performance

In [14]:
# Evaluate with detailed metrics
y_pred_probs, metrics = evaluate_model(
    model, english_xtest, english_ytest,
    device=device
)

# Access individual metrics if needed
print(f"Test AUC: {metrics['auc']:.4f}")

üìà CYBERSECURITY CLASSIFIER - FINAL TEST RESULTS
   Loss:      0.2082
   Accuracy:  0.9239 (92.39%)
   Precision: 0.4744
   Recall:    0.7350
   AUC:       0.9291
   F1 Score:  0.5766

Confusion Matrix:
                 Predicted
                 Negative  Positive
Actual Negative      8264       544
       Positive       177       491

Detailed Metrics:
   True Positives:  491
   True Negatives:  8264
   False Positives: 544
   False Negatives: 177
   Specificity:     0.9382
   NPV:             0.9790

Classification Report:
              precision    recall  f1-score   support

   Non-Cyber     0.9790    0.9382    0.9582      8808
       Cyber     0.4744    0.7350    0.5766       668

    accuracy                         0.9239      9476
   macro avg     0.7267    0.8366    0.7674      9476
weighted avg     0.9435    0.9239    0.9313      9476


Test AUC: 0.9291


#### Non-English Performance

In [15]:
# Evaluate with detailed metrics
y_pred_probs, metrics = evaluate_model(
    model, nonenglish_xtest, nonenglish_ytest,
    device=device
)

# Access individual metrics if needed
print(f"Test AUC: {metrics['auc']:.4f}")

üìà CYBERSECURITY CLASSIFIER - FINAL TEST RESULTS
   Loss:      0.1308
   Accuracy:  0.9547 (95.47%)
   Precision: 0.4330
   Recall:    0.6655
   AUC:       0.9400
   F1 Score:  0.5246

Confusion Matrix:
                 Predicted
                 Negative  Positive
Actual Negative     21022       740
       Positive       284       565

Detailed Metrics:
   True Positives:  565
   True Negatives:  21022
   False Positives: 740
   False Negatives: 284
   Specificity:     0.9660
   NPV:             0.9867

Classification Report:
              precision    recall  f1-score   support

   Non-Cyber     0.9867    0.9660    0.9762     21762
       Cyber     0.4330    0.6655    0.5246       849

    accuracy                         0.9547     22611
   macro avg     0.7098    0.8157    0.7504     22611
weighted avg     0.9659    0.9547    0.9593     22611


Test AUC: 0.9400


### Push the Model

In [16]:
from push_to_huggingface import push_to_huggingface

with open("hf_token.txt",'r') as f:
    token = f.read()

# Push your model (after training and evaluation)
repo_url = push_to_huggingface(
    model_path='/home/knordby/Documents/labeling/models/cyber_gemmaEmbeddings.pt',
    repo_name='gemma_cyber',  # Choose your repo name
    metrics=metrics,  # From evaluate_model()
    input_dim=x_train.shape[1],  # Your embedding dimension
    hf_token=token,  # Your token
    private=False  # Set True if you want private repo
)

print(f"Model available at: {repo_url}")


PUSHING MODEL TO HUGGINGFACE
Repository: kristiangnordby/gemma_cyber
Private: False

‚úÖ Repository created/verified: kristiangnordby/gemma_cyber

üìù Creating model card...
‚öôÔ∏è  Saving configuration...
üèóÔ∏è  Saving model architecture...
üíæ Preparing model checkpoint...

üì§ Uploading files to HuggingFace...
  ‚úì Uploaded: README.md


No files have been modified since last commit. Skipping to prevent empty commit.


  ‚úì Uploaded: config.json
  ‚úì Uploaded: model_architecture.py


Processing Files (0 / 0): |          |  0.00B /  0.00B            

New Data Upload: |          |  0.00B /  0.00B            

  ‚úì Uploaded: model.pt

‚úÖ MODEL SUCCESSFULLY PUSHED TO HUGGINGFACE!
üîó View your model at: https://huggingface.co/kristiangnordby/gemma_cyber

Model available at: https://huggingface.co/kristiangnordby/gemma_cyber
