In [1]:
# Cell 1: Install dependencies
!pip install --quiet transformers datasets torch scikit-learn pandas


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m43.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m15.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Cell 2: Imports
import pandas as pd
import numpy as np
import torch

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer
)
from datasets import Dataset


In [3]:
# Cell 3: Load and preprocess your TON_IoT CSV
from google.colab import files
# Upload the CSV through the Colab file picker; the first uploaded file is used.
uploaded = files.upload()            # select linux_memory.csv
if not uploaded:
    # Without this guard an empty/cancelled upload surfaces as a bare StopIteration.
    raise ValueError("No file was uploaded; select the TON_IoT CSV and re-run this cell.")
csv_path = next(iter(uploaded))
df = pd.read_csv(csv_path, low_memory=False)

# Keep only rows with no missing values and a label ('type') in {0, 1}.
features = ['ts','PID','MINFLT','MAJFLT','VSTEXT','VSIZE','RSIZE','VGROW','RGROW','MEM']
df = (
    df.dropna(subset=features + ['type'])
      # Non-numeric labels raise here instead of slipping through silently.
      .assign(type=lambda d: pd.to_numeric(d['type']))
      # Enforce the binary-label assumption that the 2-class models rely on.
      .loc[lambda d: d['type'].isin([0, 1])]
      .assign(type=lambda d: d['type'].astype(int))
      .reset_index(drop=True)
)

# Turn each row into a short text description
def to_text(row):
    """Render one record as a short English sentence.

    Reads the ``ts``, ``PID``, ``MINFLT``, ``MAJFLT`` and ``MEM`` attributes of
    ``row``. The first four are truncated to ``int``; ``MEM`` is multiplied by
    100 and shown with one decimal place.
    """
    timestamp = int(row.ts)
    pid = int(row.PID)
    minor_faults = int(row.MINFLT)
    major_faults = int(row.MAJFLT)
    mem_percent = row.MEM * 100
    return (f"Time {timestamp}: PID {pid}, "
            f"{minor_faults} minor faults, {major_faults} major faults, "
            f"{mem_percent:.1f}% memory usage.")
# Describe every row as a sentence (row-wise apply of to_text).
df['text'] = df.apply(to_text, axis=1)

# 80/20 train/test split, stratified on the label column, fixed seed.
split_options = {
    "test_size": 0.2,
    "stratify": df['type'],
    "random_state": 42,
}
train_df, test_df = train_test_split(df, **split_options)
n_train, n_test = len(train_df), len(test_df)
print(f"Train: {n_train} rows; Test: {n_test} rows")


Saving linux_memory_binarized.csv to linux_memory_binarized.csv
Train: 800000 rows; Test: 200000 rows


In [4]:
# Cell 4: Build huggingface Datasets and tokenize
def make_dataset(pddf):
    """Build a ``Dataset`` from the "text" and "type" columns of ``pddf``.

    The "type" column is exposed under the name "label"; all other columns
    are left out.
    """
    texts = pddf["text"].tolist()
    labels = pddf["type"].tolist()
    columns = {"text": texts, "label": labels}
    return Dataset.from_dict(columns)

# Wrap both splits as Hugging Face datasets (train first, then test).
train_ds, test_ds = (make_dataset(split) for split in (train_df, test_df))

def tokenize(example, tokenizer, max_length=128):
    """Tokenize the "text" field of ``example`` and attach its labels.

    Args:
        example: mapping with a "text" entry (passed to ``tokenizer``) and a
            "label" entry (copied to the output under "labels").
        tokenizer: callable invoked as
            ``tokenizer(text, truncation=True, max_length=max_length)``; its
            return value must support item assignment.
        max_length: truncation length forwarded to ``tokenizer``. Defaults to
            128, the value that was previously hard-coded.

    Returns:
        The tokenizer output with an added "labels" entry.
    """
    tok = tokenizer(
        example["text"],
        truncation=True,
        max_length=max_length
    )
    tok["labels"] = example["label"]
    return tok


In [5]:
# Cell 5: Fine-tune & evaluate helper (fixed TrainingArguments)

from transformers import TrainingArguments, Trainer

def finetune_and_eval(model_name, train_ds, test_ds, report_to=None):
    """Fine-tune a 2-class sequence classifier and evaluate it on ``test_ds``.

    Loads the tokenizer and model for ``model_name``, tokenizes both datasets
    with ``tokenize``, trains for one epoch, saves the model and tokenizer to
    ``./<model_name with '/' replaced by '_'>_toniot``, then prints the
    evaluation metrics, confusion matrix and classification report.

    Args:
        model_name: identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.
        train_ds: dataset used for training; must provide the "text" and
            "label" columns that ``tokenize`` reads.
        test_ds: dataset used for evaluation; same columns as ``train_ds``.
        report_to: optional value forwarded to ``TrainingArguments(report_to=...)``.
            ``None`` (default) leaves the library default untouched; pass
            ``"none"`` to skip logging integrations such as the interactive
            wandb login prompt.

    Returns:
        The metrics dict returned by ``trainer.evaluate`` on the tokenized
        test set.
    """
    print(f"\n\n=== Training & Evaluating {model_name} ===")
    # Checkpoints, the final model and the tokenizer all share this directory.
    output_dir = f"./{model_name.replace('/','_')}_toniot"

    # 1) Load tokenizer & model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

    # 2) Tokenize
    tok_train = train_ds.map(lambda ex: tokenize(ex, tokenizer), batched=True)
    tok_test  = test_ds.map(lambda ex: tokenize(ex, tokenizer), batched=True)

    # 3) Data collator
    collator = DataCollatorWithPadding(tokenizer)

    # 4) Training args (no evaluation_strategy/save_strategy)
    training_kwargs = dict(
        output_dir=output_dir,
        num_train_epochs=1,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        logging_steps=100,
        save_steps=500,
        save_total_limit=2
    )
    if report_to is not None:
        training_kwargs["report_to"] = report_to
    args = TrainingArguments(**training_kwargs)

    # 5) Metrics function
    def compute_metrics(pred):
        """Return accuracy plus precision/recall/F1 of the positive class ("1")."""
        labels = pred.label_ids
        preds  = np.argmax(pred.predictions, axis=1)
        # Build the report once instead of once per metric.
        report = classification_report(labels, preds, output_dict=True, zero_division=0)
        # Class "1" is missing from the report when it appears in neither the
        # labels nor the predictions; report 0.0 instead of raising KeyError.
        positive = report.get("1", {})
        return {
            "accuracy": accuracy_score(labels, preds),
            "precision": positive.get("precision", 0.0),
            "recall":    positive.get("recall", 0.0),
            "f1":        positive.get("f1-score", 0.0)
        }

    # 6) Trainer
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=tok_train,
        eval_dataset=tok_test,
        tokenizer=tokenizer,
        data_collator=collator,
        compute_metrics=compute_metrics
    )

    # 7) Train
    trainer.train()
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    # 8) Manual evaluation
    eval_metrics = trainer.evaluate(eval_dataset=tok_test)
    print(f"\nFinal eval results for {model_name}:\n", eval_metrics)

    # 9) Detailed confusion matrix (a second prediction call over the test set)
    preds = trainer.predict(tok_test)
    y_true = preds.label_ids
    y_pred = np.argmax(preds.predictions, axis=1)
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
    print("Classification Report:\n", classification_report(y_true, y_pred))

    return eval_metrics


In [6]:
# Run 1: BERT-Small (4 layers, hidden size 512)
res_small = finetune_and_eval(
    model_name="prajjwal1/bert-small",
    train_ds=train_ds,
    test_ds=test_ds,
)




=== Training & Evaluating prajjwal1/bert-small ===


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/116M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at prajjwal1/bert-small and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/800000 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/116M [00:00<?, ?B/s]

Map:   0%|          | 0/200000 [00:00<?, ? examples/s]

  trainer = Trainer(


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33marafatcoc01[0m ([33marafatcoc01-rmit-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
100,0.3446
200,0.2871
300,0.1972
400,0.2054
500,0.1902
600,0.1859
700,0.1732
800,0.1664
900,0.1565
1000,0.1472



Final eval results for prajjwal1/bert-small:
 {'eval_loss': 0.022053992375731468, 'eval_accuracy': 0.993305, 'eval_precision': 0.9636585365853658, 'eval_recall': 0.9815742619353236, 'eval_f1': 0.9725338967405797, 'eval_runtime': 85.3743, 'eval_samples_per_second': 2342.625, 'eval_steps_per_second': 146.414, 'epoch': 1.0}
Confusion Matrix:
 [[174955    894]
 [   445  23706]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.99      1.00    175849
           1       0.96      0.98      0.97     24151

    accuracy                           0.99    200000
   macro avg       0.98      0.99      0.98    200000
weighted avg       0.99      0.99      0.99    200000



In [None]:
# Run 2: BERT-Base Uncased (12 layers, hidden size 768)
res_base = finetune_and_eval(
    model_name="bert-base-uncased",
    train_ds=train_ds,
    test_ds=test_ds,
)



=== Training & Evaluating bert-base-uncased ===


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/800000 [00:00<?, ? examples/s]

Map:   0%|          | 0/200000 [00:00<?, ? examples/s]

  trainer = Trainer(


Step,Training Loss
100,0.3427
200,0.3238
300,0.2379
400,0.2276
500,0.2227
600,0.1885
700,0.1434
800,0.1468
900,0.1368
1000,0.1314


In [None]:
# BERT-Large Uncased.
# Stored under its own name so the BERT-Base metrics held in res_base are not overwritten.
res_large = finetune_and_eval("bert-large-uncased", train_ds, test_ds)

In [None]:
# RoBERTa-Base.
# Stored under its own name instead of reusing res_base, so earlier results stay available.
res_roberta = finetune_and_eval("roberta-base", train_ds, test_ds)