In [1]:
# Cell 1: Install dependencies
!pip install --quiet \
    tensorflow \
    sentence-transformers \
    transformers \
    torch \
    scikit-learn \
    datasets


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m35.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m35.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m24.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# Cell 2: Imports & Configuration
import os
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras.backend as K

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from sentence_transformers import SentenceTransformer
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
    pipeline
)
from datasets import Dataset


In [4]:
# Cell 3: VAE Custom Layers & Builder

@tf.keras.utils.register_keras_serializable()
def sampling(args):
    mean, log_var = args
    log_var = tf.clip_by_value(log_var, -5.0, 5.0)
    eps = K.random_normal((K.shape(mean)[0], tf.shape(mean)[1]))
    return mean + K.exp(0.5 * log_var) * eps

@tf.keras.utils.register_keras_serializable(package="Custom")
class VAELossLayer(tf.keras.layers.Layer):
    def call(self, inputs):
        orig, recon, mean, log_var = inputs
        recon_loss = tf.reduce_sum(tf.square(orig - recon), axis=1)
        kl_loss    = -0.5 * tf.reduce_sum(1 + log_var - tf.square(mean) - tf.exp(log_var), axis=1)
        self.add_loss(tf.reduce_mean(recon_loss + kl_loss))
        return recon

def build_vae(input_dim):
    inp = tf.keras.Input(shape=(input_dim,))
    x = tf.keras.layers.Dense(64, activation="relu")(inp)
    x = tf.keras.layers.Dense(32, activation="relu")(x)
    z_mean   = tf.keras.layers.Dense(4, name="z_mean")(x)
    z_logvar = tf.keras.layers.Dense(4, name="z_log_var")(x)
    z        = tf.keras.layers.Lambda(sampling, name="z")([z_mean, z_logvar])

    latent = tf.keras.Input(shape=(4,))
    dx = tf.keras.layers.Dense(32, activation="relu")(latent)
    dx = tf.keras.layers.Dense(64, activation="relu")(dx)
    out = tf.keras.layers.Dense(input_dim, activation="linear")(dx)
    decoder = tf.keras.Model(latent, out, name="decoder")

    recon = decoder(z)
    loss_layer = VAELossLayer()([inp, recon, z_mean, z_logvar])
    vae = tf.keras.Model(inp, loss_layer, name="vae")
    vae.compile(optimizer=tf.keras.optimizers.Adam(1e-3))
    return vae

def load_vae(path, dim):
    try:
        m = tf.keras.models.load_model(
            path,
            custom_objects={"sampling": sampling, "VAELossLayer": VAELossLayer}
        )
        if m.input_shape[1] != dim:
            print(f"⚠️ VAE expects {m.input_shape[1]} dims, got {dim}.")
            return None
        print("✅ Loaded VAE from disk")
        return m
    except Exception:
        return None


In [5]:
# Cell 4: Data Loading & Preprocessing

df = pd.read_csv("sampled_data1.csv", low_memory=False)

# Numeric features & label
features = ['ts','PID','MINFLT','MAJFLT','VSTEXT','VSIZE','RSIZE','VGROW','RGROW','MEM']
df = df.dropna(subset=features+['type']).reset_index(drop=True)

X_df = df[features].astype(float)
y    = df['type'].astype(int).values

# Scale
scaler = StandardScaler()
X = scaler.fit_transform(X_df).astype(np.float32)

print("Samples:", X.shape[0], "Features:", X.shape[1])


Samples: 1000 Features: 10


In [6]:
# Cell 5: Train or Load VAE on Normal Data Only

normal_mask = (y == 0)
X_norm = X[normal_mask]

vae = load_vae("vae_model.keras", X.shape[1])
if vae is None:
    vae = build_vae(X.shape[1])
    vae.fit(
        X_norm, X_norm,
        epochs=20, batch_size=32, validation_split=0.1,
        callbacks=[
            tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=3, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=2)
        ], verbose=1
    )
    vae.save("vae_model.keras")
    print("✅ Trained & saved VAE")


Epoch 1/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 34ms/step - loss: 12.2970 - val_loss: 13.7021 - learning_rate: 0.0010
Epoch 2/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 11.2918 - val_loss: 13.4203 - learning_rate: 0.0010
Epoch 3/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 8.8779 - val_loss: 13.6549 - learning_rate: 0.0010
Epoch 4/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 7.9459 - val_loss: 13.3984 - learning_rate: 0.0010
Epoch 5/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 8.6422 - val_loss: 13.3477 - learning_rate: 0.0010
Epoch 6/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 8.2262 - val_loss: 12.6812 - learning_rate: 0.0010
Epoch 7/20
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 11.4330 - val_loss: 12.6245 - learning_rate: 0.

In [7]:
# Cell 6: VAE Reconstruction & Flagging

recon = vae.predict(X)
errs  = np.mean((X - recon)**2, axis=1)

thr = np.percentile(errs[normal_mask], 95)
flags_vae = (errs > thr).astype(int)

print(f"Threshold={thr:.4f}, flagged {flags_vae.sum()} anomalies")


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
Threshold=0.8334, flagged 53 anomalies


In [8]:
# Cell 7: Prepare Text Dataset for GPT-Neo

def to_text(row):
    mem = row['MEM'] * 100
    return (f"Time {int(row['ts'])}: PID {int(row['PID'])}, "
            f"{int(row['MINFLT'])} minor faults, {int(row['MAJFLT'])} major faults, "
            f"{mem:.1f}% memory.")

df['text'] = df.apply(to_text, axis=1)
df['label_str'] = df['type'].map({0:"Normal", 1:"Anomaly"})

# Hugging Face Dataset
ds = Dataset.from_pandas(df[['text','label_str']])
split = ds.train_test_split(test_size=0.1, seed=42)
train_ds, eval_ds = split['train'], split['test']


In [11]:
# Cell 8: Tokenize & Data Collator (with pad_token fix)

model_name = "EleutherAI/gpt-neo-125M"
tokenizer  = AutoTokenizer.from_pretrained(model_name)

# Fix missing pad_token
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def preprocess(ex):
    entries = ["Record: " + t + "\nResult: " + l for t,l in zip(ex['text'], ex['label_str'])]
    return tokenizer(entries, truncation=True, padding="max_length", max_length=128)

train_tok = train_ds.map(preprocess, batched=True)
eval_tok  = eval_ds.map(preprocess, batched=True)

data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)


Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [14]:
# Cell 9: Fine-Tune GPT-Neo (fixed TrainingArguments)

model = AutoModelForCausalLM.from_pretrained(model_name)
# Resize embeddings if we added a pad_token
model.resize_token_embeddings(len(tokenizer))

training_args = TrainingArguments(
    output_dir="gpt_neo_finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    logging_steps=100,
    save_steps=500,
    save_total_limit=2
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tok,
    eval_dataset=eval_tok,
    data_collator=data_collator,
)

trainer.train()
trainer.save_model("gpt_neo_finetuned")
tokenizer.save_pretrained("gpt_neo_finetuned")
print("✅ Saved fine-tuned GPT-Neo")




<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33marafatcoc01[0m ([33marafatcoc01-rmit-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
100,1.156
200,0.8427
300,0.7867
400,0.7651
500,0.7338
600,0.7075


✅ Saved fine-tuned GPT-Neo


In [21]:
# Cell 10: Inference via Few‐Shot Generative GPT-Neo with 4 Examples & Robust Parsing

import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# ── Reload fine-tuned GPT-Neo ─────────────────────────────────────────────────
tokenizer = AutoTokenizer.from_pretrained("gpt_neo_finetuned")
model     = AutoModelForCausalLM.from_pretrained("gpt_neo_finetuned")
device    = 0 if torch.cuda.is_available() else -1

gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device
)

# ── Prepare 4 few-shot examples (2 normal, 2 anomaly) ──────────────────────────
norm_examples = df[df['type'] == 0].sample(2, random_state=1)
anom_examples = df[df['type'] == 1].sample(2, random_state=1)

examples = []
for _, row in norm_examples.iterrows():
    examples.append(("Normal", to_text(row)))
for _, row in anom_examples.iterrows():
    examples.append(("Anomaly", to_text(row)))

# ── Classify each VAE-flagged candidate ────────────────────────────────────────
preds = []
debug_raw = []

candidate_idxs = np.where(flags_vae == 1)[0]
for idx_i, i in enumerate(candidate_idxs):
    cand_text = df.loc[i, 'text']

    # Build prompt with 4 examples
    prompt = ""
    for j, (lbl, txt) in enumerate(examples, start=1):
        prompt += f"Example {j}:\nRecord: {txt}\nResult: {lbl}\n\n"
    prompt += f"Now classify this record:\nRecord: {cand_text}\nResult:"

    # Generate
    out = gen(prompt, max_new_tokens=5, truncation=True)[0]['generated_text']
    if idx_i < 5:
        debug_raw.append(out)

    # Extract only the part after the last "Result:"
    answer_section = out.split("Result:")[-1].strip()

    # Parse robustly
    low = answer_section.lower()
    if low.startswith("anomaly"):
        label = "Anomaly"
    elif low.startswith("normal"):
        label = "Normal"
    else:
        # fallback: look for keywords anywhere
        if "anomaly" in low:
            label = "Anomaly"
        elif "normal" in low:
            label = "Normal"
        else:
            label = "Unknown"

    preds.append((i, label))

# ── Debug: print the first few raw generations ─────────────────────────────────
print("Raw generations for first 5 candidates:")
for j, raw in enumerate(debug_raw, start=1):
    print(f"{j}:", raw, "\n")

# ── Evaluate ───────────────────────────────────────────────────────────────────
y_true = y[flags_vae == 1]
y_pred = np.array([1 if lab == "Anomaly" else 0 for _, lab in preds])

print("Subset accuracy:", accuracy_score(y_true, y_pred))
print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))
print("Classification report:\n", classification_report(y_true, y_pred))


Device set to use cpu


Raw generations for first 5 candidates:
1: Example 1:
Record: Time 1556109955: PID 2774, 268 minor faults, 0 major faults, 2.0% memory.
Result: Normal

Example 2:
Record: Time 1554338670: PID 3257, 14932 minor faults, 8 major faults, 7.0% memory.
Result: Normal

Example 3:
Record: Time 1556216673: PID 3292, 1913 minor faults, 0 major faults, 0.0% memory.
Result: Anomaly

Example 4:
Record: Time 1556523080: PID 2801, 8 minor faults, 0 major faults, 1.0% memory.
Result: Anomaly

Now classify this record:
Record: Time 1556723370: PID 2533, 9 minor faults, 0 major faults, 7.0% memory.
Result: Normal

Result: 

2: Example 1:
Record: Time 1556109955: PID 2774, 268 minor faults, 0 major faults, 2.0% memory.
Result: Normal

Example 2:
Record: Time 1554338670: PID 3257, 14932 minor faults, 8 major faults, 7.0% memory.
Result: Normal

Example 3:
Record: Time 1556216673: PID 3292, 1913 minor faults, 0 major faults, 0.0% memory.
Result: Anomaly

Example 4:
Record: Time 1556523080: PID 2801, 8 mino

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [22]:
# Cell 10: Inference via Zero-Shot NLI Classification (3rd option)

import torch
from transformers import pipeline

# 1) Load zero-shot classifier
device = 0 if torch.cuda.is_available() else -1
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
    device=device
)

# 2) Candidate indices flagged by VAE
candidates = np.where(flags_vae == 1)[0]

# 3) Classify each with a hypothesis template
candidate_labels = ["normal", "anomaly"]
preds = []
for i in candidates:
    text = df.loc[i, 'text']
    res = classifier(
        text,
        candidate_labels=candidate_labels,
        hypothesis_template="This record is {}.",
        multi_label=False
    )
    top = res["labels"][0].lower()
    preds.append((i, top))

# 4) Build y_pred and evaluate
y_true = y[candidates]
y_pred = np.array([1 if lab == "anomaly" else 0 for _, lab in preds])

print("Subset accuracy:", accuracy_score(y_true, y_pred))
print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))
print("Classification report:\n", classification_report(y_true, y_pred))


Device set to use cpu


Subset accuracy: 0.41509433962264153
Confusion matrix:
 [[14 30]
 [ 1  8]]
Classification report:
               precision    recall  f1-score   support

           0       0.93      0.32      0.47        44
           1       0.21      0.89      0.34         9

    accuracy                           0.42        53
   macro avg       0.57      0.60      0.41        53
weighted avg       0.81      0.42      0.45        53

