<a href="https://colab.research.google.com/github/shuvookd/Thesis_MSc/blob/main/moco_linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# -------------------------------
# Load and Prepare Data
# -------------------------------
# Read the two UNSW-NB15 parquet files from the Colab workspace.
train = pd.read_parquet("/content/UNSW_NB15_training-set.parquet")
test = pd.read_parquet("/content/UNSW_NB15_testing-set.parquet")

# Stack both files into one frame. ignore_index=True gives every row a unique
# label; a plain concat keeps the duplicated labels of the two inputs, which
# would leave `y` label-misaligned with the freshly indexed `X` built below.
df = pd.concat([train, test], ignore_index=True)
df = df.drop(columns=["id", "attack_cat"], errors="ignore")

# Replace each categorical column with the integer codes from pd.factorize.
for col in ['proto', 'service', 'state']:
    df[col] = pd.factorize(df[col])[0]

# Features are every remaining column except the target column "label".
X = df.drop(columns=["label"])
y = df["label"]

# Standardise every feature column.
# NOTE(review): the scaler is fitted on all rows, including rows that later
# end up in test splits -- confirm this is acceptable for the evaluation.
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# -------------------------------
# MoCo Model Components
# -------------------------------
def build_encoder(input_dim):
    """Build the MLP encoder that maps a feature vector to a 64-unit projection.

    The network has two hidden stages, each Dense -> BatchNormalization ->
    swish, with 256 and 128 units. Dropout(0.3) follows the first stage only.
    A final Dense(64) layer without activation produces the projection.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        A Keras functional model from the input layer to the projection.
    """
    # (units, dropout rate or None) for each hidden stage, in order.
    hidden_stages = ((256, 0.3), (128, None))

    inputs = layers.Input(shape=(input_dim,))
    hidden = inputs
    for units, drop_rate in hidden_stages:
        hidden = layers.Dense(units)(hidden)
        hidden = layers.BatchNormalization()(hidden)
        hidden = layers.Activation('swish')(hidden)
        if drop_rate is not None:
            hidden = layers.Dropout(drop_rate)(hidden)

    outputs = layers.Dense(64)(hidden)
    return models.Model(inputs, outputs)

def contrastive_loss(query, key, queue, temperature=0.07):
    """Compute the InfoNCE loss of queries against one positive and a queue.

    All three inputs are L2-normalised along axis 1. Each query's logit
    vector is its dot product with its own key (column 0, the positive)
    followed by its dot products with every queue row (the negatives), all
    divided by ``temperature``. The loss is the mean softmax cross-entropy
    with target class 0.

    Args:
        query: Query embeddings, one row per sample.
        key: Key embeddings, row-aligned with ``query``.
        queue: Negative embeddings, one row per entry.
        temperature: Divisor applied to the logits before the softmax.

    Returns:
        Scalar tensor holding the mean loss over the batch.
    """
    query = tf.math.l2_normalize(query, axis=1)
    key = tf.math.l2_normalize(key, axis=1)
    queue = tf.math.l2_normalize(queue, axis=1)

    # Positive logit: per-row dot product, kept as a column vector.
    l_pos = tf.reduce_sum(query * key, axis=1, keepdims=True)
    # Negative logits: every query against every queue entry.
    l_neg = tf.matmul(query, queue, transpose_b=True)
    logits = tf.concat([l_pos, l_neg], axis=1) / temperature

    # Use the dynamic shape: the static `logits.shape[0]` is None when the
    # batch dimension is unknown (e.g. inside a traced tf.function).
    labels = tf.zeros(tf.shape(logits)[:1], dtype=tf.int32)
    per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits
    )
    return tf.reduce_mean(per_sample)

def augment_tabular(X, mask_prob=0.15, noise_std=0.05):
    """Return a randomly masked and noise-perturbed copy of ``X``.

    An element is kept when a uniform draw in [0, 1) exceeds ``mask_prob``
    and is zeroed otherwise. Gaussian noise with mean 0 and standard
    deviation ``noise_std`` is then added to every element, including the
    zeroed ones.

    Args:
        X: Array of features; only its ``shape`` and arithmetic are used.
        mask_prob: Threshold for the uniform draw below which a value is zeroed.
        noise_std: Standard deviation of the additive Gaussian noise.

    Returns:
        ``X * keep_mask + noise`` with the same shape as ``X``.
    """
    shape = X.shape
    # Draw order matters for reproducibility under a fixed seed: mask first.
    keep = np.random.rand(*shape) > mask_prob
    jitter = np.random.normal(0, noise_std, size=shape)
    return X * keep.astype(np.float32) + jitter.astype(np.float32)

@tf.function
def momentum_update(query_encoder, key_encoder, m=0.999):
    """Move the key encoder towards the query encoder by an exponential average.

    Every weight ``k`` of ``key_encoder`` becomes ``m * k + (1 - m) * q``,
    where ``q`` is the matching weight of ``query_encoder``.

    The update walks ``weights`` rather than ``trainable_variables`` so that
    non-trainable weights (for example BatchNormalization moving statistics)
    are tracked as well. With only the trainable subset, a key encoder that
    is always called with ``training=False`` would keep normalising with the
    statistics it held at initialisation.

    Args:
        query_encoder: Model whose weights are the update target.
        key_encoder: Model updated in place; must expose its weights in the
            same order as ``query_encoder``.
        m: Momentum coefficient; larger values change the key encoder less.
    """
    for q, k in zip(query_encoder.weights, key_encoder.weights):
        k.assign(m * k + (1 - m) * q)

class MoCoQueue:
    """Fixed-size circular buffer of key embeddings used as negatives.

    The buffer starts filled with L2-normalised random normal rows, and new
    keys overwrite the slots starting at a write pointer that wraps around.
    """

    def __init__(self, dim=64, size=8192):
        """Create a ``size`` x ``dim`` buffer and a write pointer at slot 0."""
        self.size = size
        # Random rows scaled to unit L2 norm serve as the initial content.
        unit_rows = tf.math.l2_normalize(tf.random.normal([size, dim]), axis=1)
        self.queue = tf.Variable(unit_rows, trainable=False)
        self.ptr = tf.Variable(0, trainable=False, dtype=tf.int32)

    @tf.function
    def enqueue(self, keys):
        """Write ``keys`` into consecutive slots, wrapping at the buffer end.

        Args:
            keys: Tensor whose rows are stored as given (no normalisation
                is applied here).
        """
        n_keys = tf.shape(keys)[0]
        end = self.ptr + n_keys
        # Target slot for each incoming row, modulo the buffer length.
        slots = tf.math.floormod(tf.range(self.ptr, end), self.size)
        self.queue.scatter_nd_update(slots[:, tf.newaxis], keys)
        self.ptr.assign(tf.math.floormod(end, self.size))

    def get(self):
        """Return the underlying queue variable (not a copy)."""
        return self.queue

# -------------------------------
# Run MoCo + Logistic Regression
# -------------------------------
def run_moco_logreg(X, y, ratios, epochs=20, batch_size=512):
    """Pretrain a MoCo encoder per split and score it with logistic regression.

    For every ``(train_pct, test_pct)`` pair the data is split with
    stratification (fixed ``random_state=42``). A fresh query/key encoder
    pair is trained contrastively on the training rows only, then a
    ``LogisticRegression`` is fitted on the query encoder's embeddings of
    the training rows and evaluated on the embeddings of the test rows.

    Args:
        X: NumPy feature matrix, one row per sample.
        y: Labels aligned with ``X``; the report is read through the keys
            ``'0'`` and ``'1'``.
        ratios: Iterable of ``(train_pct, test_pct)`` pairs; only
            ``test_pct / 100`` is passed to ``train_test_split``.
        epochs: Number of passes over the training rows.
        batch_size: Number of rows per contrastive mini-batch.

    Returns:
        DataFrame with one row per pair and the columns ``combo``,
        ``train_pct``, ``test_pct``, ``epoch``, ``loss``, ``accuracy``,
        ``f1_0`` and ``f1_1``. ``loss`` is the loss of the last processed
        mini-batch, or NaN if no mini-batch was processed.
    """
    results = []
    # Cast once up front; the cast does not depend on the split.
    X = X.astype(np.float32)

    for combo_id, (train_pct, test_pct) in enumerate(ratios, start=1):
        print(f"\n🔁 Combo {combo_id} → Train:Test = {train_pct}:{test_pct}")
        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=test_pct / 100, stratify=y, random_state=42
        )

        n_features = X_train.shape[1]
        encoder = build_encoder(n_features)
        key_encoder = build_encoder(n_features)
        # Start the key encoder as an exact copy of the query encoder.
        for q, k in zip(encoder.variables, key_encoder.variables):
            k.assign(q)

        optimizer = tf.keras.optimizers.Adam(3e-4)
        queue = MoCoQueue()

        # Defined before the loops so the reporting below cannot hit an
        # unbound name when no mini-batch runs (e.g. epochs=0).
        loss = float("nan")

        for epoch in range(epochs):
            idx = np.random.permutation(X_train.shape[0])
            X_shuffle = X_train[idx]

            for i in range(0, len(X_shuffle), batch_size):
                batch = X_shuffle[i:i + batch_size]
                # Skip a trailing batch that holds a single row.
                if batch.shape[0] < 2:
                    continue
                # Two independently augmented views of the same rows.
                x_q = augment_tabular(batch)
                x_k = augment_tabular(batch)

                with tf.GradientTape() as tape:
                    z_q = encoder(x_q, training=True)
                    z_k = key_encoder(x_k, training=False)
                    # Keys are targets only; no gradient flows through them.
                    z_k = tf.stop_gradient(z_k)
                    loss = contrastive_loss(z_q, z_k, queue.get())

                grads = tape.gradient(loss, encoder.trainable_variables)
                optimizer.apply_gradients(zip(grads, encoder.trainable_variables))
                momentum_update(encoder, key_encoder)
                # The keys of this batch become negatives for later batches.
                queue.enqueue(z_k)

            # Reports the loss of the last mini-batch of the epoch.
            print(f" Epoch {epoch + 1}/{epochs}, Loss: {float(loss):.4f}")

        emb_train = encoder.predict(X_train)
        emb_test = encoder.predict(X_test)

        clf = LogisticRegression(max_iter=1000)
        clf.fit(emb_train, y_train)
        y_pred = clf.predict(emb_test)

        report = classification_report(y_test, y_pred, output_dict=True)
        accuracy = report['accuracy']
        f1_0 = report['0']['f1-score']
        f1_1 = report['1']['f1-score']

        results.append({
            'combo': combo_id,
            'train_pct': train_pct,
            'test_pct': test_pct,
            'epoch': epochs,
            'loss': float(loss),
            'accuracy': accuracy,
            'f1_0': f1_0,
            'f1_1': f1_1,
        })

        print(f"✅ Finished Combo {combo_id}: "
              f"Accuracy={accuracy:.4f}, "
              f"F1 Class 0={f1_0:.4f}, "
              f"F1 Class 1={f1_1:.4f}")

    return pd.DataFrame(results)

# -------------------------------
# Train + Save + Visualize
# -------------------------------
# Train:test percentage pairs from 10:90 up to 90:10 in steps of ten.
ratios = [(train_share, 100 - train_share) for train_share in range(10, 100, 10)]
# The experiment works on plain NumPy arrays, not on the pandas objects.
results_df = run_moco_logreg(X.values, y.values, ratios)
# Persist one row per split, without the DataFrame index column.
results_df.to_csv("moco_logreg_results.csv", index=False)

# Accuracy vs Train Ratio Plot
# results_df holds one row per split and its "epoch" column is the same
# value in every row, so plotting against "epoch" would stack all points at
# a single x position. Accuracy is plotted against the train share instead.
plt.figure(figsize=(12, 7))
sns.lineplot(data=results_df, x="train_pct", y="accuracy", marker="o")
plt.title("Accuracy vs Train Ratio (Logistic Regression)")
plt.xlabel("Train %")
plt.ylabel("Accuracy")
plt.tight_layout()
plt.show()

# Average F1 Score Plot
# Mean per-class F1 for each train share, reshaped to long form so that the
# class ("f1_0" / "f1_1") can drive the bar colour.
avg_scores = (
    results_df
    .groupby("train_pct")[["f1_0", "f1_1"]]
    .mean()
    .reset_index()
    .melt(id_vars="train_pct", var_name="Class", value_name="F1-Score")
)

plt.figure(figsize=(12, 6))
sns.barplot(data=avg_scores, x="train_pct", y="F1-Score", hue="Class")
plt.title("Average F1-Score by Train Ratio and Class (Logistic Regression)")
plt.xlabel("Train %")
plt.ylabel("F1-Score")
plt.tight_layout()
plt.show()

# Best Accuracy Plot
# Highest accuracy recorded for each train share.
best_acc = results_df.groupby("train_pct")["accuracy"].max().reset_index()
plt.figure(figsize=(10, 6))
# Passing `palette` without `hue` is deprecated in seaborn 0.13, so the
# colours are tied to the x variable; dodge=False keeps one full-width bar
# per train share.
ax = sns.barplot(
    data=best_acc, x="train_pct", y="accuracy",
    hue="train_pct", palette="viridis", dodge=False,
)
# The hue legend only repeats the x-axis labels; remove it if one was drawn.
legend = ax.get_legend()
if legend is not None:
    legend.remove()
plt.title("Best Accuracy per Train % (Logistic Regression)")
plt.xlabel("Train %")
plt.ylabel("Best Accuracy")
plt.ylim(0.5, 1.0)
plt.tight_layout()
plt.show()



🔁 Combo 1 → Train:Test = 10:90
 Epoch 1/20, Loss: 6.5520
 Epoch 2/20, Loss: 6.1999
 Epoch 3/20, Loss: 6.0399
 Epoch 4/20, Loss: 6.0731
 Epoch 5/20, Loss: 6.0904
 Epoch 6/20, Loss: 5.9183
 Epoch 7/20, Loss: 6.0716
 Epoch 8/20, Loss: 6.0081
 Epoch 9/20, Loss: 5.8959
 Epoch 10/20, Loss: 5.7670
 Epoch 11/20, Loss: 5.8386
 Epoch 12/20, Loss: 5.8900
 Epoch 13/20, Loss: 5.5774
 Epoch 14/20, Loss: 5.6856
 Epoch 15/20, Loss: 5.7271
 Epoch 16/20, Loss: 5.8022
