In [3]:
#!/usr/bin/env python3
import random
import numpy as np
import pandas as pd
from pathlib import Path

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

from torchvision import transforms
from torchvision.models import vgg16, VGG16_Weights
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    roc_auc_score,
    roc_curve,
    ConfusionMatrixDisplay
)
from tqdm import tqdm
import matplotlib.pyplot as plt

# ──────────────────────────────────────────────────────────────────────────────
# 1) Config & reproducibility
# ──────────────────────────────────────────────────────────────────────────────
STEGO_CSV    = Path("csv/stego_final.csv")
IMAGES_DIR   = Path("Images")
BATCH_SIZE   = 32
EPOCHS       = 25
LR           = 1e-3
WEIGHT_DECAY = 1e-4
LABEL_SMOOTH = 0.1
RANDOM_SEED  = 42

random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

DEVICE = "cuda:2" if torch.cuda.is_available() else "cpu"

# ──────────────────────────────────────────────────────────────────────────────
# 2) Build & split DataFrame (2 classes: none vs lsb)
# ──────────────────────────────────────────────────────────────────────────────
df = pd.read_csv(STEGO_CSV)

# keep only LSB stego images
df_stego = df[df.method == "lsb"].copy()
df_stego["label"]    = "lsb"
df_stego["img_path"] = df_stego["stego_path"]

# sample equal number of cover images
n_stego    = len(df_stego)
all_images = list(IMAGES_DIR.glob("*"))
used_names = set(Path(p).name for p in df_stego["img_path"])
candidates = [str(p) for p in all_images if p.name not in used_names]
df_none = pd.DataFrame({
    "label":    ["none"] * n_stego,
    "img_path": random.sample(candidates, n_stego)
})

# combine & shuffle
df2 = pd.concat([df_stego[["label","img_path"]], df_none], ignore_index=True)
df2 = df2.sample(frac=1, random_state=RANDOM_SEED).reset_index(drop=True)

# stratified train/val/test split
df_train, df_test = train_test_split(
    df2, test_size=0.30, stratify=df2["label"], random_state=RANDOM_SEED
)
df_train, df_val  = train_test_split(
    df_train, test_size=0.20, stratify=df_train["label"], random_state=RANDOM_SEED
)

# ──────────────────────────────────────────────────────────────────────────────
# 3) Dataset & DataLoaders
# ──────────────────────────────────────────────────────────────────────────────
label_map = {"none": 0, "lsb": 1}

train_tf = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8,1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225]),
])
val_tf = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],
                         [0.229,0.224,0.225]),
])

class Stego2Dataset(Dataset):
    def __init__(self, df, tf):
        self.df = df.reset_index(drop=True)
        self.tf = tf
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        row   = self.df.iloc[idx]
        img   = Image.open(row.img_path).convert("RGB")
        img   = self.tf(img)
        label = label_map[row.label]
        return img, label

train_loader = DataLoader(
    Stego2Dataset(df_train, train_tf),
    batch_size=BATCH_SIZE, shuffle=True,
    num_workers=4, pin_memory=True
)
val_loader = DataLoader(
    Stego2Dataset(df_val,   val_tf),
    batch_size=BATCH_SIZE, shuffle=False,
    num_workers=4, pin_memory=True
)
test_loader = DataLoader(
    Stego2Dataset(df_test,  val_tf),
    batch_size=BATCH_SIZE, shuffle=False,
    num_workers=4, pin_memory=True
)



In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# 4) Model: VGG-16 → 2-way head
# ──────────────────────────────────────────────────────────────────────────────
model = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)
in_f = model.classifier[6].in_features
model.classifier[6] = nn.Linear(in_f, 2)
model = model.to(DEVICE)

# ──────────────────────────────────────────────────────────────────────────────
# 5) Loss, optimizer (fixed LR), TensorBoard
# ──────────────────────────────────────────────────────────────────────────────
# inverse-frequency class weights
counts = df2.label.value_counts().to_dict()
total = len(df2)
weights = [ total/(2*counts[c]) for c in ["none","lsb"] ]
class_weights = torch.tensor(weights, device=DEVICE)

criterion = nn.CrossEntropyLoss(
    weight=class_weights,
    label_smoothing=LABEL_SMOOTH
)
optimizer = Adam(
    model.parameters(),
    lr=LR,
    weight_decay=WEIGHT_DECAY
)

writer       = SummaryWriter("runs/stego_vgg2_fixedlr")
train_losses = []; val_losses = []
train_accs   = []; val_accs   = []

# ──────────────────────────────────────────────────────────────────────────────
# 6) Training & validation loops
# ──────────────────────────────────────────────────────────────────────────────
def train_epoch():
    model.train()
    loss_sum = correct = total = 0
    for imgs, lbls in tqdm(train_loader, desc="Train", leave=False):
        imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)
        optimizer.zero_grad()
        out  = model(imgs)
        loss = criterion(out, lbls)
        loss.backward()
        optimizer.step()

        loss_sum += loss.item() * lbls.size(0)
        preds    = out.argmax(dim=1)
        correct  += (preds==lbls).sum().item()
        total    += lbls.size(0)
    return loss_sum/total, correct/total

@torch.no_grad()
def validate():
    model.eval()
    loss_sum = correct = total = 0
    for imgs, lbls in tqdm(val_loader, desc="Val  ", leave=False):
        imgs, lbls = imgs.to(DEVICE), lbls.to(DEVICE)
        out  = model(imgs)
        loss = criterion(out, lbls)

        loss_sum += loss.item() * lbls.size(0)
        preds    = out.argmax(dim=1)
        correct  += (preds==lbls).sum().item()
        total    += lbls.size(0)
    return loss_sum/total, correct/total

best_val_acc = 0.0
for epoch in range(1, EPOCHS+1):
    tr_loss, tr_acc   = train_epoch()
    val_loss, val_acc = validate()

    train_losses.append(tr_loss);  val_losses.append(val_loss)
    train_accs.append(tr_acc);     val_accs.append(val_acc)

    writer.add_scalar("Loss/train", tr_loss, epoch)
    writer.add_scalar("Loss/val",   val_loss, epoch)
    writer.add_scalar("Acc/train",  tr_acc,   epoch)
    writer.add_scalar("Acc/val",    val_acc,   epoch)

    print(f"Epoch {epoch}/{EPOCHS}  "
          f"Train loss={tr_loss:.4f}, acc={tr_acc:.4f}  "
          f" Val loss={val_loss:.4f}, acc={val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_vgg2_fixedlr.pth")
        print(" → new best!")

writer.close()



                                                                                

Epoch 1/25  Train loss=1.1288, acc=0.5188   Val loss=0.7165, acc=0.5000
 → new best!


                                                                                

Epoch 2/25  Train loss=0.7022, acc=0.4804   Val loss=0.6957, acc=0.5000


                                                                                

Epoch 3/25  Train loss=0.6987, acc=0.5080   Val loss=0.6939, acc=0.5000


                                                                                

Epoch 4/25  Train loss=0.6960, acc=0.4920   Val loss=0.7011, acc=0.5000


                                                                                

Epoch 5/25  Train loss=0.6982, acc=0.4884   Val loss=0.6938, acc=0.5000


                                                                                

Epoch 6/25  Train loss=0.7070, acc=0.4830   Val loss=0.6989, acc=0.5000


                                                                                

Epoch 7/25  Train loss=0.6989, acc=0.4955   Val loss=0.6932, acc=0.5000


                                                                                

Epoch 8/25  Train loss=0.6933, acc=0.5214   Val loss=0.6942, acc=0.5000


                                                                                

Epoch 9/25  Train loss=0.7157, acc=0.4813   Val loss=0.6977, acc=0.5000


                                                                                

Epoch 10/25  Train loss=0.8827, acc=0.5232   Val loss=0.7193, acc=0.5000


                                                                                

Epoch 11/25  Train loss=0.7204, acc=0.5089   Val loss=0.6934, acc=0.5000


                                                                                

Epoch 12/25  Train loss=0.6967, acc=0.4893   Val loss=0.6933, acc=0.5000


                                                                                

Epoch 13/25  Train loss=0.7005, acc=0.5116   Val loss=0.7008, acc=0.5000


                                                                                

Epoch 14/25  Train loss=0.6963, acc=0.5054   Val loss=0.6937, acc=0.5000


                                                                                

Epoch 15/25  Train loss=0.6949, acc=0.4875   Val loss=0.6932, acc=0.5000


                                                                                

Epoch 16/25  Train loss=0.6965, acc=0.4946   Val loss=0.6932, acc=0.5000


                                                                                

Epoch 17/25  Train loss=0.6948, acc=0.5062   Val loss=0.6955, acc=0.5000


                                                                                

Epoch 18/25  Train loss=0.6947, acc=0.5107   Val loss=0.6937, acc=0.5000


Val  :   0%|                                              | 0/9 [00:00<?, ?it/s]

In [None]:
# ──────────────────────────────────────────────────────────────────────────────
# 7) Plot Loss & Accuracy vs. Epoch
# ──────────────────────────────────────────────────────────────────────────────
epochs = range(1, EPOCHS+1)

plt.figure(figsize=(8,5))
plt.plot(epochs, train_accs, marker='o', label='Train Acc')
plt.plot(epochs, val_accs,   marker='o', label='Val   Acc')
plt.xlabel('Epoch'); plt.ylabel('Accuracy')
plt.title('Accuracy vs. Epoch'); plt.legend(); plt.grid(True); plt.show()

plt.figure(figsize=(8,5))
plt.plot(epochs, train_losses, marker='o', label='Train Loss')
plt.plot(epochs, val_losses,   marker='o', label='Val   Loss')
plt.xlabel('Epoch'); plt.ylabel('Loss')
plt.title('Loss vs. Epoch'); plt.legend(); plt.grid(True); plt.show()

# ──────────────────────────────────────────────────────────────────────────────
# 8) Test-set evaluation
# ──────────────────────────────────────────────────────────────────────────────
model.load_state_dict(torch.load("best_vgg2_fixedlr.pth"))
model.eval()

all_lbls, all_preds, all_probs = [], [], []
with torch.no_grad():
    for imgs, lbls in test_loader:
        imgs = imgs.to(DEVICE)
        out  = model(imgs)
        probs= torch.softmax(out, dim=1)[:,1].cpu().numpy()
        preds= out.argmax(dim=1).cpu().numpy()

        all_lbls.extend(lbls.numpy())
        all_preds.extend(preds)
        all_probs.extend(probs)

# confusion matrix
cm = confusion_matrix(all_lbls, all_preds)
ConfusionMatrixDisplay(cm, display_labels=["none","lsb"])\
    .plot(cmap="Blues")
plt.title("Confusion Matrix (thr=0.5)"); plt.show()

# classification report
print(classification_report(all_lbls, all_preds, target_names=["none","lsb"]))

# ROC & AUC
fpr, tpr, _ = roc_curve(all_lbls, all_probs)
auc_score   = roc_auc_score(all_lbls, all_probs)
plt.figure()
plt.plot(fpr, tpr, label=f"AUC = {auc_score:.4f}")
plt.plot([0,1],[0,1],"k--")
plt.xlabel("FPR"); plt.ylabel("TPR")
plt.title("ROC Curve"); plt.legend(); plt.show()

# optimal threshold
opt_idx = np.argmax(tpr - fpr)
print("Optimal threshold (max TPR–FPR):", _[opt_idx])
