<a href="https://colab.research.google.com/github/tousifo/ml_notebooks/blob/main/Blood_MedMNIST_QNN_AllInOne.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ========================================
# INSTALL DEPENDENCIES
# ========================================

print("📦 Installing dependencies...")

!pip install -q torch torchvision
!pip install -q pennylane pennylane-lightning
!pip install -q medmnist
!pip install -q scikit-learn
!pip install -q tqdm

print("\n✅ All packages installed successfully!")
print("   - PyTorch")
print("   - PennyLane + Lightning")
print("   - MedMNIST")
print("   - Scikit-learn")
print("   - tqdm")

📦 Installing dependencies...
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.1/57.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m934.3/934.3 kB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m61.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m54.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m78.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m167.9/167.9 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.9/115.9 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25h
✅ All packages

In [2]:
# ========================================
# HYBRID QUANTUM-RNN ARCHITECTURE
# Original design with stability fixes
# ========================================

import os, sys

ROOT = "/content/hybridqnn_seq"
SRC = f"{ROOT}/src"
os.makedirs(SRC, exist_ok=True)
open(f"{SRC}/__init__.py", "w").write("")
sys.path.append(ROOT)

# Install dependencies
print("📦 Installing packages...")
!pip install -q torch torchvision pennylane pennylane-lightning medmnist scikit-learn tqdm numpy

print("✅ Setup complete!")

📦 Installing packages...
✅ Setup complete!


In [3]:
# ---------- src/data.py ----------
open(f"{SRC}/data.py", "w").write(r'''
import medmnist
from medmnist import INFO
import torch
from torch.utils.data import Dataset
import numpy as np

class MedMNISTDataset(Dataset):
    def __init__(self, dataset_name: str, split: str = "train", transform=None):
        super().__init__()
        info = INFO[dataset_name.lower()]
        DataClass = getattr(medmnist, info['python_class'])
        self.dataset = DataClass(split=split, download=True, transform=transform)
        self.n_classes = len(info['label'])
        self.task = info['task']

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        img, label = self.dataset[idx]
        if not isinstance(img, torch.Tensor):
            img = torch.from_numpy(np.array(img)).float()
        if len(img.shape) == 2:
            img = img.unsqueeze(0)
        elif len(img.shape) == 3 and img.shape[2] in [1, 3]:
            img = img.permute(2, 0, 1)
        img = img / 255.0

        if isinstance(label, np.ndarray):
            label = torch.from_numpy(label).long()
        elif not isinstance(label, torch.Tensor):
            label = torch.tensor(label, dtype=torch.long)

        if label.dim() > 0:
            label = label.squeeze()

        return img, label
''')

print("✅ Data loader created")

✅ Data loader created


4. model

In [4]:
# ---------- src/patches.py ----------
open(f"{SRC}/patches.py", "w").write(r'''
import torch
import torch.nn as nn

class PatchEmbedding(nn.Module):
    """Convert image to patches and embed them"""
    def __init__(self, in_channels: int = 1, patch_size: int = 4, embed_dim: int = 64):
        super().__init__()
        self.patch_size = patch_size
        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)
        # ✅ FIX: Add normalization
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        # x: (B, C, H, W) -> (B, D, H', W') -> (B, N, D)
        x = self.proj(x)
        B, C, H, W = x.shape
        x = x.flatten(2).transpose(1, 2)  # (B, N, D)
        x = self.norm(x)  # ✅ Normalize patches
        return x
''')

print("✅ Patch embedding created")

✅ Patch embedding created


In [5]:
# ---------- src/rnn.py ----------
open(f"{SRC}/rnn.py", "w").write(r'''
import torch
import torch.nn as nn

class RNNRouter(nn.Module):
    """RNN with attention to select top-K important patches"""
    def __init__(self, D: int, K: int):
        super().__init__()
        self.D = D
        self.K = K

        # ✅ FIX: Bidirectional LSTM for better context
        self.rnn = nn.LSTM(D, D // 2, num_layers=2, batch_first=True,
                          bidirectional=True, dropout=0.2)

        # ✅ FIX: Multi-head attention
        self.attn = nn.MultiheadAttention(D, num_heads=4, batch_first=True, dropout=0.2)

        # Score projection
        self.topk_proj = nn.Linear(D, 1)

    def forward(self, patches):
        # patches: (B, N, D)
        B, N, D = patches.shape

        # RNN processing
        rnn_out, _ = self.rnn(patches)  # (B, N, D)

        # Self-attention
        attn_out, _ = self.attn(rnn_out, rnn_out, rnn_out)  # (B, N, D)

        # Compute importance scores
        scores = self.topk_proj(attn_out).squeeze(-1)  # (B, N)

        # Select top-K patches
        topk_vals, topk_idx = torch.topk(scores, min(self.K, N), dim=1)

        # Gather selected patches
        selected = []
        for i in range(B):
            selected.append(attn_out[i, topk_idx[i], :])
        selected = torch.stack(selected, dim=0)  # (B, K, D)

        # Aggregate to fixed-size vector
        kvec = selected.mean(dim=1)  # (B, D)

        return kvec
''')

print("✅ RNN router created")

✅ RNN router created


In [6]:
# ---------- src/quantum.py ----------
open(f"{SRC}/quantum.py", "w").write(r'''
import pennylane as qml
import torch
import torch.nn as nn
import numpy as np

class TorchQNN(nn.Module):
    """Optimized Quantum Neural Network"""
    def __init__(self, K: int, Q: int = 6, L: int = 3):
        super().__init__()
        self.Q = Q
        self.L = L

        # Input projection
        self.lin_in = nn.Linear(K, Q)

        # ✅ FIX: Better weight initialization
        self.q_weights = nn.Parameter(torch.empty(L, Q, 3))
        nn.init.uniform_(self.q_weights, -np.pi/2, np.pi/2)  # Smaller range

        # Use default.qubit for stability
        self.dev = qml.device('default.qubit', wires=Q)

        self.circuit = self._make_circuit()

    def _make_circuit(self):
        @qml.qnode(self.dev, interface='torch', diff_method='backprop')
        def circuit(inputs, weights):
            # ✅ Data encoding
            for l in range(self.L):
                for w in range(self.Q):
                    qml.RX(inputs[w], wires=w)
                    qml.RZ(0.5 * inputs[w], wires=w)

                # ✅ Variational layer
                qml.StronglyEntanglingLayers(weights[l:l+1], wires=range(self.Q))

                # Entanglement
                for w in range(self.Q - 1):
                    qml.CNOT([w, w + 1])
                if self.Q > 1:
                    qml.CNOT([self.Q - 1, 0])

            return [qml.expval(qml.PauliZ(w)) for w in range(self.Q)]

        return circuit

    def forward(self, kvec):
        B = kvec.shape[0]
        qinput = torch.tanh(self.lin_in(kvec))

        # ✅ FIX: Process in smaller chunks
        chunk_size = 8
        outputs = []

        for i in range(0, B, chunk_size):
            batch = qinput[i:i+chunk_size]
            batch_outputs = []

            for j in range(batch.shape[0]):
                out = self.circuit(batch[j], self.q_weights)
                out_tensor = torch.stack(out).float()
                batch_outputs.append(out_tensor)

            outputs.append(torch.stack(batch_outputs))

        return torch.cat(outputs, dim=0)
''')

print("✅ Quantum layer created")

✅ Quantum layer created


In [7]:
# ---------- src/models.py ----------
open(f"{SRC}/models.py", "w").write(r'''
import torch
import torch.nn as nn
from src.patches import PatchEmbedding
from src.rnn import RNNRouter
from src.quantum import TorchQNN

class HybridQRNN(nn.Module):
    """Complete Hybrid Quantum-RNN Architecture"""
    def __init__(self, in_channels: int, num_classes: int, patch_size: int = 4,
                 embed_dim: int = 64, K: int = 10, Q: int = 6, L: int = 3):
        super().__init__()

        # Patch extraction
        self.patch_embed = PatchEmbedding(in_channels, patch_size, embed_dim)

        # RNN-based routing
        self.rnn = RNNRouter(embed_dim, K)

        # Quantum layer
        self.qnn = TorchQNN(embed_dim, Q, L)

        # ✅ FIX: Better classifier
        self.fc = nn.Sequential(
            nn.Linear(Q, embed_dim),
            nn.LayerNorm(embed_dim),
            nn.GELU(),
            nn.Dropout(0.3),
            nn.Linear(embed_dim, num_classes)
        )

        # ✅ FIX: Initialize classifier
        self._init_classifier()

    def _init_classifier(self):
        for m in self.fc.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        patches = self.patch_embed(x)  # (B, N, D)
        kvec = self.rnn(patches)        # (B, D)
        qout = self.qnn(kvec)           # (B, Q)
        logits = self.fc(qout)          # (B, C)
        return logits
''')

print("✅ Hybrid model created")

✅ Hybrid model created


In [8]:
# ---------- src/train.py ----------
open(f"{SRC}/train.py", "w").write(r'''
import os, argparse
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from tqdm import tqdm
import numpy as np
from sklearn.metrics import f1_score, accuracy_score, roc_auc_score
from src.data import MedMNISTDataset
from src.models import HybridQRNN

class LabelSmoothingCE(nn.Module):
    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, pred, target):
        n_class = pred.size(1)
        one_hot = torch.zeros_like(pred).scatter_(1, target.view(-1, 1), 1)
        one_hot = one_hot * (1 - self.smoothing) + self.smoothing / n_class
        log_prob = torch.log_softmax(pred, dim=1)
        loss = -(one_hot * log_prob).sum(dim=1).mean()
        return loss

def train_one_epoch(model, loader, criterion, optimizer, device):
    model.train()
    total_loss = 0

    for imgs, labels in tqdm(loader, desc="Training", leave=False):
        imgs, labels = imgs.to(device), labels.to(device)

        optimizer.zero_grad()
        logits = model(imgs)
        loss = criterion(logits, labels)
        loss.backward()

        # ✅ Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        total_loss += loss.item()

    return total_loss / len(loader)

@torch.no_grad()
def evaluate(model, loader, device, num_classes):
    model.eval()
    all_preds, all_labels, all_probs = [], [], []

    for imgs, labels in loader:
        imgs = imgs.to(device)
        logits = model(imgs)
        probs = torch.softmax(logits, dim=1)
        preds = torch.argmax(logits, dim=1)

        all_preds.append(preds.cpu().numpy())
        all_labels.append(labels.numpy())
        all_probs.append(probs.cpu().numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)
    all_probs = np.concatenate(all_probs)

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='weighted')

    try:
        if num_classes == 2:
            auc = roc_auc_score(all_labels, all_probs[:, 1])
        else:
            auc = roc_auc_score(all_labels, all_probs, multi_class='ovr', average='weighted')
    except:
        auc = 0.0

    return acc, f1, auc

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", type=str, required=True)
    parser.add_argument("--Q", type=int, default=6)
    parser.add_argument("--L", type=int, default=3)
    parser.add_argument("--K", type=int, default=10)
    parser.add_argument("--hidden", type=int, default=64)
    parser.add_argument("--epochs", type=int, default=50)
    parser.add_argument("--seeds", type=int, default=5)
    parser.add_argument("--out", type=str, required=True)
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")

    # Load datasets
    train_ds = MedMNISTDataset(args.dataset, split="train")
    val_ds = MedMNISTDataset(args.dataset, split="val")
    test_ds = MedMNISTDataset(args.dataset, split="test")

    num_classes = train_ds.n_classes
    in_channels = train_ds[0][0].shape[0]

    # ✅ Batch size = 8 for quantum circuits
    train_loader = DataLoader(train_ds, batch_size=8, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_ds, batch_size=8, shuffle=False, num_workers=2)
    test_loader = DataLoader(test_ds, batch_size=8, shuffle=False, num_workers=2)

    results = []

    for seed in range(args.seeds):
        print(f"\n{'='*60}")
        print(f"Seed {seed}/{args.seeds-1}")
        print(f"{'='*60}")

        torch.manual_seed(seed)
        np.random.seed(seed)

        model = HybridQRNN(
            in_channels=in_channels,
            num_classes=num_classes,
            patch_size=4,
            embed_dim=args.hidden,
            K=args.K,
            Q=args.Q,
            L=args.L
        ).to(device)

        criterion = LabelSmoothingCE(smoothing=0.1)

        # ✅ FIX: Lower LR + AdamW
        optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, weight_decay=1e-4)

        # ✅ FIX: Warmup + Cosine schedule
        warmup_epochs = 5
        warmup_scheduler = torch.optim.lr_scheduler.LinearLR(
            optimizer, start_factor=0.1, total_iters=warmup_epochs
        )
        main_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=args.epochs - warmup_epochs, eta_min=1e-6
        )
        scheduler = torch.optim.lr_scheduler.SequentialLR(
            optimizer,
            schedulers=[warmup_scheduler, main_scheduler],
            milestones=[warmup_epochs]
        )

        best_f1 = 0
        patience = 15  # ✅ Increased patience
        wait = 0

        for epoch in range(1, args.epochs + 1):
            train_loss = train_one_epoch(model, train_loader, criterion, optimizer, device)
            val_acc, val_f1, val_auc = evaluate(model, val_loader, device, num_classes)

            print(f"[seed {seed}] epoch {epoch:02d}  train_loss={train_loss:.4f}  "
                  f"val_f1={val_f1:.4f}  val_acc={val_acc:.4f}  wait={wait}/{patience}")

            if val_f1 > best_f1:
                best_f1 = val_f1
                wait = 0
                torch.save(model.state_dict(), f"{args.out}/best_seed{seed}.pt")
            else:
                wait += 1

            if wait >= patience:
                print(f"Early stopping at epoch {epoch}")
                break

            scheduler.step()

        # Test
        model.load_state_dict(torch.load(f"{args.out}/best_seed{seed}.pt"))
        test_acc, test_f1, test_auc = evaluate(model, test_loader, device, num_classes)

        results.append({
            'seed': seed,
            'test_acc': test_acc,
            'test_f1': test_f1,
            'test_auc': test_auc
        })

        print(f"\n[Seed {seed}] Test: ACC={test_acc:.4f}, F1={test_f1:.4f}, AUC={test_auc:.4f}")

    # Summary
    import pandas as pd
    df = pd.DataFrame(results)
    df.to_csv(f"{args.out}/results.csv", index=False)

    print(f"\n{'='*60}")
    print(f"Final Results ({args.seeds} seeds)")
    print(f"{'='*60}")
    print(f"ACC: {df['test_acc'].mean():.4f} ± {df['test_acc'].std():.4f}")
    print(f"F1: {df['test_f1'].mean():.4f} ± {df['test_f1'].std():.4f}")
    print(f"AUC: {df['test_auc'].mean():.4f} ± {df['test_auc'].std():.4f}")

if __name__ == "__main__":
    main()
''')

print("✅ Training script created")

✅ Training script created


In [None]:
%cd /content/hybridqnn_seq
import os
from datetime import datetime

TS = datetime.now().strftime("%Y%m%d_%H%M%S")

# ✅ OPTIMIZED CONFIG: Original architecture with fixes
CONFIGS = [
    # (DATASET,       Q,  L,  K, HIDDEN, EPOCHS, SEEDS)
    ("BloodMNIST",   6,  3,  10,   64,    50,     5),
]

for DATASET, Q, L, K, HIDDEN, EPOCHS, SEEDS in CONFIGS:
    OUTDIR = f"runs/{DATASET}_Q{Q}L{L}_K{K}_H{HIDDEN}_{TS}"
    os.makedirs(OUTDIR, exist_ok=True)

    print(f"\n{'='*70}")
    print(f"🔬 Training {DATASET} → {OUTDIR}")
    print(f"{'='*70}")

    !python -m src.train \
        --dataset {DATASET} \
        --Q {Q} --L {L} --K {K} --hidden {HIDDEN} \
        --epochs {EPOCHS} --seeds {SEEDS} \
        --out {OUTDIR}

    print(f"✅ Completed: {OUTDIR}\n")

print(f"\n{'='*70}")
print("🎉 Training complete!")
print(f"{'='*70}")

/content/hybridqnn_seq

🔬 Training BloodMNIST → runs/BloodMNIST_Q6L3_K10_H64_20251026_193607
Device: cpu
100% 35.5M/35.5M [00:01<00:00, 20.1MB/s]

Seed 0/4
[seed 0] epoch 01  train_loss=1.9454  val_f1=0.3755  val_acc=0.4755  wait=0/15
[seed 0] epoch 02  train_loss=1.5271  val_f1=0.3821  val_acc=0.4638  wait=0/15
[seed 0] epoch 03  train_loss=1.2164  val_f1=0.6812  val_acc=0.7068  wait=0/15
[seed 0] epoch 04  train_loss=1.0737  val_f1=0.7610  val_acc=0.7623  wait=0/15
[seed 0] epoch 05  train_loss=1.0026  val_f1=0.7829  val_acc=0.7886  wait=0/15
[seed 0] epoch 06  train_loss=0.9602  val_f1=0.7310  val_acc=0.7272  wait=0/15
[seed 0] epoch 07  train_loss=0.9149  val_f1=0.7299  val_acc=0.7477  wait=1/15
[seed 0] epoch 08  train_loss=0.8810  val_f1=0.8232  val_acc=0.8283  wait=2/15
[seed 0] epoch 09  train_loss=0.8511  val_f1=0.8737  val_acc=0.8721  wait=0/15
[seed 0] epoch 10  train_loss=0.8239  val_f1=0.8318  val_acc=0.8435  wait=0/15
[seed 0] epoch 11  train_loss=0.7874  val_f1=0.8708  v