In [19]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
import random


In [20]:
# Gaussian Noise
def add_gaussian_noise(img, mean=0, std=0.3):
    """Return a noisy copy of ``img`` with values clamped to ``[0, 1]``.

    Args:
        img: Tensor to corrupt. It is not modified.
        mean: Mean of the additive Gaussian noise.
        std: Standard deviation of the additive Gaussian noise.

    Returns:
        A new tensor with the same shape as ``img`` holding
        ``clamp(img + noise, 0, 1)``.
    """
    # Sample on the input's own device so the addition also works for
    # tensors that do not live on the CPU.
    noise = torch.randn(img.size(), device=img.device) * std + mean
    return torch.clamp(img + noise, 0., 1.)

# Occlusion Noise (random black box)
def add_occlusion(img, box_size=7):
    """Return a copy of ``img`` with one randomly placed rectangle set to zero.

    Args:
        img: 3-D tensor; the last two dimensions are treated as height and
            width, and the patch is blanked across the whole first dimension.
        box_size: Side length of the blanked square. A value larger than the
            image height or width is clamped to that dimension, so an
            oversized box blanks the whole image.

    Returns:
        A new tensor of the same shape; ``img`` itself is left untouched.
    """
    img = img.clone()
    _, h, w = img.shape
    # random.randint raises ValueError on an empty range, so never let the
    # box be larger than the image along either axis.
    box_h = min(box_size, h)
    box_w = min(box_size, w)
    # x is drawn before y, which keeps seeded runs reproducible.
    x = random.randint(0, w - box_w)
    y = random.randint(0, h - box_h)
    img[:, y:y + box_h, x:x + box_w] = 0.0
    return img


In [21]:
class NoisyMNISTDataset(torch.utils.data.Dataset):
    """Dataset wrapper that corrupts every image of ``base_dataset`` on access.

    ``noise_type`` selects the corruption: ``'gaussian'`` routes the image
    through ``add_gaussian_noise`` and ``'occlusion'`` through
    ``add_occlusion``. Any other value returns the sample unchanged. The
    noise is applied again on every lookup, so repeated reads of the same
    index can differ.
    """

    def __init__(self, base_dataset, noise_type='gaussian'):
        # base_dataset must support len() and integer indexing that yields
        # (image, label) pairs.
        self.data = base_dataset
        self.noise_type = noise_type

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        image, target = self.data[index]
        mode = self.noise_type
        if mode == 'gaussian':
            return add_gaussian_noise(image), target
        if mode == 'occlusion':
            return add_occlusion(image), target
        # Unrecognised modes pass the sample through untouched.
        return image, target

# Clean MNIST splits, stored under ./data1 and converted with ToTensor.
transform = transforms.ToTensor()
train_clean, test_clean = (
    datasets.MNIST(root='./data1', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Wrap both splits so noise is added on access ('occlusion' is the other supported mode).
train_noisy, test_noisy = (
    NoisyMNISTDataset(split, noise_type='gaussian')
    for split in (train_clean, test_clean)
)

# Only the training loader is shuffled.
train_loader = DataLoader(dataset=train_noisy, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_noisy, batch_size=256, shuffle=False)


In [22]:
class TeacherCNN(nn.Module):
    """Larger ("teacher") CNN mapping single-channel images to 10 raw class scores.

    Two 3x3 convolution + ReLU + 2x2 max-pool stages (32, then 64 channels)
    feed a 128-unit hidden layer. The first linear layer expects
    ``64 * 7 * 7`` features, which matches 28x28 inputs after the two
    pooling steps.

    NOTE: a later cell of this notebook defines another ``TeacherCNN`` that
    stores its layers as separate attributes. The ``state_dict`` keys of the
    two variants differ, so their checkpoints are not interchangeable.
    """

    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Linear(64 * 7 * 7, 128), nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        """Return the logits (no softmax applied) for a batch of images ``x``."""
        features = self.conv(x)
        # flatten(…, 1) keeps the batch dimension and, unlike view(N, -1),
        # also copes with empty batches and non-contiguous activations.
        return self.fc(torch.flatten(features, 1))

class StudentCNN(nn.Module):
    """Smaller ("student") CNN mapping single-channel images to 10 raw class scores.

    Same layout as the teacher but with half the width: 16 and 32
    convolution channels and a 64-unit hidden layer. The first linear layer
    expects ``32 * 7 * 7`` features, which matches 28x28 inputs after the
    two pooling steps.

    NOTE: a later cell of this notebook defines another ``StudentCNN`` that
    stores its layers as separate attributes. The ``state_dict`` keys of the
    two variants differ.
    """

    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 32, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2)
        )
        self.fc = nn.Sequential(
            nn.Linear(32 * 7 * 7, 64), nn.ReLU(),
            nn.Linear(64, 10)
        )

    def forward(self, x):
        """Return the logits (no softmax applied) for a batch of images ``x``."""
        features = self.conv(x)
        # flatten(…, 1) keeps the batch dimension and, unlike view(N, -1),
        # also copes with empty batches and non-contiguous activations.
        return self.fc(torch.flatten(features, 1))


In [23]:
def distillation_loss(student_logits, teacher_logits, labels, T=3, alpha=0.7):
    """Blend a temperature-softened KL term with plain cross-entropy.

    Args:
        student_logits: Raw student scores; classes along dim 1.
        teacher_logits: Raw teacher scores, same layout as the student's.
        labels: Ground-truth class indices for the cross-entropy term.
        T: Temperature dividing both sets of logits before the softmax.
        alpha: Weight of the KL term; cross-entropy gets ``1 - alpha``.

    Returns:
        Scalar tensor ``alpha * KL * T**2 + (1 - alpha) * CE``.
    """
    kd_criterion = nn.KLDivLoss(reduction='batchmean')
    student_log_probs = F.log_softmax(student_logits / T, dim=1)
    teacher_probs = F.softmax(teacher_logits / T, dim=1)
    # The KL term is multiplied by T squared.
    distill_term = kd_criterion(student_log_probs, teacher_probs) * (T * T)
    label_term = F.cross_entropy(student_logits, labels)
    return alpha * distill_term + (1 - alpha) * label_term

def train_kd(student, teacher, optimizer, train_loader, test_loader, epochs=5):
    """Distil ``teacher`` into ``student`` with a caller-supplied optimizer.

    Every epoch makes one pass over ``train_loader`` minimising
    ``distillation_loss`` (with its default temperature and alpha), then
    measures plain cross-entropy and accuracy on ``test_loader``. Both
    models are moved to the notebook-level ``device``, which must be defined
    before this function is called.

    NOTE: a later cell defines another ``train_kd`` with a different
    signature (no ``optimizer`` argument); whichever cell ran last wins.

    Args:
        student: Model being trained.
        teacher: Model providing soft targets; put in eval mode and run
            without gradient tracking.
        optimizer: Optimizer stepped once per training batch.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        test_loader: Iterable of ``(inputs, labels)`` evaluation batches.
        epochs: Number of epochs to run.

    Returns:
        ``(student, history)`` where ``history`` maps ``'epoch'``,
        ``'train_loss'``, ``'train_acc'``, ``'test_loss'``, ``'test_acc'``
        and ``'time'`` to per-epoch lists. ``'time'`` covers training plus
        evaluation, in seconds.
    """
    student, teacher = student.to(device), teacher.to(device)
    teacher.eval()

    history = {
        key: []
        for key in ('epoch', 'train_loss', 'train_acc', 'test_loss', 'test_acc', 'time')
    }

    for ep in range(1, epochs + 1):
        # ---- optimisation pass ----
        student.train()
        loss_sum, hits, seen = 0, 0, 0
        tic = time.time()

        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            with torch.no_grad():
                teacher_logits = teacher(inputs)
            student_logits = student(inputs)
            batch_loss = distillation_loss(student_logits, teacher_logits, targets)
            batch_loss.backward()
            optimizer.step()

            batch_size = inputs.size(0)
            # Weight by batch size so the epoch mean is per-sample.
            loss_sum += batch_loss.item() * batch_size
            hits += (torch.max(student_logits, 1)[1] == targets).sum().item()
            seen += batch_size

        train_loss = loss_sum / seen
        train_acc = hits / seen

        # ---- evaluation pass (hard-label cross-entropy only) ----
        loss_sum, hits, seen = 0, 0, 0
        student.eval()
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                logits = student(inputs)
                batch_size = inputs.size(0)
                loss_sum += F.cross_entropy(logits, targets).item() * batch_size
                hits += (torch.max(logits, 1)[1] == targets).sum().item()
                seen += batch_size
        test_loss = loss_sum / seen
        test_acc = hits / seen
        epoch_time = time.time() - tic

        epoch_stats = {
            'epoch': ep,
            'train_loss': train_loss,
            'train_acc': train_acc,
            'test_loss': test_loss,
            'test_acc': test_acc,
            'time': epoch_time,
        }
        for key, value in epoch_stats.items():
            history[key].append(value)

        print(f"Epoch {ep} - Loss: {train_loss:.4f}, Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}")

    return student, history


In [24]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

class TeacherCNN(nn.Module):
    """Larger ("teacher") CNN mapping single-channel images to 10 raw class scores.

    Two 3x3 convolutions (32, then 64 channels), each followed by ReLU and a
    shared 2x2 max-pool, feed a 128-unit hidden layer. ``fc1`` expects
    ``64 * 7 * 7`` features, which matches 28x28 inputs after the two
    pooling steps.

    The layer attribute names (``conv1``, ``conv2``, ``fc1``, ``fc2``) are
    the keys of the saved ``state_dict``, so renaming them breaks loading of
    existing checkpoints.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return the logits (no softmax applied) for a batch of images ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flatten(…, 1) keeps the batch dimension and, unlike view(N, -1),
        # also copes with empty batches and non-contiguous activations.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


Device: cpu


In [25]:
def train_model(model, optimizer, criterion, train_loader, test_loader, epochs=5):
    """Supervised training loop with a per-epoch evaluation pass.

    The model is moved to the notebook-level ``device``. Each epoch runs one
    pass over ``train_loader`` and then scores the model on ``test_loader``
    with the same ``criterion``.

    Args:
        model: Network to train.
        optimizer: Optimizer stepped once per training batch.
        criterion: Loss called as ``criterion(outputs, labels)``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        test_loader: Iterable of ``(inputs, labels)`` evaluation batches.
        epochs: Number of epochs to run.

    Returns:
        ``(model, history)`` where ``history`` maps ``'epoch'``,
        ``'train_loss'``, ``'train_acc'``, ``'test_loss'`` and ``'test_acc'``
        to per-epoch lists.
    """
    model = model.to(device)
    history = {
        name: []
        for name in ('epoch', 'train_loss', 'train_acc', 'test_loss', 'test_acc')
    }

    for ep in range(1, epochs + 1):
        # ---- optimisation pass ----
        model.train()
        loss_sum, hits, seen = 0.0, 0, 0
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            logits = model(inputs)
            batch_loss = criterion(logits, targets)
            batch_loss.backward()
            optimizer.step()

            batch_size = inputs.size(0)
            # Weight by batch size so the epoch mean is per-sample.
            loss_sum += batch_loss.item() * batch_size
            hits += (torch.max(logits, 1)[1] == targets).sum().item()
            seen += batch_size
        train_loss = loss_sum / seen
        train_acc = hits / seen

        # ---- evaluation pass ----
        model.eval()
        loss_sum, hits, seen = 0.0, 0, 0
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = inputs.to(device)
                targets = targets.to(device)
                logits = model(inputs)
                batch_size = inputs.size(0)
                loss_sum += criterion(logits, targets).item() * batch_size
                hits += (torch.max(logits, 1)[1] == targets).sum().item()
                seen += batch_size
        test_loss = loss_sum / seen
        test_acc = hits / seen

        # Record this epoch's numbers.
        epoch_stats = (
            ('epoch', ep),
            ('train_loss', train_loss),
            ('train_acc', train_acc),
            ('test_loss', test_loss),
            ('test_acc', test_acc),
        )
        for name, value in epoch_stats:
            history[name].append(value)

        print(f"Epoch {ep}: Train Acc={train_acc:.4f}, Test Acc={test_acc:.4f}")

    return model, history


In [26]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# ToTensor followed by normalisation with mean 0.1307 and std 0.3081.
# NOTE: this rebinds `transform`, `train_loader` and `test_loader` from the earlier noisy-data cell.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

train_data, test_data = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Only the training loader is shuffled.
train_loader = DataLoader(dataset=train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=256, shuffle=False)


In [27]:
# Fresh teacher, optimised with Adam (lr=1e-3) on cross-entropy.
teacher = TeacherCNN().to(device)
optimizer = optim.Adam(teacher.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Train the teacher
print("Training TeacherCNN on clean MNIST...")
teacher, teacher_history = train_model(teacher, optimizer, criterion, train_loader, test_loader, epochs=5)

# Save only the weights (state_dict); later cells rebuild TeacherCNN and load them.
torch.save(teacher.state_dict(), "teacher_pretrained.pth")
print("✅ Teacher model saved as 'teacher_pretrained.pth'")


Training TeacherCNN on clean MNIST...
Epoch 1: Train Acc=0.9447, Test Acc=0.9817
Epoch 2: Train Acc=0.9845, Test Acc=0.9868
Epoch 3: Train Acc=0.9896, Test Acc=0.9890
Epoch 4: Train Acc=0.9915, Test Acc=0.9878
Epoch 5: Train Acc=0.9941, Test Acc=0.9913
✅ Teacher model saved as 'teacher_pretrained.pth'


In [28]:
# Rebuild the teacher and restore the saved weights. map_location remaps the
# checkpoint's tensors onto the current device, so a checkpoint written on a
# GPU machine still loads on a CPU-only one.
teacher = TeacherCNN().to(device)
teacher.load_state_dict(torch.load("teacher_pretrained.pth", map_location=device))
teacher.eval()


TeacherCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [29]:
class StudentCNN(nn.Module):
    """Smaller ("student") CNN mapping single-channel images to 10 raw class scores.

    Two 3x3 convolutions (16, then 32 channels), each followed by ReLU and a
    shared 2x2 max-pool, feed a 64-unit hidden layer. ``fc1`` expects
    ``32 * 7 * 7`` features, which matches 28x28 inputs after the two
    pooling steps.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(32 * 7 * 7, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Return the logits (no softmax applied) for a batch of images ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flatten(…, 1) keeps the batch dimension and, unlike view(N, -1),
        # also copes with empty batches and non-contiguous activations.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)


In [30]:
def distillation_loss(student_logits, teacher_logits, targets, temperature=4.0, alpha=0.7):
    """Blend a temperature-softened KL term with plain cross-entropy.

    Args:
        student_logits: Raw student scores; classes along dim 1.
        teacher_logits: Raw teacher scores, same layout as the student's.
        targets: Ground-truth class indices for the cross-entropy term.
        temperature: Divides both sets of logits before the softmax.
        alpha: Weight of the KL term; cross-entropy gets ``1 - alpha``.

    Returns:
        Scalar tensor ``alpha * KL * temperature**2 + (1 - alpha) * CE``.
    """
    kd_criterion = nn.KLDivLoss(reduction='batchmean')
    student_log_probs = F.log_softmax(student_logits / temperature, dim=1)
    teacher_probs = F.softmax(teacher_logits / temperature, dim=1)
    # The KL term is multiplied by the squared temperature.
    kd_loss = kd_criterion(student_log_probs, teacher_probs) * (temperature ** 2)
    ce_loss = F.cross_entropy(student_logits, targets)
    return alpha * kd_loss + (1 - alpha) * ce_loss


In [31]:
def train_kd(student, teacher, train_loader, test_loader, epochs=5):
    """Train ``student`` to mimic a frozen ``teacher`` via knowledge distillation.

    An Adam optimizer (lr=1e-3) over the student's parameters is created
    internally. Every epoch makes one pass over ``train_loader`` minimising
    ``distillation_loss`` (default temperature and alpha), then measures
    plain cross-entropy and accuracy on ``test_loader``. Both models are
    moved to the notebook-level ``device``.

    Args:
        student: Model being trained.
        teacher: Model providing soft targets; put in eval mode and run
            without gradient tracking.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        test_loader: Iterable of ``(inputs, labels)`` evaluation batches.
        epochs: Number of epochs to run.

    Returns:
        ``(student, hist)`` where ``hist`` maps ``'epoch'``, ``'train_loss'``,
        ``'train_acc'``, ``'test_loss'``, ``'test_acc'`` and ``'time'`` to
        per-epoch lists. ``'time'`` covers training plus evaluation, in
        seconds.
    """
    import time  # local import keeps the function self-contained

    optimizer = optim.Adam(student.parameters(), lr=1e-3)
    student = student.to(device)
    teacher = teacher.to(device)
    teacher.eval()

    hist = {'epoch': [], 'train_loss': [], 'train_acc': [], 'test_loss': [], 'test_acc': [], 'time': []}

    for ep in range(1, epochs + 1):
        student.train()
        train_loss, correct, total = 0, 0, 0
        start = time.time()

        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()

            # The teacher only supplies targets: skip autograd so no graph is
            # built for it and no gradients pile up on its parameters.
            with torch.no_grad():
                teacher_out = teacher(xb)
            student_out = student(xb)

            loss = distillation_loss(student_out, teacher_out, yb)
            loss.backward()
            optimizer.step()

            # Weight by batch size so the epoch mean is per-sample.
            train_loss += loss.item() * xb.size(0)
            preds = torch.argmax(student_out, dim=1)
            correct += (preds == yb).sum().item()
            total += xb.size(0)

        train_loss /= total
        train_acc = correct / total

        # --- Evaluate on test set (hard-label cross-entropy only) ---
        student.eval()
        test_loss, correct, total = 0, 0, 0
        with torch.no_grad():
            for xb, yb in test_loader:
                xb, yb = xb.to(device), yb.to(device)
                out = student(xb)
                loss = F.cross_entropy(out, yb)
                test_loss += loss.item() * xb.size(0)
                preds = torch.argmax(out, dim=1)
                correct += (preds == yb).sum().item()
                total += xb.size(0)

        test_loss /= total
        test_acc = correct / total
        elapsed = time.time() - start

        # Save stats
        hist['epoch'].append(ep)
        hist['train_loss'].append(train_loss)
        hist['train_acc'].append(train_acc)
        hist['test_loss'].append(test_loss)
        hist['test_acc'].append(test_acc)
        hist['time'].append(elapsed)

        print(f"Epoch {ep}: train_acc={train_acc:.4f}, test_acc={test_acc:.4f}, time={elapsed:.2f}s")

    return student, hist


In [32]:
# Restore the pretrained teacher; map_location remaps the checkpoint's tensors
# onto the current device, so a checkpoint written on a GPU machine still
# loads on a CPU-only one.
teacher = TeacherCNN().to(device)
teacher.load_state_dict(torch.load("teacher_pretrained.pth", map_location=device))
teacher.eval()

# Untrained student to be distilled from the teacher.
student = StudentCNN().to(device)

print("\n📚 Training student via knowledge distillation on CLEAN MNIST...")
student, hist_kd_clean = train_kd(student, teacher, train_loader, test_loader, epochs=5)



📚 Training student via knowledge distillation on CLEAN MNIST...
Epoch 1: train_acc=0.9279, test_acc=0.9827, time=29.88s
Epoch 2: train_acc=0.9856, test_acc=0.9879, time=29.51s
Epoch 3: train_acc=0.9896, test_acc=0.9890, time=29.89s
Epoch 4: train_acc=0.9914, test_acc=0.9899, time=30.48s
Epoch 5: train_acc=0.9926, test_acc=0.9895, time=30.25s


In [33]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Add Gaussian noise to images
class AddGaussianNoise:
    """Callable transform that adds Gaussian noise and clamps to ``[0, 1]``.

    Attributes are documented inline in ``__init__``. New noise is drawn on
    every call, so repeated calls on the same tensor give different results.
    """

    def __init__(self, mean=0., std=0.2):
        self.mean = mean  # mean of the additive noise
        self.std = std    # standard deviation of the additive noise

    def __call__(self, tensor):
        """Return ``clamp(tensor + noise, 0, 1)`` without modifying ``tensor``."""
        # Sample on the input's own device so the addition also works for
        # tensors that do not live on the CPU.
        noise = torch.randn(tensor.size(), device=tensor.device) * self.std + self.mean
        return torch.clamp(tensor + noise, 0., 1.)

# === Transforms ===
# Noise is injected after ToTensor and before normalisation; only the
# training pipeline is noisy.
transform_noisy = transforms.Compose([
    transforms.ToTensor(),
    AddGaussianNoise(mean=0., std=0.3),  # tune std to change the corruption strength
    transforms.Normalize((0.1307,), (0.3081,)),
])
transform_clean = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

# === Datasets and loaders: noisy training split, clean test split ===
# NOTE: `train_noisy` and `test_clean` rebind names used by an earlier cell.
train_noisy = datasets.MNIST('./data', train=True, transform=transform_noisy, download=True)
test_clean = datasets.MNIST('./data', train=False, transform=transform_clean, download=True)

train_loader_noisy = DataLoader(dataset=train_noisy, batch_size=128, shuffle=True)
test_loader_clean = DataLoader(dataset=test_clean, batch_size=256, shuffle=False)


In [34]:
# Restore the pretrained teacher; map_location remaps the checkpoint's tensors
# onto the current device, so a checkpoint written on a GPU machine still
# loads on a CPU-only one.
teacher = TeacherCNN().to(device)
teacher.load_state_dict(torch.load("teacher_pretrained.pth", map_location=device))
teacher.eval()

# Untrained student for the noisy-data distillation run.
student_noisy = StudentCNN().to(device)


In [35]:
# Distil on noisy training images, evaluate on the clean test set.
print("\n📚 Training student via knowledge distillation on NOISY MNIST...")
student_noisy, hist_kd_noisy = train_kd(student_noisy, teacher, train_loader_noisy, test_loader_clean, epochs=5)



📚 Training student via knowledge distillation on NOISY MNIST...
Epoch 1: train_acc=0.8356, test_acc=0.9711, time=33.71s
Epoch 2: train_acc=0.9418, test_acc=0.9803, time=30.63s
Epoch 3: train_acc=0.9449, test_acc=0.9849, time=30.27s
Epoch 4: train_acc=0.9465, test_acc=0.9850, time=31.19s
Epoch 5: train_acc=0.9473, test_acc=0.9865, time=32.94s


In [36]:
import pandas as pd

# Side-by-side summary of the two distillation runs. Each history dict holds
# per-epoch lists, so [-1] picks the final epoch and sum() totals the time.
comparison_data = {
    'Metric': [
        'Final Train Loss',
        'Final Test Loss',
        'Final Train Accuracy (%)',
        'Final Test Accuracy (%)',
        'Total Training Time (s)',
    ],
    **{
        column: [
            round(run['train_loss'][-1], 4),
            round(run['test_loss'][-1], 4),
            round(run['train_acc'][-1] * 100, 2),   # fraction -> percent
            round(run['test_acc'][-1] * 100, 2),
            round(sum(run['time']), 2),
        ]
        for column, run in (('Clean KD', hist_kd_clean), ('Noisy KD', hist_kd_noisy))
    },
}

# One row per metric, one column per run.
df_comparison = pd.DataFrame(comparison_data)

# Show the table in the notebook output.
print(df_comparison)

# Persist the table next to the notebook.
df_comparison.to_csv("kd_clean_vs_noisy_comparison.csv", index=False)


                     Metric  Clean KD  Noisy KD
0          Final Train Loss    0.0886    0.1920
1           Final Test Loss    0.0306    0.0416
2  Final Train Accuracy (%)   99.2600   94.7300
3   Final Test Accuracy (%)   98.9500   98.6500
4   Total Training Time (s)  150.0100  158.7500


In [37]:
import matplotlib.pyplot as plt
import matplotlib

# Prevent kernel crash in VS Code if needed
matplotlib.use('Agg')  # <- remove this line if you want interactive display

epochs = hist_kd_clean['epoch']


def _plot_kd_comparison(x_values, series, ylabel, title, out_path, figsize=(10, 5)):
    """Draw one clean-vs-noisy line chart, save it to ``out_path`` and close it.

    Args:
        x_values: Shared x-axis values (epoch numbers).
        series: Iterable of ``(history, key, fmt, label)`` tuples; each one
            plots ``history[key]`` with matplotlib format string ``fmt``.
        ylabel: Y-axis label.
        title: Figure title.
        out_path: File the figure is written to.
        figsize: Figure size in inches.
    """
    plt.figure(figsize=figsize)
    for history, key, fmt, label in series:
        plt.plot(x_values, history[key], fmt, label=label)
    plt.xlabel("Epochs")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.tight_layout()
    plt.savefig(out_path)
    # Agg is a non-interactive backend: plt.show() displays nothing there and
    # only emits a warning, so call it only when another backend is active.
    if matplotlib.get_backend().lower() != 'agg':
        plt.show()
    plt.close()


# === Accuracy ===
_plot_kd_comparison(
    epochs,
    [
        (hist_kd_clean, 'train_acc', 'b-o', "Clean KD - Train Acc"),
        (hist_kd_clean, 'test_acc', 'b--', "Clean KD - Test Acc"),
        (hist_kd_noisy, 'train_acc', 'r-o', "Noisy KD - Train Acc"),
        (hist_kd_noisy, 'test_acc', 'r--', "Noisy KD - Test Acc"),
    ],
    ylabel="Accuracy",
    title="Train vs Test Accuracy: Clean vs Noisy KD",
    out_path="kd_accuracy_comparison.png",
)

# === Loss ===
_plot_kd_comparison(
    epochs,
    [
        (hist_kd_clean, 'train_loss', 'b-o', "Clean KD - Train Loss"),
        (hist_kd_clean, 'test_loss', 'b--', "Clean KD - Test Loss"),
        (hist_kd_noisy, 'train_loss', 'r-o', "Noisy KD - Train Loss"),
        (hist_kd_noisy, 'test_loss', 'r--', "Noisy KD - Test Loss"),
    ],
    ylabel="Loss",
    title="Train vs Test Loss: Clean vs Noisy KD",
    out_path="kd_loss_comparison.png",
)

# === Time ===
_plot_kd_comparison(
    epochs,
    [
        (hist_kd_clean, 'time', 'b-o', "Clean KD Time"),
        (hist_kd_noisy, 'time', 'r-o', "Noisy KD Time"),
    ],
    ylabel="Time (s)",
    title="Training Time per Epoch: Clean vs Noisy KD",
    out_path="kd_time_comparison.png",
    figsize=(8, 5),
)


  plt.show()
  plt.show()
  plt.show()
