In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under the Kaggle input directory.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install detectors -q

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F


In [None]:
import torch,timm,detectors
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.nn import KLDivLoss
# Per-channel mean / std handed to Normalize.
# NOTE(review): presumably these match the preprocessing the pretrained teacher
# was trained with — confirm against the teacher checkpoint's documentation.
CIFAR10_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR10_STD = [0.2023, 0.1994, 0.2010]

# Preprocessing pipeline shared by every dataset built with `transform`.
preprocessing_steps = [
    transforms.Resize((32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD),
]
transform = transforms.Compose(preprocessing_steps)

# Training split (downloaded into ./data when missing), served in shuffled batches of 128.
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Pretrained teacher network, moved to the chosen device and switched to eval mode.
teacher_model = timm.create_model("resnet34_cifar10", pretrained=True)
teacher_model = teacher_model.to(device)
teacher_model.eval()

In [None]:
# Define the StudentCNN model
class StudentCNN(nn.Module):
    """Compact two-convolution classifier used as the distillation student.

    Two (3x3 conv -> ReLU -> 2x2 max-pool) stages take the input from 3 to 32
    to 64 channels; the flattened features then pass through a 512-unit hidden
    layer and a 10-way output layer. ``fc1`` expects 64 * 8 * 8 features, which
    is what a 32x32 input yields after the two spatial halvings.
    """

    def __init__(self):
        super().__init__()
        # Creation order determines both the RNG draws during initialisation
        # and the key order of state_dict(): conv1, conv2, fc1, fc2.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=64 * 8 * 8, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        """Return the raw (un-normalised) 10-class logits for a batch of images."""
        # Each stage halves the spatial size: 32x32 -> 16x16 -> 8x8.
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # Collapse everything except the batch dimension before the dense layers.
        flat = x.view(x.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc2(hidden)

In [None]:
def train_with_kl_and_ce(student_model, teacher_model, loader, optimizer, scheduler, epochs=150, temperature=4, alpha=0.7):
    """Train ``student_model`` by distilling ``teacher_model`` with a KL + CE loss.

    For every batch the loss is
    ``alpha * T**2 * KL(teacher_soft || student_soft) + (1 - alpha) * CE(student, labels)``,
    where both soft distributions are obtained by dividing the logits by
    ``temperature`` (T) before the softmax.

    Args:
        student_model: module being optimised; put into train mode here.
        teacher_model: module providing soft targets; put into eval mode here
            and only ever run under ``torch.no_grad()``. It must already live
            on the same device as ``student_model``.
        loader: iterable yielding ``(images, labels)`` batches.
        optimizer: optimiser stepping the student's parameters, once per batch.
        scheduler: learning-rate scheduler, stepped once per epoch.
        epochs: number of passes over ``loader``.
        temperature: softening temperature applied to teacher and student logits.
        alpha: weight of the KL term; the cross-entropy term gets ``1 - alpha``.

    Returns:
        list[float]: for each epoch, the sum of the per-batch combined losses
        (the same number that is printed for that epoch).
    """
    kl_loss = nn.KLDivLoss(reduction='batchmean')
    ce_loss = nn.CrossEntropyLoss()

    # Take the device from the student itself instead of relying on a global
    # `device` variable existing in the notebook namespace.
    first_param = next(student_model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    student_model.train()
    # The teacher only supplies targets: keep BatchNorm statistics frozen and
    # dropout disabled even if the caller left it in train mode.
    teacher_model.eval()

    epoch_losses = []
    for epoch in range(epochs):
        total_loss = 0.0
        for images, labels in loader:
            images, labels = images.to(run_device), labels.to(run_device)

            # Soft targets from the teacher; no gradients are needed for them.
            with torch.no_grad():
                teacher_logits = teacher_model(images)
                teacher_probs = F.softmax(teacher_logits / temperature, dim=1)

            optimizer.zero_grad()
            student_logits = student_model(images)

            # KLDivLoss expects log-probabilities as input and probabilities as
            # target. The T**2 factor compensates for the 1/T**2 scaling of the
            # soft-target gradients.
            student_log_probs = F.log_softmax(student_logits / temperature, dim=1)
            loss_kl = kl_loss(student_log_probs, teacher_probs) * (temperature ** 2)

            # Hard-label term uses the un-softened student logits.
            loss_ce = ce_loss(student_logits, labels)

            loss = alpha * loss_kl + (1 - alpha) * loss_ce

            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        scheduler.step()

        epoch_losses.append(total_loss)
        print(f"Epoch {epoch+1}, Total Loss: {total_loss:.4f}")

    return epoch_losses


In [None]:
# Create the student network that will be trained with the KL + CE distillation loss.
student_model_kl = StudentCNN()

In [None]:


# Number of training epochs, shared by the cosine schedule and the training
# loop so the learning rate finishes annealing exactly when training ends.
EPOCHS = 150

student_model_kl.to(device)

# SGD with momentum and weight decay over the *student's* parameters.
optimizer_kl = torch.optim.SGD(
    student_model_kl.parameters(),
    lr=0.1,              # initial learning rate
    momentum=0.9,
    weight_decay=5e-4
)

# Cosine annealing of the learning rate over the whole run (stepped once per epoch).
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer_kl,
    T_max=EPOCHS  # total number of epochs
)

# Assign the result so the cell does not echo whatever the training routine returns.
kl_training_result = train_with_kl_and_ce(
    student_model_kl, teacher_model, train_loader, optimizer_kl, scheduler, epochs=EPOCHS
)

In [None]:
# Persist the trained student to the Kaggle working directory.
# NOTE(review): this pickles the whole module object rather than its state_dict(), so
# loading the file back presumably requires the StudentCNN class to be importable —
# confirm with whoever consumes model.pth before changing the on-disk format.
torch.save(student_model_kl, '/kaggle/working/model.pth')

In [None]:
# Measure the student's top-1 accuracy on the CIFAR-10 test split, using the same
# preprocessing as for training.
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# Switch to inference mode so that any mode-dependent layers (dropout, batch norm)
# behave deterministically during evaluation.
student_model_kl.eval()

correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = student_model_kl(images)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Test Accuracy: {accuracy:.2f}%')