In [None]:
!pip install optuna onnx onnx_tf

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import models, datasets
import matplotlib.pyplot as plt
from torch.utils.data import Subset
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import os
import optuna
from tqdm import tqdm
import torch.nn.functional as F

In [None]:
# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training transform: resize to the 224x224 ResNet input size plus a light augmentation
transform_train = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

# Evaluation transform: same preprocessing, but fully deterministic (no augmentation)
transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
# Second view of the same training images through the deterministic transform.
# Validation samples are drawn from it so that validation metrics are not
# perturbed by the random flips applied to `trainset`.
valset = torchvision.datasets.CIFAR10(root='./data', train=True, download=False, transform=transform_test)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

# Subset sizes: `num_train_samples` indices are drawn from the training split,
# the first `num_fit_samples` are used for fitting and the rest for validation.
num_train_samples = 1500
num_fit_samples = 1000
num_test_samples = 500

# Seeded generator so that the same subsets are selected on every run
SEED = 42
rng = np.random.default_rng(SEED)
train_indices = rng.choice(len(trainset), num_train_samples, replace=False)
test_indices = rng.choice(len(testset), num_test_samples, replace=False)

# Create subset datasets (train and validation index sets are disjoint)
train_subset = Subset(trainset, train_indices[:num_fit_samples])
val_subset = Subset(valset, train_indices[num_fit_samples:])
test_subset = Subset(testset, test_indices)

# Create DataLoaders
batch_size = 128

# Training data: reshuffled every epoch
trainloader = torch.utils.data.DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=2)

# Validation data: fixed order, shuffling is pointless for evaluation
valloader = torch.utils.data.DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=2)

# Test data
testloader = torch.utils.data.DataLoader(test_subset, batch_size=batch_size, shuffle=False, num_workers=2)

In [None]:
# Sanity check: number of batches the training loader yields per epoch
len(trainloader)

In [None]:
# Class names of the dataset; indexed by integer label further down
# (plot titles and confusion-matrix tick labels).
classes = trainset.classes
print(classes)

In [None]:
# Shape of the first transformed training image (the image is element 0 of the
# (image, label) pair); after Resize((224, 224)) + ToTensor this is presumably
# (3, 224, 224) — confirm from the cell output.
trainset[0][0].shape

In [None]:
# Mean and std used for normalization in the transforms above. These are the
# ImageNet statistics (the same values defined in the data-loading cell),
# repeated here so that this plotting cell can undo the normalization.
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

def denormalize(img):
    """Map a normalized image back to displayable pixel values.

    Applies ``img * std + mean`` using the module-level ``std`` / ``mean``
    lists and clamps the result to the [0, 1] range expected by ``imshow``.
    The caller in this notebook passes a channels-last NumPy array.
    """
    channel_std = std[:]
    channel_mean = mean[:]
    restored = img * channel_std + channel_mean
    return np.clip(restored, 0, 1)

def show_images_in_rows(images, labels, num_rows=1, num_cols=5):
    """Display normalized image tensors on a ``num_rows`` x ``num_cols`` grid.

    Args:
        images: Indexable collection of (C, H, W) tensors normalized with the
            notebook's ``mean`` / ``std``.
        labels: Titles for the images, in the same order as ``images``.
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.

    Grid cells without a matching image/label pair are left blank.
    """
    # squeeze=False always returns a 2-D array of Axes, so flatten() also
    # works for a 1x1 grid (where subplots would otherwise return a bare Axes).
    fig, axes = plt.subplots(
        num_rows, num_cols, figsize=(num_cols * 2, num_rows * 2), squeeze=False
    )
    num_shown = min(len(images), len(labels))

    for i, ax in enumerate(axes.flatten()):
        ax.axis('off')  # Hide axis for both used and unused cells
        if i >= num_shown:
            continue
        # detach/cpu make this safe for tensors on the GPU or tracked by autograd
        img = images[i].detach().cpu().numpy().transpose((1, 2, 0))  # (C, H, W) -> (H, W, C)
        ax.imshow(denormalize(img))
        ax.set_title(f"Label: {labels[i]}")

    plt.tight_layout()
    plt.show()

# Draw one full batch from the training loader...
dataiter = iter(trainloader)
batch = next(dataiter)

# ...and keep only its first 4 samples for the preview
images = batch[0][:4]
labels = batch[1][:4]

# Translate the integer labels into class names and plot a single row of 4
preview_titles = [classes[class_idx] for class_idx in labels.tolist()]
show_images_in_rows(images, preview_titles, 1, 4)

In [None]:
class CustomResNet(nn.Module):
    """ResNet-50 (ImageNet1K_V2 weights) with its classifier replaced by a two-layer head.

    The head is Linear -> Dropout -> ReLU -> Dropout -> Linear and maps the
    backbone features to ``num_classes`` logits.

    Args:
        num_classes: Number of output logits.
        intermediate_dim: Width of the hidden layer in the head.
        dropout1_rate: Dropout probability applied before the ReLU.
        dropout2_rate: Dropout probability applied after the ReLU.
    """

    def __init__(self, num_classes, intermediate_dim, dropout1_rate, dropout2_rate):
        super().__init__()
        backbone = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V2)
        backbone_out_dim = backbone.fc.in_features

        # Replace the stock classifier with the tunable head
        head_layers = [
            nn.Linear(backbone_out_dim, intermediate_dim),
            nn.Dropout(p=dropout1_rate),
            nn.ReLU(),
            nn.Dropout(p=dropout2_rate),
            nn.Linear(intermediate_dim, num_classes),
        ]
        backbone.fc = nn.Sequential(*head_layers)
        self.resnet = backbone

    def forward(self, x):
        """Return the class logits for a batch of images."""
        logits = self.resnet(x)
        return logits

In [None]:
def create_model(trial, num_classes):
    """Build a CustomResNet whose head hyperparameters are sampled from ``trial``.

    Samples ``intermediate_dim`` (256..2048 in steps of 256) and the two
    dropout rates (0.1..0.5), in that order, and returns the resulting model.
    """
    # Dict entries are evaluated top to bottom, so the sampling order is fixed
    head_config = {
        'intermediate_dim': trial.suggest_int('intermediate_dim', 256, 2048, step=256),
        'dropout1_rate': trial.suggest_float('dropout1_rate', 0.1, 0.5),
        'dropout2_rate': trial.suggest_float('dropout2_rate', 0.1, 0.5),
    }
    return CustomResNet(num_classes=num_classes, **head_config)

def objective(trial, train_dataset, val_dataset, num_classes):
    """Optuna objective for the teacher: train one CustomResNet and score it.

    Samples batch size, learning rate, optimizer (plus momentum for SGD), the
    head architecture (via ``create_model``) and the number of epochs, trains
    on ``train_dataset`` and evaluates on ``val_dataset`` after every epoch.

    Args:
        trial: Optuna trial used to sample hyperparameters and report progress.
        train_dataset: Dataset used for training.
        val_dataset: Dataset used for validation.
        num_classes: Number of output classes.

    Returns:
        Validation accuracy (fraction in [0, 1]) after the last epoch.

    Raises:
        optuna.TrialPruned: If the pruner decides to stop the trial early.
    """
    # Define hyperparameters to optimize
    batch_size = trial.suggest_int('batch_size', 16, 128, step=16)
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD'])

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=2
    )

    # Initialize model with architecture hyperparameters
    model = create_model(trial, num_classes)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Set up optimizer based on trial suggestion
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    else:
        momentum = trial.suggest_float('momentum', 0.1, 0.9)
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    criterion = nn.CrossEntropyLoss()

    num_epochs = trial.suggest_int('num_epochs', 5, 15)

    for epoch in range(num_epochs):
        # Training phase. Iterate the loaders built above from the function
        # arguments, so the sampled batch size and the passed datasets are
        # actually used (not the notebook-level global loaders).
        model.train()
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation phase
        model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)

                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = correct / total

        # Report intermediate values to Optuna
        trial.report(accuracy, epoch)

        # Handle pruning based on the intermediate value
        if trial.should_prune():
            raise optuna.TrialPruned()

    return accuracy

def run_optuna_optimization(train_dataset, val_dataset, num_classes, n_trials=100):
    """Search teacher hyperparameters with Optuna and return the best set.

    Creates a maximizing study with a median pruner, runs ``n_trials`` trials
    of ``objective`` on the given datasets, prints the best trial and returns
    ``study.best_params``.
    """
    def _run_trial(trial):
        # Bind the datasets so Optuna only has to supply the trial
        return objective(trial, train_dataset, val_dataset, num_classes)

    median_pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=5)
    study = optuna.create_study(
        direction="maximize",
        pruner=median_pruner,
        study_name="resnet50_optimization",
    )
    study.optimize(_run_trial, n_trials=n_trials)

    # Summarize the winning trial
    print("Best trial:")
    best = study.best_trial
    print("  Value: ", best.value)
    print("  Params: ")
    for param_name, param_value in best.params.items():
        print(f"    {param_name}: {param_value}")

    return study.best_params

In [None]:
# Run the teacher hyperparameter search (20 trials) on the train/val subsets.
# NOTE: the name `best_params` is reassigned further down by the student
# (distillation) search, so cells that need the teacher's values must run
# before that reassignment.
best_params = run_optuna_optimization(
    train_dataset=train_subset,
    val_dataset=val_subset,
    num_classes=len(classes),
    n_trials=20
)

In [None]:
def train_final_model(train_dataset, val_dataset, best_params, num_classes, save_path='best_model.pth'):
    """Train a CustomResNet with the given hyperparameters and checkpoint the best epoch.

    Args:
        train_dataset: Dataset used for training.
        val_dataset: Dataset used for validation after every epoch.
        best_params: Dict with the keys 'intermediate_dim', 'dropout1_rate',
            'dropout2_rate', 'batch_size', 'optimizer', 'lr', 'num_epochs'
            and, when the optimizer is not 'Adam', 'momentum'.
        num_classes: Number of output classes.
        save_path: Where the checkpoint of the best epoch is written.

    Returns:
        The model as it is after the LAST epoch. The weights of the best
        epoch (by validation accuracy) are only available in the checkpoint
        at ``save_path``.
    """
    # Create model with best parameters
    model = CustomResNet(
        num_classes=num_classes,
        intermediate_dim=best_params['intermediate_dim'],
        dropout1_rate=best_params['dropout1_rate'],
        dropout2_rate=best_params['dropout2_rate']
    )

    # Create data loaders with best batch size
    train_loader = DataLoader(
        train_dataset,
        batch_size=best_params['batch_size'],
        shuffle=True,
        num_workers=2
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=best_params['batch_size'],
        shuffle=False,
        num_workers=2
    )

    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    # Setup optimizer with best parameters
    if best_params['optimizer'] == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=best_params['lr'])
    else:
        optimizer = optim.SGD(
            model.parameters(),
            lr=best_params['lr'],
            momentum=best_params['momentum']
        )

    criterion = nn.CrossEntropyLoss()
    num_epochs = best_params['num_epochs']

    # Make sure the checkpoint directory exists before training starts
    os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)

    # Keep track of best validation accuracy
    best_val_acc = 0.0

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0

        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}')
        for batch_idx, (images, labels) in enumerate(progress_bar, start=1):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(outputs.detach(), 1)
            train_total += labels.size(0)
            train_correct += (predicted == labels).sum().item()

            # `train_loss` sums per-batch mean losses, so the running average
            # is taken over batches (same convention as the epoch summary).
            progress_bar.set_postfix({
                'loss': f'{train_loss/batch_idx:.4f}',
                'acc': f'{100.*train_correct/train_total:.2f}%'
            })

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        # Calculate metrics
        train_accuracy = 100. * train_correct / train_total
        val_accuracy = 100. * val_correct / val_total

        print(f'\nEpoch {epoch+1}/{num_epochs}:')
        print(f'Training Loss: {train_loss/len(train_loader):.4f}, Training Accuracy: {train_accuracy:.2f}%')
        print(f'Validation Loss: {val_loss/len(val_loader):.4f}, Validation Accuracy: {val_accuracy:.2f}%')

        # Save best model
        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            print(f'New best validation accuracy: {best_val_acc:.2f}%')
            print(f'Saving model to {save_path}')

            # Checkpoint everything needed to rebuild the model and optimizer
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_val_acc': best_val_acc,
                'hyperparameters': best_params
            }, save_path)

    print(f"\nTraining completed! Best validation accuracy: {best_val_acc:.2f}%")
    return model

# Train the final teacher with the best hyperparameters found by the search.
# The best-epoch checkpoint is written to models/best_resnet50_model.pth
# (relative to the current working directory).
final_model = train_final_model(
    train_dataset=train_subset,
    val_dataset=val_subset,
    best_params=best_params,
    num_classes=len(classes),
    save_path='models/best_resnet50_model.pth'
)


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize lists to store true labels and predictions
all_preds = []
all_labels = []

# Hyperparameters recorded for the stored teacher checkpoint:
# Best trial:
#   Value:  0.86
#   Params:
#     batch_size: 80
#     lr: 9.035438144665984e-05
#     optimizer: Adam
#     intermediate_dim: 1024
#     dropout1_rate: 0.3055373006787046
#     dropout2_rate: 0.10413266283398273
#     num_epochs: 15
teacher_checkpoint_path = "/kaggle/input/best_resnet50_for_cifar10/pytorch/default/1/best_resnet50_model.pth"

# map_location lets a checkpoint written on a GPU be opened on a CPU-only machine
checkpoint = torch.load(teacher_checkpoint_path, map_location=device)
hyperparameters = checkpoint['hyperparameters']

# Rebuild the architecture from the stored hyperparameters and restore the weights
model = CustomResNet(
    len(classes),
    hyperparameters["intermediate_dim"],
    hyperparameters["dropout1_rate"],
    hyperparameters["dropout2_rate"]
).to(device)
model.load_state_dict(checkpoint['model_state_dict'])

# Recreate the optimizer type that produced the checkpoint, so the saved
# optimizer state matches; falls back to Adam if the type was not recorded.
if hyperparameters.get('optimizer', 'Adam') == 'Adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=hyperparameters['lr'])
else:
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=hyperparameters['lr'],
        momentum=hyperparameters['momentum']
    )
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Restore additional information
epoch = checkpoint['epoch']
best_val_acc = checkpoint['best_val_acc']

print(f"Model loaded from epoch {epoch} with best validation accuracy: {best_val_acc}")

In [None]:
# Start from empty accumulators so that re-running this cell does not append
# a second copy of every prediction (which would inflate the confusion matrix).
all_preds = []
all_labels = []

# Evaluate the model
model.to(device)
model.eval()  # Set model to evaluation mode

with torch.no_grad():  # No need to calculate gradients
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)  # Forward pass
        _, predicted = torch.max(outputs, 1)  # Get the index of the highest score

        all_preds.extend(predicted.cpu().numpy())  # Store predictions
        all_labels.extend(labels.cpu().numpy())  # Store true labels

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# Create confusion matrix.
# `labels` pins the matrix to one row/column per class, so the tick labels
# below stay aligned even if a class is missing from the sampled test subset.
class_ids = list(range(len(classes)))
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

# Plot confusion matrix
plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Accumulators for the misclassified test samples of the teacher
wrong_images, wrong_preds, wrong_labels = [], [], []

# Inference mode for the evaluation pass
model.eval()

with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)

        # Predicted class = index of the largest logit
        predicted = torch.max(model(images), 1).indices

        # Boolean mask of the samples the model got wrong
        mask = predicted != labels
        if not mask.any():
            continue
        wrong_images.extend(images[mask].cpu())
        wrong_preds.extend(predicted[mask].cpu().numpy())
        wrong_labels.extend(labels[mask].cpu().numpy())


def show_misclassified_images(images, true_labels, pred_labels, num_images=10):
    """Plot up to ``num_images`` misclassified samples in a single row.

    Args:
        images: Sequence of normalized (C, H, W) CPU tensors.
        true_labels: Ground-truth class indices, aligned with ``images``.
        pred_labels: Predicted class indices, aligned with ``images``.
        num_images: Maximum number of images to show.

    Each panel is titled with the true (T) and predicted (P) class name.
    Nothing is plotted when there are no images to show.
    """
    num_images = min(num_images, len(images))  # Ensure we don't go out of bounds
    if num_images <= 0:
        # plt.subplots cannot create a grid with zero columns
        print("No misclassified images to display.")
        return

    # squeeze=False keeps `axes` a 2-D array even for a single image
    fig, axes = plt.subplots(1, num_images, figsize=(num_images * 2, 2), squeeze=False)

    for i, ax in enumerate(axes[0]):
        img = images[i].numpy().transpose((1, 2, 0))  # Convert from (C, H, W) to (H, W, C)
        img = img * np.asarray(std) + np.asarray(mean)  # Denormalize
        img = np.clip(img, 0, 1)  # Clip values for display

        ax.imshow(img)
        ax.set_title(f"T: {classes[true_labels[i]]}\nP: {classes[pred_labels[i]]}", fontsize=10)
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Show up to 10 of the test images the teacher misclassified,
# titled with the true (T) and predicted (P) class.
show_misclassified_images(wrong_images, wrong_labels, wrong_preds, num_images=10)

In [None]:
class StudentResNet(nn.Module):
    """ResNet-18 (ImageNet1K_V1 weights) student with the same head layout as the teacher.

    The stock classifier is replaced by Linear -> Dropout -> ReLU -> Dropout
    -> Linear, producing ``num_classes`` logits.

    Args:
        num_classes: Number of output logits.
        intermediate_dim: Width of the hidden layer in the head.
        dropout1_rate: Dropout probability applied before the ReLU.
        dropout2_rate: Dropout probability applied after the ReLU.
    """

    def __init__(self, num_classes, intermediate_dim, dropout1_rate, dropout2_rate):
        super().__init__()
        # ResNet-18 is the smaller counterpart of the ResNet-50 teacher
        backbone = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        backbone_out_dim = backbone.fc.in_features

        head_layers = [
            nn.Linear(backbone_out_dim, intermediate_dim),
            nn.Dropout(p=dropout1_rate),
            nn.ReLU(),
            nn.Dropout(p=dropout2_rate),
            nn.Linear(intermediate_dim, num_classes),
        ]
        backbone.fc = nn.Sequential(*head_layers)
        self.resnet = backbone

    def forward(self, x):
        """Return the class logits for a batch of images."""
        logits = self.resnet(x)
        return logits

In [None]:
def kd_loss(student_logits, teacher_logits, labels, temperature=3, alpha=0.7):
    """Knowledge-distillation loss: weighted sum of a soft and a hard term.

    Args:
        student_logits: Raw student outputs, classes along dim 1.
        teacher_logits: Raw teacher outputs with the same shape.
        labels: Ground-truth class indices.
        temperature: Divides both sets of logits before the softmax.
        alpha: Weight of the distillation term; ``1 - alpha`` weights the
            cross-entropy term.

    Returns:
        ``alpha * KL(teacher || student) * temperature**2 + (1 - alpha) * CE``
        where the KL term uses 'batchmean' reduction.
    """
    # Soft term: temperature-scaled student log-probs against teacher probs
    student_log_probs = F.log_softmax(student_logits / temperature, dim=1)
    teacher_probs = F.softmax(teacher_logits / temperature, dim=1)
    distill_term = F.kl_div(student_log_probs, teacher_probs, reduction='batchmean')
    distill_term = distill_term * (temperature ** 2)

    # Hard term: plain cross-entropy on the unscaled student logits
    hard_criterion = nn.CrossEntropyLoss()
    hard_term = hard_criterion(student_logits, labels)

    return alpha * distill_term + (1 - alpha) * hard_term

In [None]:
def objective(trial, teacher_model, train_dataset, val_dataset, num_classes):
    """Optuna objective for distillation: train one student and score it.

    NOTE: this definition reuses the name ``objective`` of the teacher search
    objective defined earlier in the notebook and therefore replaces it once
    this cell has run.

    Samples the student head (intermediate_dim, two dropout rates), the
    distillation settings (temperature, alpha) and the training settings
    (batch size, lr, optimizer, momentum for SGD, number of epochs). The
    student is trained with ``kd_loss`` against the frozen teacher and
    evaluated on ``val_dataset`` after every epoch.

    Args:
        trial: Optuna trial used to sample hyperparameters and report progress.
        teacher_model: Trained teacher; moved to the device and put in eval mode.
        train_dataset: Dataset used for training.
        val_dataset: Dataset used for validation.
        num_classes: Number of output classes.

    Returns:
        Validation accuracy (fraction in [0, 1]) after the last epoch.

    Raises:
        optuna.TrialPruned: If the pruner decides to stop the trial early.
    """
    # Architecture hyperparameters
    intermediate_dim = trial.suggest_int('intermediate_dim', 128, 1024, step=128)
    dropout1_rate = trial.suggest_float('dropout1_rate', 0.1, 0.5)
    dropout2_rate = trial.suggest_float('dropout2_rate', 0.1, 0.5)

    # Distillation hyperparameters
    temperature = trial.suggest_float('temperature', 1.0, 10.0)
    alpha = trial.suggest_float('alpha', 0.1, 0.9)

    # Training hyperparameters
    batch_size = trial.suggest_int('batch_size', 16, 128, step=16)
    lr = trial.suggest_float('lr', 1e-5, 1e-2, log=True)
    optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'SGD'])

    # Create student model
    student_model = StudentResNet(
        num_classes=num_classes,
        intermediate_dim=intermediate_dim,
        dropout1_rate=dropout1_rate,
        dropout2_rate=dropout2_rate
    )

    # Setup device and move models
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    student_model = student_model.to(device)
    teacher_model = teacher_model.to(device)
    teacher_model.eval()  # the teacher is only used for inference

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    # Setup optimizer
    if optimizer_name == 'Adam':
        optimizer = optim.Adam(student_model.parameters(), lr=lr)
    else:
        momentum = trial.suggest_float('momentum', 0.1, 0.9)
        optimizer = optim.SGD(student_model.parameters(), lr=lr, momentum=momentum)

    # Training loop
    num_epochs = trial.suggest_int('num_epochs', 5, 15)

    for epoch in range(num_epochs):
        student_model.train()

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Teacher predictions are targets only: no gradients needed
            with torch.no_grad():
                teacher_logits = teacher_model(images)

            student_logits = student_model(images)

            loss = kd_loss(
                student_logits=student_logits,
                teacher_logits=teacher_logits,
                labels=labels,
                temperature=temperature,
                alpha=alpha
            )

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Validation phase
        student_model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = student_model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        accuracy = correct / total
        trial.report(accuracy, epoch)

        if trial.should_prune():
            raise optuna.TrialPruned()

    return accuracy

In [None]:
def run_distillation_optimization(teacher_model, train_dataset, val_dataset, num_classes, n_trials=100):
    """Search student/distillation hyperparameters with Optuna and return the best set.

    Creates a maximizing study with a median pruner, runs ``n_trials`` trials
    of ``objective`` against the given teacher and datasets, prints the best
    trial and returns ``study.best_params``.
    """
    def _run_trial(trial):
        # Bind teacher and datasets so Optuna only has to supply the trial
        return objective(trial, teacher_model, train_dataset, val_dataset, num_classes)

    median_pruner = optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=5)
    study = optuna.create_study(
        direction="maximize",
        pruner=median_pruner,
        study_name="distillation_optimization",
    )
    study.optimize(_run_trial, n_trials=n_trials)

    # Summarize the winning trial
    print("Best trial:")
    best = study.best_trial
    print("  Value: ", best.value)
    print("  Params: ")
    for param_name, param_value in best.params.items():
        print(f"    {param_name}: {param_value}")

    return study.best_params

In [None]:
# Run the distillation hyperparameter search (20 trials) with the loaded
# teacher `model`.
# NOTE: this overwrites the `best_params` produced by the teacher search.
best_params = run_distillation_optimization(
    teacher_model=model,
    train_dataset=train_subset,
    val_dataset=val_subset,
    num_classes=len(classes),
    n_trials=20
)

In [None]:
def train_final_student(teacher_model, train_dataset, val_dataset, best_params, num_classes, save_path='best_student.pth'):
    """Distil the teacher into a StudentResNet and checkpoint the best epoch.

    Args:
        teacher_model: Trained teacher; moved to the device and put in eval mode.
        train_dataset: Dataset used for training.
        val_dataset: Dataset used for validation after every epoch.
        best_params: Dict with the keys 'intermediate_dim', 'dropout1_rate',
            'dropout2_rate', 'batch_size', 'optimizer', 'lr', 'num_epochs',
            'temperature', 'alpha' and, when the optimizer is not 'Adam',
            'momentum'.
        num_classes: Number of output classes.
        save_path: Where the checkpoint of the best epoch is written.

    Returns:
        The student as it is after the LAST epoch. The weights of the best
        epoch (by validation accuracy) are only available in the checkpoint
        at ``save_path``.
    """
    # Create student model with best parameters
    student_model = StudentResNet(
        num_classes=num_classes,
        intermediate_dim=best_params['intermediate_dim'],
        dropout1_rate=best_params['dropout1_rate'],
        dropout2_rate=best_params['dropout2_rate']
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    student_model = student_model.to(device)
    teacher_model = teacher_model.to(device)
    teacher_model.eval()

    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=best_params['batch_size'],
        shuffle=True,
        num_workers=2
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=best_params['batch_size'],
        shuffle=False,
        num_workers=2
    )

    # Setup optimizer
    if best_params['optimizer'] == 'Adam':
        optimizer = optim.Adam(student_model.parameters(), lr=best_params['lr'])
    else:
        optimizer = optim.SGD(
            student_model.parameters(),
            lr=best_params['lr'],
            momentum=best_params['momentum']
        )

    # Create the checkpoint directory up front (as train_final_model does), so
    # saving does not fail when the target folder has not been created yet.
    os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)

    num_epochs = best_params['num_epochs']
    best_val_acc = 0.0

    for epoch in range(num_epochs):
        # Training phase
        student_model.train()

        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}')
        for images, labels in progress_bar:
            images, labels = images.to(device), labels.to(device)

            # Teacher predictions are targets only: no gradients needed
            with torch.no_grad():
                teacher_logits = teacher_model(images)

            # Forward pass student
            student_logits = student_model(images)

            # Calculate loss
            loss = kd_loss(
                student_logits=student_logits,
                teacher_logits=teacher_logits,
                labels=labels,
                temperature=best_params['temperature'],
                alpha=best_params['alpha']
            )

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            progress_bar.set_postfix({'loss': f'{loss.item():.4f}'})

        # Validation phase
        student_model.eval()
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = student_model(images)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_accuracy = 100. * val_correct / val_total
        print(f'\nEpoch {epoch+1}: Validation Accuracy: {val_accuracy:.2f}%')

        # Save best model
        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            print(f'New best validation accuracy: {best_val_acc:.2f}%')
            torch.save({
                'epoch': epoch,
                'model_state_dict': student_model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'best_val_acc': best_val_acc,
                'hyperparameters': best_params
            }, save_path)

    print(f"\nTraining completed! Best validation accuracy: {best_val_acc:.2f}%")
    return student_model

In [None]:
import os

# Make sure a "models" folder exists relative to the current working directory.
# exist_ok=True means the message below is printed whether the folder was
# newly created or already present.
# NOTE(review): later cells write to /kaggle/working/models/..., which is this
# same folder only if the working directory is /kaggle/working — confirm.
directory = "models"
os.makedirs(directory, exist_ok=True) 
print(f"Directory '{directory}' created successfully!")


In [None]:
final_student = train_final_student(
    teacher_model=model,
    train_dataset=train_subset,
    val_dataset=val_subset,
    best_params=best_params,
    num_classes=len(classes),
    save_path='/kaggle/working/models/best_student.pth'
)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize lists to store true labels and predictions
all_preds = []
all_labels = []

student_checkpoint_path = "/kaggle/working/models/best_student.pth"

# map_location lets a checkpoint written on a GPU be opened on a CPU-only machine
checkpoint = torch.load(student_checkpoint_path, map_location=device)

# The student's architecture and optimizer settings are read from the
# checkpoint itself (they were stored under 'hyperparameters' at save time).
hyperparameters = checkpoint['hyperparameters']

student = StudentResNet(
    len(classes),
    hyperparameters["intermediate_dim"],
    hyperparameters["dropout1_rate"],
    hyperparameters["dropout2_rate"]
).to(device)
student.load_state_dict(checkpoint['model_state_dict'])

# Recreate the optimizer type that produced the checkpoint, so the saved
# optimizer state matches; falls back to plain SGD if the type was not recorded.
if hyperparameters.get('optimizer', 'SGD') == 'Adam':
    optimizer = torch.optim.Adam(student.parameters(), lr=hyperparameters['lr'])
else:
    optimizer = torch.optim.SGD(
        student.parameters(),
        lr=hyperparameters['lr'],
        momentum=hyperparameters.get('momentum', 0.0)
    )
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Restore additional information
epoch = checkpoint['epoch']
best_val_acc = checkpoint['best_val_acc']

print(f"Model loaded from epoch {epoch} with best validation accuracy: {best_val_acc}")

In [None]:
# Fresh accumulators for ground truth and predictions of the student
all_preds, all_labels = [], []

# Put the student on the target device and switch to inference mode
student.to(device)
student.eval()

with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)

        # Predicted class = index of the largest logit
        predicted = torch.max(student(images), 1).indices

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

In [None]:
# Create confusion matrix.
# `labels` pins the matrix to one row/column per class, so the tick labels
# below stay aligned even if a class is missing from the sampled test subset.
class_ids = list(range(len(classes)))
cm = confusion_matrix(all_labels, all_preds, labels=class_ids)

# Plot confusion matrix
plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# Accumulators for the misclassified test samples of the student
wrong_images, wrong_preds, wrong_labels = [], [], []

# Inference mode for the evaluation pass
student.eval()

with torch.no_grad():
    for images, labels in testloader:
        images = images.to(device)
        labels = labels.to(device)

        # Predicted class = index of the largest logit
        predicted = torch.max(student(images), 1).indices

        # Boolean mask of the samples the student got wrong
        mask = predicted != labels
        if not mask.any():
            continue
        wrong_images.extend(images[mask].cpu())
        wrong_preds.extend(predicted[mask].cpu().numpy())
        wrong_labels.extend(labels[mask].cpu().numpy())


def show_misclassified_images(images, true_labels, pred_labels, num_images=10):
    """Plot up to ``num_images`` misclassified samples in a single row.

    Args:
        images: Sequence of normalized (C, H, W) CPU tensors.
        true_labels: Ground-truth class indices, aligned with ``images``.
        pred_labels: Predicted class indices, aligned with ``images``.
        num_images: Maximum number of images to show.

    Each panel is titled with the true (T) and predicted (P) class name.
    Nothing is plotted when there are no images to show.
    """
    num_images = min(num_images, len(images))  # Ensure we don't go out of bounds
    if num_images <= 0:
        # plt.subplots cannot create a grid with zero columns
        print("No misclassified images to display.")
        return

    # squeeze=False keeps `axes` a 2-D array even for a single image
    fig, axes = plt.subplots(1, num_images, figsize=(num_images * 2, 2), squeeze=False)

    for i, ax in enumerate(axes[0]):
        img = images[i].numpy().transpose((1, 2, 0))  # Convert from (C, H, W) to (H, W, C)
        img = img * np.asarray(std) + np.asarray(mean)  # Denormalize
        img = np.clip(img, 0, 1)  # Clip values for display

        ax.imshow(img)
        ax.set_title(f"T: {classes[true_labels[i]]}\nP: {classes[pred_labels[i]]}", fontsize=10)
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Show up to 10 of the test images the student misclassified,
# titled with the true (T) and predicted (P) class.
show_misclassified_images(wrong_images, wrong_labels, wrong_preds, num_images=10)

In [None]:
import onnx

# Step 1 of the export chain (PyTorch -> ONNX): trace the student with a dummy
# input and write the graph, including the trained weights, to disk.

# Ensure the model is on the same device as the input
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
student = student.to(device)  # Move model to GPU/CPU

# Create dummy input and move it to the same device as the model
dummy_input = torch.randn(1, 3, 224, 224).to(device)  # Match model's input shape

# Define the ONNX save path
onnx_path = "/kaggle/working/models/onnx_model.onnx"

# Export the model to ONNX
# NOTE(review): no dynamic_axes are passed, so the exported graph presumably
# keeps the dummy input's fixed shape (batch size 1) — confirm if batched
# inference is needed downstream.
torch.onnx.export(student, dummy_input, onnx_path,
                  export_params=True,
                  opset_version=11,
                  do_constant_folding=True,
                  input_names=['input'],
                  output_names=['output'])

print(f"Model successfully converted to ONNX and saved at {onnx_path}")


In [None]:
from onnx_tf.backend import prepare

# Step 2 of the export chain (ONNX -> TensorFlow): read the ONNX file written
# by the previous cell (`onnx_path`) and export it as a TensorFlow graph.

# Load the ONNX model
onnx_model = onnx.load(onnx_path)

# Convert ONNX to TensorFlow
tf_rep = prepare(onnx_model)
tf_path = "/kaggle/working/models/tf_model"
tf_rep.export_graph(tf_path)

print(f"Model successfully converted to TensorFlow and saved at {tf_path}")


In [3]:
import tensorflow as tf

# Step 3 of the export chain (TensorFlow -> TFLite). `tf_path` is redefined
# here with the same value as in the previous cell, so this cell only needs
# the exported TensorFlow model on disk.
tf_path = "/kaggle/working/models/tf_model"

# Load the TensorFlow model
converter = tf.lite.TFLiteConverter.from_saved_model(tf_path)

# Convert the model to TFLite (returns the serialized model written below)
tflite_model = converter.convert()

# Save the TFLite model
tflite_path = "/kaggle/working/models/best_resnet18_model_light.tflite"
with open(tflite_path, "wb") as f:
    f.write(tflite_model)

print(f"Model successfully converted to TensorFlow Lite and saved at {tflite_path}")

Model successfully converted to TensorFlow Lite and saved at /kaggle/working/models/best_resnet18_model_light.tflite
