In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from utils import calculate_metrics

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# load ResNet50 model
model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet50',
                          pretrained=True)
# change the last layer to 10 classes
model.fc = nn.Linear(2048, 10)
# freeze all layers except the last layer
for param in model.parameters():
    param.requires_grad = False
for param in model.fc.parameters():
    param.requires_grad = True
# move model to GPU
model = model.to(device)

# load CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=torchvision.transforms.ToTensor())
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=torchvision.transforms.ToTensor())
# create data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=True)

# define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-4)

Using cache found in C:\Users\saeedzou/.cache\torch\hub\pytorch_vision_v0.6.0


Files already downloaded and verified
Files already downloaded and verified


In [3]:
# define a function to train the model 
def train_model(model, train_loader, test_loader, criterion, optimizer, epochs=10):
    train_loss = []
    test_loss = []
    train_acc = []
    test_acc = []
    for epoch in range(epochs):
        # train
        model.train()
        train_loss_epoch = 0
        train_acc_epoch = 0
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss_epoch += loss.item()
            train_acc_epoch += (outputs.argmax(1) == labels).sum().item()
        train_loss_epoch /= len(train_loader)
        train_acc_epoch /= len(train_loader.dataset)
        train_loss.append(train_loss_epoch)
        train_acc.append(train_acc_epoch)
        # test
        model.eval()
        test_loss_epoch = 0
        test_acc_epoch = 0
        with torch.no_grad():
            for i, (images, labels) in enumerate(test_loader):
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                test_loss_epoch += loss.item()
                test_acc_epoch += (outputs.argmax(1) == labels).sum().item()
        test_loss_epoch /= len(test_loader)
        test_acc_epoch /= len(test_loader.dataset)
        test_loss.append(test_loss_epoch)
        test_acc.append(test_acc_epoch)
        print("Epoch: {}/{} Train Loss: {:.4f} Train Accuracy: {:.4f} Test Loss: {:.4f} Test Accuracy: {:.4f}".format(epoch+1, epochs, train_loss_epoch, train_acc_epoch, test_loss_epoch, test_acc_epoch))
    return train_loss, test_loss, train_acc, test_acc 

In [4]:
# train the model
train_loss, test_loss, train_acc, test_acc = train_model(model, train_loader, test_loader, criterion, optimizer, epochs=10)

# plot the training and test loss and accuracy
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.plot(train_loss, label="Train Loss")
plt.plot(test_loss, label="Test Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.subplot(1, 2, 2)
plt.plot(train_acc, label="Train Accuracy")
plt.plot(test_acc, label="Test Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()

Epoch: 1/10 Train Loss: 1.9048 Train Accuracy: 0.3446 Test Loss: 1.6868 Test Accuracy: 0.4388
Epoch: 2/10 Train Loss: 1.6564 Train Accuracy: 0.4387 Test Loss: 1.5781 Test Accuracy: 0.4740
Epoch: 3/10 Train Loss: 1.5869 Train Accuracy: 0.4621 Test Loss: 1.5444 Test Accuracy: 0.4820


In [None]:
test_dataset.classes

In [None]:
calculate_metrics(model, test_loader, device=device, classes=test_dataset.classes)

In [None]:
# load ResNet18 model
student_model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18',
                            pretrained=True)
# change the last layer to 10 classes
student_model.fc = nn.Linear(512, 10)
# move model to GPU
student_model = student_model.to(device)

# define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(student_model.fc.parameters(), lr=1e-4)


In [None]:
# define a function to train the student model
def train_student_model(student_model, teacher_model, train_loader, test_loader, criterion, optimizer, epochs=10, T=10, alpha=0.5):
    train_loss = []
    test_loss = []
    train_acc = []
    test_acc = []
    for epoch in range(epochs):
        # train
        student_model.train()
        train_loss_epoch = 0
        train_acc_epoch = 0
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = student_model(images)
            with torch.no_grad():
                teacher_outputs = teacher_model(images)
            loss = alpha * criterion(outputs, labels) + (1-alpha) * T**2 * criterion(F.log_softmax(outputs/T, dim=1), F.softmax(teacher_outputs/T, dim=1))
            loss.backward()
            optimizer.step()
            train_loss_epoch += loss.item()
            train_acc_epoch += (outputs.argmax(1) == labels).sum().item()
        train_loss_epoch /= len(train_loader)
        train_acc_epoch /= len(train_loader.dataset)
        train_loss.append(train_loss_epoch)
        train_acc.append(train_acc_epoch)
        # test
        student_model.eval()
        test_loss_epoch = 0
        test_acc_epoch = 0
        with torch.no_grad():
            for i, (images, labels) in enumerate(test_loader):
                images = images.to(device)
                labels = labels.to(device)
                outputs = student_model(images)
                loss = criterion(outputs, labels)
                test_loss_epoch += loss.item()
                test_acc_epoch += (outputs.argmax(1) == labels).sum().item()
        test_loss_epoch /= len(test_loader)
        test_acc_epoch /= len(test_loader.dataset)
        test_loss.append(test_loss_epoch)
        test_acc.append(test_acc_epoch)
        print("Epoch: {}/{} Train Loss: {:.4f} Train Accuracy: {:.4f} Test Loss: {:.4f} Test Accuracy: {:.4f}".format(epoch+1, epochs, train_loss_epoch, train_acc_epoch, test_loss_epoch, test_acc_epoch))
    return train_loss, test_loss, train_acc, test_acc

In [1]:
# define a function to find the best alpha and T
def find_best_alpha_T(student_model, teacher_model, train_loader, test_loader, criterion, optimizer, epochs=10, T_list=[1, 10, 100], alpha_list=[0.1, 0.5, 0.9]):
    best_acc = 0
    best_alpha = 0
    best_T = 0
    for T in T_list:
        for alpha in alpha_list:
            student_model = torch.hub.load('pytorch/vision:v0.6.0', 'resnet18',
                                        pretrained=True)
            student_model.fc = nn.Linear(512, 10)
            student_model = student_model.to(device)
            train_loss, test_loss, train_acc, test_acc = train_student_model(student_model, teacher_model, train_loader, test_loader, criterion, optimizer, epochs=epochs, T=T, alpha=alpha)
            if test_acc[-1] > best_acc:
                best_acc = test_acc[-1]
                best_alpha = alpha
                best_T = T
    return best_acc, best_alpha, best_T

# find the best alpha and T
best_acc, best_alpha, best_T = find_best_alpha_T(student_model, model, train_loader, test_loader, criterion, optimizer, epochs=10, T_list=[1, 10, 100], alpha_list=[0.1, 0.5, 0.9])

NameError: name 'student_model' is not defined

In [None]:
# train the student model
student_train_loss, student_test_loss, student_train_acc, student_test_acc = train_student_model(student_model, model, train_loader, test_loader, criterion, optimizer, epochs=10, T=10, alpha=0.5)