<a href="https://colab.research.google.com/github/pradyot-09/DL-Reproducibility-Project/blob/master/distillation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch
# Run on the GPU when one is present, otherwise fall back to the CPU so the
# notebook still executes end-to-end on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [0]:
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """Fully connected classifier: 784 inputs -> two ReLU hidden layers -> 10 outputs.

    Args:
        hidden_size: Width of both hidden layers (default 1200).
    """

    def __init__(self, hidden_size=1200):
        super().__init__()
        # Attribute names are part of the state_dict keys; keep them stable so
        # previously saved checkpoints still load.
        self.hidden1 = nn.Linear(784, hidden_size, bias=True)
        self.hidden2 = nn.Linear(hidden_size, hidden_size, bias=True)
        self.hidden3 = nn.Linear(hidden_size, 10, bias=True)

    def forward(self, x):
        """Return class probabilities for a batch of flattened inputs.

        Args:
            x: Tensor whose last dimension has size 784.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 10 that sums to 1.
        """
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        # Normalise over the class axis explicitly. Omitting ``dim`` is
        # deprecated, emits a warning, and picks axis 0 for 3-D inputs.
        return F.softmax(self.hidden3(x), dim=-1)

In [0]:
import torchvision
import torchvision.transforms as transforms
# Mini-batch size shared by the training and test loaders.
batch_size = 256

# Convert each image to a tensor, then normalise with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Training split: reshuffled every epoch.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# Test split: fixed order.
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)

In [0]:
import torch.optim as optim

# Build the network and move its parameters to the selected device
# (nn.Module.to returns the module itself).
net = Model().to(device)

# Mean-squared error between the network's softmax output and the one-hot target.
criterion = torch.nn.MSELoss()

# Plain SGD with momentum over all network parameters.
# NOTE(review): the run recorded in this notebook shows the loss staying near
# 0.089 and a test accuracy of 21 %; this loss/learning-rate pairing may be
# learning very slowly -- confirm whether that is intended.
optimizer = optim.SGD(
    net.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [0]:
def convert_labels(labels, num_classes=10):
    """One-hot encode a batch of integer class labels.

    Args:
        labels: 1-D tensor (or sequence) of integer class indices, expected
            to lie in ``[0, num_classes)``.
        num_classes: Width of each one-hot row (default 10).

    Returns:
        A ``float32`` tensor of shape ``(len(labels), num_classes)`` with a
        single 1.0 per row at the label's index and 0.0 elsewhere.
    """
    indices = torch.as_tensor(labels, dtype=torch.long).reshape(-1)
    count = indices.numel()
    target = torch.zeros([count, num_classes], dtype=torch.float32)
    # One vectorised indexed assignment replaces the per-sample Python loop.
    target[torch.arange(count), indices.to(target.device)] = 1.0
    return target

In [0]:
# Train for two full passes over the training set, reporting the mean loss of
# every 50 consecutive mini-batches.
for epoch in range(2):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader):
        # Clear gradients accumulated by the previous step.
        optimizer.zero_grad()

        # Flatten each image to a vector and one-hot encode its label.
        inputs = torch.flatten(inputs, start_dim=1).to(device)
        target = convert_labels(labels).to(device)

        # Forward pass, loss, backward pass, parameter update.
        outputs = net(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()

        # Accumulate the loss and report once per 50 mini-batches.
        running_loss += loss.item()
        if (i + 1) % 50 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 50))
            running_loss = 0.0

print('Finished Training')

  


[1,    50] loss: 0.089
[1,   100] loss: 0.089
[1,   150] loss: 0.089
[1,   200] loss: 0.089
[2,    50] loss: 0.089
[2,   100] loss: 0.088
[2,   150] loss: 0.088
[2,   200] loss: 0.088
Finished Training


In [0]:
# Show which GPU is in use. The query raises on machines without CUDA, so it is
# guarded, and it is printed because a bare expression that is not the last
# line of a cell is never displayed.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))

# File the trained weights are written to and later restored from.
PATH = './mnist_default.pth'

In [0]:
# Persist only the network's parameters (its state_dict) to PATH.
torch.save(net.state_dict(), PATH)

In [0]:
# Rebuild the network and restore the saved weights. map_location remaps the
# checkpoint's tensors onto the current device, so weights saved from a GPU
# run also load on a CPU-only machine.
net = Model().to(device)
net.load_state_dict(torch.load(PATH, map_location=device))
net.eval()  # inference mode for the evaluation pass

correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for data in testloader:
        inputs, labels = data
        inputs = torch.flatten(inputs, start_dim=1).to(device)
        # Compare against the integer labels directly; one-hot encoding them
        # only to take the argmax again yields the same values.
        target = labels.to(device)
        outputs = net(inputs)
        # The predicted class is the index of the largest output per row.
        _, predicted = torch.max(outputs, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

  


Accuracy of the network on the 10000 test images: 21 %
