In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
from IPython import display
import matplotlib.pyplot as plt

# Seed PyTorch's CPU and CUDA generators with one shared value, then select
# the compute device: the GPU when CUDA is available, otherwise the CPU.
SEED = 111
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# CIFAR-10 input pipeline.
#
# Both splits are converted to tensors and normalised per channel with
# mean 0.5 / std 0.5. Only the training split gets random horizontal flips:
# applying the random augmentation to the test split as well would make the
# validation loss/accuracy change from one evaluation to the next.
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

# Training transform (name kept as `transform` for any cell that refers to it).
transform = transforms.Compose([transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize])
# Deterministic evaluation transform: no augmentation.
test_transform = transforms.Compose([transforms.ToTensor(), normalize])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False)

In [None]:
class fire(nn.Module):
    """Fire block (squeeze + two parallel expand branches) with batch norm.

    A 1x1 "squeeze" convolution first reduces the channel count. Its output
    feeds two parallel "expand" convolutions, a 1x1 and a 3x3 (padding 1),
    each producing ``expand_planes`` channels. The branches are concatenated
    along the channel axis and passed through a ReLU, so the result has
    ``2 * expand_planes`` channels and the same spatial size as the input.

    Args:
        inplanes: number of channels in the incoming feature map.
        squeeze_planes: number of channels produced by the squeeze convolution.
        expand_planes: number of channels produced by *each* expand branch.
    """

    def __init__(self, inplanes, squeeze_planes, expand_planes):
        super().__init__()

        # Squeeze stage: 1x1 conv -> BN -> ReLU.
        self.conv1 = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1, stride=1)
        self.bn1 = nn.BatchNorm2d(squeeze_planes)
        self.relu1 = nn.ReLU(inplace=True)

        # Expand stage, 1x1 branch.
        self.conv2 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=1, stride=1)
        self.bn2 = nn.BatchNorm2d(expand_planes)

        # Expand stage, 3x3 branch (padding keeps the spatial size).
        self.conv3 = nn.Conv2d(squeeze_planes, expand_planes, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(expand_planes)

        # Activation applied once, after the two branches are joined.
        self.relu2 = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the block to ``x`` and return the concatenated, activated branches."""
        squeezed = self.relu1(self.bn1(self.conv1(x)))

        branch_1x1 = self.bn2(self.conv2(squeezed))
        branch_3x3 = self.bn3(self.conv3(squeezed))

        # Join along the channel dimension, then activate.
        joined = torch.cat((branch_1x1, branch_3x3), dim=1)
        return self.relu2(joined)
    
# Smoke test: push one random 96-channel 32x32 map through a Fire block and
# display the output shape (2 * 64 = 128 channels, spatial size unchanged).
fire_module = fire(inplanes=96, squeeze_planes=16, expand_planes=64)
dummy_input = torch.randn(1, 96, 32, 32)
fire_module(dummy_input).shape

In [None]:
class SqueezeNet(nn.Module):
    """SqueezeNet-style 10-class classifier assembled from ``fire`` blocks.

    Layout: a 3x3 stem convolution (BN + ReLU), eight Fire blocks interleaved
    with three 2x2 max-pools, a 1x1 convolution that maps the 512 feature
    channels to 10 class channels, global average pooling, and a log-softmax
    over the class channel.

    The final pooling is adaptive (output size 1x1), so the network is not
    tied to 32x32 inputs; for a 32x32 input it averages the same 4x4 map that
    a fixed 4x4 average pool would. Inputs must be at least 8x8 so that the
    three stride-2 poolings leave at least one spatial position.

    ``forward`` returns log-probabilities of shape ``(N, 10, 1, 1)``; callers
    that need ``(N, 10)`` should reshape (e.g. ``.view(-1, 10)``).
    """

    def __init__(self):
        super().__init__()
        # Stem. Size comments give the spatial side for a 32x32 input.
        self.conv1 = nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1)  # 32
        self.bn1 = nn.BatchNorm2d(96)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # 16

        # Each Fire block outputs 2 * expand_planes channels, which is the
        # `inplanes` of the block that follows it.
        self.fire2 = fire(96, 16, 64)
        self.fire3 = fire(128, 16, 64)
        self.fire4 = fire(128, 32, 128)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # 8
        self.fire5 = fire(256, 32, 128)
        self.fire6 = fire(256, 48, 192)
        self.fire7 = fire(384, 48, 192)
        self.fire8 = fire(384, 64, 256)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # 4
        self.fire9 = fire(512, 64, 256)

        # Classifier head: per-position class scores, then a global average.
        self.conv2 = nn.Conv2d(512, 10, kernel_size=1, stride=1)
        # Adaptive pooling always reduces to 1x1, whatever the input size.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        # Note: despite the attribute name this is a *log*-softmax, which is
        # what nn.NLLLoss expects.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return class log-probabilities of shape ``(N, 10, 1, 1)`` for images ``x``."""
        pipeline = (
            self.conv1, self.bn1, self.relu, self.maxpool1,
            self.fire2, self.fire3, self.fire4, self.maxpool2,
            self.fire5, self.fire6, self.fire7, self.fire8, self.maxpool3,
            self.fire9,
            self.conv2, self.avg_pool, self.softmax,
        )
        for layer in pipeline:
            x = layer(x)
        return x

model = SqueezeNet().to(device)

# Draw ONE batch and unpack it, so the images and labels belong together.
# Calling next(iter(trainloader)) once per element would build two separate
# shuffled iterators and pair images from one batch with labels from another
# (and load two batches instead of one).
x, y = next(iter(trainloader))
x, y = x.to(device), y.to(device)

# Sanity check: display the shape of the model output for this batch.
model(x).shape

In [None]:
# Count the scalar entries of every parameter tensor registered on the model.
param_sizes = [tensor.numel() for tensor in model.parameters()]
total_params = sum(param_sizes)
print(f'Total number of parameters: {total_params}')

In [None]:
# Negative log-likelihood loss: the network ends in a LogSoftmax, so its
# outputs are already log-probabilities.
criterion = nn.NLLLoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

# Per-epoch loss histories, appended to by the training loop.
train_losses, val_losses = [], []

In [None]:
# Train for NUM_EPOCHS epochs; after each epoch evaluate on the test split,
# record both losses and redraw the loss curves.
NUM_EPOCHS = 30

for epoch in range(NUM_EPOCHS):
    # ---- training pass ----
    model.train()
    train_loss_sum = 0.0  # sum of per-sample losses over the epoch
    train_seen = 0        # number of samples processed
    for x, y in trainloader:
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        # The model emits (N, 10, 1, 1); flatten to (N, 10) for the loss.
        out = model(x).view(-1, 10)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()

        # criterion already returns the *mean* over the batch. Multiply by the
        # batch size to recover the per-sample sum, so that the epoch average
        # weights every sample equally even when the last batch is smaller.
        train_loss_sum += loss.item() * x.size(0)
        train_seen += x.size(0)
    train_loss = train_loss_sum / max(train_seen, 1)

    # ---- validation pass ----
    model.eval()
    val_loss_sum = 0.0
    val_correct = 0
    val_seen = 0
    with torch.no_grad():
        for x, y in testloader:
            x = x.to(device)
            y = y.to(device)

            out = model(x).view(-1, 10)
            loss = criterion(out, y)
            val_loss_sum += loss.item() * x.size(0)
            # Predicted class = index of the largest log-probability.
            val_correct += (out.argmax(dim=1) == y).sum().item()
            val_seen += x.size(0)
    val_loss = val_loss_sum / max(val_seen, 1)
    # Accuracy over the whole validation set, in percent (not just the last batch).
    accuracy = 100.0 * val_correct / max(val_seen, 1)

    train_losses.append(train_loss)
    val_losses.append(val_loss)

    # Replace the previous epoch's output with the updated numbers and curves.
    display.clear_output(wait=True)
    print("Epoch: ", epoch, "\tTrain Loss: ", train_loss, '\tVal Loss: ', val_loss, '\tVal Acc (%): ', accuracy)
    plt.plot(train_losses, label='Training Loss')
    plt.plot(val_losses, label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Mean NLL loss per sample')
    plt.legend()
    plt.show()

In [None]:
# Display the current value of `y`; the loop above last assigns it a batch of
# labels from `testloader`, moved to `device`.
y