In [None]:
import torch
import torchvision
from torchvision import transforms as T
import torch.nn.functional as F
import torch.nn as nn

In [None]:
class AlexNet(nn.Module):
    """AlexNet-style CNN with batch normalisation after every convolution.

    Five convolutional stages produce 256 feature maps, which are pooled to
    a fixed 6x6 grid, flattened to 9216 features and classified by three
    fully connected layers.

    Args:
        num_classes: Number of output logits (default 10).

    Shapes:
        Input:  (N, 3, H, W). H and W must be at least 67 so that every
                pooling stage still has a full 3x3 window to work on.
        Output: (N, num_classes) raw logits; no softmax is applied.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=11, stride=4, padding=0),
            nn.BatchNorm2d(96),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(96, 256, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2))
        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer4 = nn.Sequential(
            nn.Conv2d(384, 384, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(384),
            nn.ReLU())
        self.layer5 = nn.Sequential(
            nn.Conv2d(384, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2))
        # Parameter-free pooling to the 6x6 grid the classifier expects.
        # For 227x227 inputs layer5 already yields 6x6, so this is the
        # identity there; for other resolutions it prevents a shape mismatch
        # in the first Linear layer. It adds no entries to the state_dict.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.fc = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU())
        self.fc1 = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU())
        self.fc2 = nn.Sequential(
            nn.Linear(4096, num_classes))

    def forward(self, x):
        """Map a batch of images (N, 3, H, W) to logits (N, num_classes)."""
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.avgpool(out)
        # Keep the batch dimension, collapse (C, H, W) into one feature axis.
        out = out.flatten(1)
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

In [None]:
def get_loss_function():
    """Build the criterion used for both training and evaluation.

    Returns:
        A ``torch.nn.CrossEntropyLoss`` instance created with its default
        arguments.
    """
    return torch.nn.CrossEntropyLoss()

In [None]:
def get_optimizer(net, lr, wd, momentum):
    """Create an SGD optimizer over every parameter of ``net``.

    Args:
        net: Module whose ``parameters()`` will be optimised.
        lr: Learning rate.
        wd: Weight-decay coefficient.
        momentum: SGD momentum factor.

    Returns:
        The configured ``torch.optim.SGD`` instance.
    """
    return torch.optim.SGD(
        net.parameters(),
        lr=lr,
        momentum=momentum,
        weight_decay=wd,
    )

In [None]:
# train function

def train(net, data_loader, optimizer, loss_function, device='cuda:0'):
    """Train ``net`` for one pass over ``data_loader``.

    Args:
        net: Model to optimise; it is switched to training mode.
        data_loader: Iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer holding ``net``'s parameters.
        loss_function: Callable ``(outputs, targets) -> scalar loss``. If it
            exposes a ``reduction`` attribute equal to ``'sum'`` its value
            is taken as a batch total; otherwise it is treated as a batch
            mean.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``(mean_loss, accuracy)``: the average loss per sample and the
        percentage of correctly classified samples over the whole pass.

    Raises:
        ZeroDivisionError: If ``data_loader`` yields no samples.
    """
    # A mean-reduced loss has to be re-weighted by the batch size before it
    # is summed; otherwise dividing by the sample count below would scale the
    # result by 1/batch_size and make it incomparable across loaders.
    mean_reduced = getattr(loss_function, 'reduction', 'mean') != 'sum'
    samples = 0
    cumulative_loss = 0.
    cumulative_correct = 0
    net.train()  # Needed for layers that behave differently in training (dropout, batch norm)
    for inputs, targets in data_loader:
        # Move the batch to the target device
        inputs, targets = inputs.to(device), targets.to(device)
        batch_size = inputs.shape[0]
        optimizer.zero_grad()  # Clear gradients first so stale ones never leak into this step
        outputs = net(inputs)  # Forward pass
        loss = loss_function(outputs, targets)  # Apply the loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update parameters
        batch_loss = loss.item()
        if mean_reduced:
            batch_loss *= batch_size
        cumulative_loss += batch_loss
        samples += batch_size
        predicted = outputs.argmax(dim=1)
        cumulative_correct += predicted.eq(targets).sum().item()
    return cumulative_loss / samples, cumulative_correct / samples * 100

In [None]:
# test function

def test(net, data_loader, loss_function, device='cuda:0'):
  samples = 0.
  cumulative_loss = 0.
  cumulative_accuracy = 0.
  net.eval() # Strictly needed if network contains layers which have different behaviours between train and test
  with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(data_loader):
      # Load data into GPU
      inputs, targets = inputs.to(device), targets.to(device)
      # Forward pass
      outputs = net(inputs)
      _, predicted = outputs.max(1)
      loss = loss_function(outputs, targets)
      samples += inputs.shape[0]
      cumulative_loss += loss.item()
      cumulative_accuracy += predicted.eq(targets).sum().item()
  return cumulative_loss/samples, cumulative_accuracy/samples*100

In [None]:
# loading the data

def get_data(batch_size, test_batch_size=256, root='./data', seed=None):
    """Build CIFAR-10 train / validation / test data loaders.

    Images are resized to 227x227, converted to tensors and normalised with
    mean 0.5 and std 0.5 per channel. The official training set is randomly
    split into a training part and a validation part.

    Args:
        batch_size: Batch size of the (shuffled) training loader.
        test_batch_size: Batch size of the validation and test loaders.
        root: Directory the dataset is downloaded to / read from.
        seed: Optional integer seed for the train/validation split. With the
            default ``None`` the split follows the global RNG state and so
            differs between runs. Only the split is seeded, not the
            training loader's shuffling.

    Returns:
        ``(train_loader, val_loader, test_loader)``.
    """
    # Prepare data transformations and then combine them sequentially
    transform = T.Compose([
        T.Resize((227, 227)),
        T.ToTensor(),  # PIL image -> float tensor with values in [0, 1]
        T.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),  # [0, 1] -> [-1, 1]
    ])
    # Load data
    full_training_data = torchvision.datasets.CIFAR10(root, train=True, transform=transform, download=True)
    test_data = torchvision.datasets.CIFAR10(root, train=False, transform=transform, download=True)
    # Create train and validation splits: 80% of the samples plus one go to
    # training (the +1 is kept from the original split), the rest to validation.
    num_samples = len(full_training_data)
    training_samples = int(num_samples * 0.8 + 1)
    validation_samples = num_samples - training_samples
    split_kwargs = {}
    if seed is not None:
        # A dedicated generator makes the split reproducible without
        # touching the global RNG state.
        split_kwargs['generator'] = torch.Generator().manual_seed(seed)
    training_data, validation_data = torch.utils.data.random_split(
        full_training_data, [training_samples, validation_samples], **split_kwargs)
    # Initialize dataloaders
    train_loader = torch.utils.data.DataLoader(training_data, batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(validation_data, test_batch_size, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_data, test_batch_size, shuffle=False)
    return train_loader, val_loader, test_loader

In [None]:
def main(batch_size=128, device='cuda:0', learning_rate=0.01, weight_decay=0.000001,
         momentum=0.9, epochs=50):
    """Train AlexNet on CIFAR-10 and print per-epoch and final metrics.

    Args:
        batch_size: Size of a training mini-batch.
        device: Device the network is placed on; it is also forwarded to
            ``train`` and ``test`` so the batches end up on the same device.
        learning_rate: Learning rate of the SGD optimizer.
        weight_decay: Weight-decay coefficient for regularisation of weights.
        momentum: Momentum of the SGD optimizer.
        epochs: Number of epochs for training the network.

    Training and validation metrics are printed after every epoch. Once all
    epochs are done, the training, validation and test sets are each
    evaluated one final time and the results printed.
    """
    train_loader, val_loader, test_loader = get_data(batch_size)
    net = AlexNet().to(device)
    optimizer = get_optimizer(net, learning_rate, weight_decay, momentum)
    loss_function = get_loss_function()
    for e in range(epochs):
        train_loss, train_accuracy = train(net, train_loader, optimizer, loss_function,
                                           device=device)
        val_loss, val_accuracy = test(net, val_loader, loss_function, device=device)
        print('Epoch: {:d}'.format(e+1))
        print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss,
                                                                         train_accuracy))
        print('\t Validation loss {:.5f}, Validation accuracy {:.2f}'.format(val_loss,
                                                                             val_accuracy))
        print('-----------------------------------------------------')
    # Final evaluation runs once, after the last epoch, so the test set is
    # not evaluated during training.
    print('After training:')
    train_loss, train_accuracy = test(net, train_loader, loss_function, device=device)
    val_loss, val_accuracy = test(net, val_loader, loss_function, device=device)
    test_loss, test_accuracy = test(net, test_loader, loss_function, device=device)
    print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss,
                                                                     train_accuracy))
    print('\t Validation loss {:.5f}, Validation accuracy {:.2f}'.format(val_loss,
                                                                         val_accuracy))
    print('\t Test loss {:.5f}, Test accuracy {:.2f}'.format(test_loss, test_accuracy))
    print('-----------------------------------------------------')

In [None]:
# Run the full training and evaluation with main()'s default hyperparameters.
main()

Files already downloaded and verified
Files already downloaded and verified
Epoch: 1
	 Training loss 0.01142, Training accuracy 46.34
	 Validation loss 0.00514, Validation accuracy 54.83
-----------------------------------------------------
After training:
	 Training loss 0.00981, Training accuracy 55.26
	 Validation loss 0.00514, Validation accuracy 54.83
	 Test loss 0.00508, Test accuracy 55.47
-----------------------------------------------------
Epoch: 2
	 Training loss 0.00812, Training accuracy 62.92
	 Validation loss 0.00418, Validation accuracy 64.13
-----------------------------------------------------
After training:
	 Training loss 0.00763, Training accuracy 66.59
	 Validation loss 0.00418, Validation accuracy 64.13
	 Test loss 0.00407, Test accuracy 65.68
-----------------------------------------------------
Epoch: 3
	 Training loss 0.00670, Training accuracy 69.57
	 Validation loss 0.00351, Validation accuracy 69.66
-----------------------------------------------------
Aft