In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

In [13]:
torch.cuda.device_count(), torch.cuda.get_device_name(0)

(4, 'NVIDIA A100 80GB PCIe')

In [14]:
# Check if GPU is available and set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [15]:
# Define the data transformations
transform = transforms.Compose([transforms.ToTensor()])

# Load the Fashion MNIST dataset
train_dataset = datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.FashionMNIST(root='./data', train=False, transform=transform, download=True)

# Create data loaders
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

torch.manual_seed(0)

<torch._C.Generator at 0x7f67cc11f530>

In [16]:
# # If you are using CuDNN , otherwise you can just ignore
torch.cuda.manual_seed(0)
torch.backends.cudnn.deterministic=True
torch.backends.cudnn.benchmark=False

In [17]:
# tune_me
activation = nn.ReLU()
learning_rate = 0.1

In [18]:
# Define the CNN architecture
class FashionCNN(nn.Module):
    def __init__(self):
        super(FashionCNN, self).__init__()

        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, stride=1)
        self.activation1 = activation
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=1)
        self.activation2 = activation
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(1024, 1024)
        self.activation3 = activation

        self.fc2 = nn.Linear(1024, 256)
        self.activation4 = activation

        self.dropout = nn.Dropout(p=0.3)

        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.pool1(self.activation1(self.conv1(x)))
        x = self.pool2(self.activation2(self.conv2(x)))
        
        # flatten for fully connected layer
        x = x.view(x.size(0), -1)
        x = self.activation3(self.fc1(x))
        x = self.activation4(self.fc2(x))
        
        x = self.dropout(x)
        x = self.fc3(x)
        return x

In [19]:
def init_weights(m):
    if isinstance(m, nn.Linear) and isinstance(m, nn.Conv2d):
        nn.init.xavier_uniform_(m.weight)

In [20]:
# Move the model to GPU
model = FashionCNN().to(device)
model.apply(init_weights)

print(model)

# Instantiate the loss function and optimizer
criterion = nn.CrossEntropyLoss()
criterion.to(device)
optimizer = optim.SGD(list(model.parameters()), lr=learning_rate)


# criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(list(model.parameters()), lr=learning_rate)

FashionCNN(
  (conv1): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
  (activation1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (activation2): ReLU()
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1024, out_features=1024, bias=True)
  (activation3): ReLU()
  (fc2): Linear(in_features=1024, out_features=256, bias=True)
  (activation4): ReLU()
  (dropout): Dropout(p=0.3, inplace=False)
  (fc3): Linear(in_features=256, out_features=10, bias=True)
)


In [21]:
def evaluation(dataloader):
  total, correct = 0,0
  # turn on evaluate mode, this de-activates certain modes such as dropout
  # good practice to include in your projects
  model.eval()
  for data in dataloader:

    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)
    outputs = model(inputs)
    # we take the index of the class that received the highest value
    # we take outputs.data so that no backpropagation is performed for these outputs
    _, pred = torch.max(outputs.data, 1)
    total += labels.size(0)
    # .item() takes Python float values from the tensor
    correct += (pred == labels).sum().item()
  return 100 * correct / total

In [None]:
loss_epoch_array = []
max_epochs = 30
loss_epoch = 0
train_accuracy = []
test_accuracy = []
# loop over epochs
for epoch in range(max_epochs):
  # we will compute sum of batch losses per epoch
  loss_epoch = 0
  # loop over batches
  for i, data in enumerate(train_loader, 0):
    # to ensure the training mode is "turned on"
    model.train()
    inputs, labels = data
    inputs, labels = inputs.to(device), labels.to(device)
    # zero the gradients
    optimizer.zero_grad()
    outputs = model(inputs)
    # compute the loss
    loss = criterion(outputs, labels)
    # calculate the gradients
    loss.backward()
    # update the parameters using the gradients and optimizer algorithm
    optimizer.step()
    # we sum the loss over batches
    loss_epoch += loss.item()

  loss_epoch_array.append(loss_epoch)
  train_accuracy.append(evaluation(train_loader))
  test_accuracy.append(evaluation(test_loader))
  print("Epoch {}: loss: {}, train accuracy: {}, valid accuracy:{}".format(epoch + 1, loss_epoch_array[-1], train_accuracy[-1], test_accuracy[-1]))

Epoch 1: loss: 1170.087720297277, train accuracy: 85.43666666666667, valid accuracy:84.4
Epoch 2: loss: 656.2253056056798, train accuracy: 89.71666666666667, valid accuracy:88.28
Epoch 3: loss: 546.2004272732884, train accuracy: 89.865, valid accuracy:87.95
Epoch 4: loss: 485.3447560723871, train accuracy: 91.13333333333334, valid accuracy:89.01
Epoch 5: loss: 436.79365026950836, train accuracy: 92.54833333333333, valid accuracy:90.03
Epoch 6: loss: 397.61660710535944, train accuracy: 93.325, valid accuracy:90.38
Epoch 7: loss: 360.6707063424401, train accuracy: 93.72, valid accuracy:90.58
Epoch 8: loss: 331.0344875762239, train accuracy: 93.39666666666666, valid accuracy:90.17
Epoch 9: loss: 303.0613756850362, train accuracy: 95.55166666666666, valid accuracy:91.35


In [None]:
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.plot(train_accuracy, "r")
plt.plot(test_accuracy, "b")
plt.gca().legend(('train','test'))

In [None]:
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.plot(loss_epoch_array)