In [1]:
import torch.nn as nn
import torch

In [4]:
def nin_block(in_channels, out_channels, kernel_size, stride, padding):
    return nn.Sequential(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding), nn.ReLU(),
                         nn.Conv2d(out_channels, out_channels, 1), nn.ReLU(),
                         nn.Conv2d(out_channels, out_channels, 1), nn.ReLU())

In [9]:
device = torch.device("cpu")

In [19]:
net = nn.Sequential(
    nin_block(1, 96, kernel_size=11, stride=4, padding=0),
    nn.MaxPool2d(kernel_size=3, stride=2),
    
    nin_block(96, 256, kernel_size=5, stride=1, padding=2),
    nn.MaxPool2d(kernel_size=3, stride=2),
    
    nin_block(256, 384, kernel_size=3, stride=1, padding=1),
    nn.MaxPool2d(kernel_size=3, stride=2),
    
    nn.Dropout(0.5),
    
    nin_block(384, 10, kernel_size=3, stride=1, padding=1),
    nn.AvgPool2d(kernel_size=5, stride=1),
    nn.Flatten()
)
net.to(device);

In [21]:
X = torch.rand((1, 1, 224, 224))

for layer in net:
    X = layer(X)
    print(layer.__class__.__name__, "output shape:\t", X.shape)

print("=" * 80)
print("Model architecture:")
print(net)

Sequential output shape:	 torch.Size([1, 96, 54, 54])
MaxPool2d output shape:	 torch.Size([1, 96, 26, 26])
Sequential output shape:	 torch.Size([1, 256, 26, 26])
MaxPool2d output shape:	 torch.Size([1, 256, 12, 12])
Sequential output shape:	 torch.Size([1, 384, 12, 12])
MaxPool2d output shape:	 torch.Size([1, 384, 5, 5])
Dropout output shape:	 torch.Size([1, 384, 5, 5])
Sequential output shape:	 torch.Size([1, 10, 5, 5])
AvgPool2d output shape:	 torch.Size([1, 10, 1, 1])
Flatten output shape:	 torch.Size([1, 10])
Model architecture:
Sequential(
  (0): Sequential(
    (0): Conv2d(1, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU()
    (4): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
    (5): ReLU()
  )
  (1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (2): Sequential(
    (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2)

In [22]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.optim as optim
import time

In [23]:
device = torch.device("cuda:0")
net = net.to(device)

In [24]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

training_set = datasets.FashionMNIST("dataset/fashion-mnist",
                                     transform=transform,
                                     train=True,
                                     download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=64, shuffle=True)

test_set = datasets.FashionMNIST("dataset/fashion-mnist",
                                 transform=transform,
                                 train=False,
                                 download=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64)

In [27]:
criteration = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9)

In [28]:
epochs = 10

for i in range(epochs):
    running_loss = 0.0
    start_time = time.time()
    
    for (inputs, labels) in training_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        net.train()
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criteration(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    end_time = time.time() - start_time
    print(f"Epoch: {i + 1}, Loss: {running_loss / len(training_loader)}, Time: {end_time}")

Epoch: 1, Loss: 2.302584886805081, Time: 75.77395510673523
Epoch: 2, Loss: 2.3025848865509033, Time: 75.71312594413757
Epoch: 3, Loss: 2.3025848865509033, Time: 76.07230377197266
Epoch: 4, Loss: 2.302584886805081, Time: 77.02252626419067
Epoch: 5, Loss: 2.3025848865509033, Time: 78.02374911308289
Epoch: 6, Loss: 2.3025848865509033, Time: 78.94682598114014
Epoch: 7, Loss: 2.3025848865509033, Time: 79.03703379631042
Epoch: 8, Loss: 2.3025848865509033, Time: 77.80244946479797
Epoch: 9, Loss: 2.302584886805081, Time: 77.74833631515503
Epoch: 10, Loss: 2.3025848865509033, Time: 75.65408253669739


In [30]:
total_predictions = 0
total_correct = 0

for data in test_loader:
    inputs, labels = data
    inputs = inputs.to(device)
    labels = labels.to(device)
    
    outputs = torch.argmax(net(inputs), dim=1)
    check = (outputs - labels) == 0
    
    total_correct += len(check[check == True])
    total_predictions += len(check)

print(f"Correct percentage: {total_correct / total_predictions * 100}%")

Correct percentage: 10.0%
