In [1]:
import torch.nn as nn
import torch
# import torchvision

In [2]:
def vgg_block(num_convs, in_channels, out_channels):
    layers = []
    
    for i in range(num_convs):
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        
        in_channels = out_channels
    
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    
    return nn.Sequential(*layers)

In [3]:
conv_arch = (
    (1, 1, 64),  # 112
    (1, 64, 128),  # 56
    (2, 128, 256),  # 28
    (2, 256, 512),  # 14
    (2, 512, 512)  # 7
)

In [4]:
def vgg(conv_arch):
    layers = []
    
    for (num_convs, in_channels, out_channels) in conv_arch:
        layers.append(vgg_block(num_convs, in_channels, out_channels))
    
    last_conv_out_channels = layers[-1][2].out_channels
    
    return nn.Sequential(*layers,
                         nn.Flatten(),
                         nn.Linear(7 * 7 * last_conv_out_channels, 4096), nn.ReLU(), nn.Dropout(0.5),
                         nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
                         nn.Linear(4096, 10))

In [5]:
net = vgg(conv_arch)
net = net.to("cpu")

In [6]:
X = torch.randn(size=(1, 1, 224, 224))

for blk in net:
    X = blk(X)
    print(blk.__class__.__name__, "output shape:\t", X.shape)

print("=" * 100)
print("Model architecture:")
print(net)

Sequential output shape:	 torch.Size([1, 64, 112, 112])
Sequential output shape:	 torch.Size([1, 128, 56, 56])
Sequential output shape:	 torch.Size([1, 256, 28, 28])
Sequential output shape:	 torch.Size([1, 512, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
Flatten output shape:	 torch.Size([1, 25088])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 10])
Model architecture:
Sequential(
  (0): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, st

In [19]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.optim as optim
import time

In [7]:
ratio = 4
small_conv_arch = [(e[0], 1 if e[1] == 1 else e[1] // ratio, e[2] // ratio) for e in conv_arch]

In [8]:
device = torch.device("cuda:0")

net = vgg(small_conv_arch)
net = net.to(device)

In [10]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

In [15]:
training_set = datasets.FashionMNIST("dataset/fashion-mnist",
                                     transform=transform,
                                     train=True,
                                     download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=64, shuffle=True)

test_set = datasets.FashionMNIST("dataset/fashion-mnist",
                                 transform=transform,
                                 train=False,
                                 download=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=64)

In [16]:
print(net[0][0].weight.shape)
print(net[0][0].bias.shape)

torch.Size([16, 1, 3, 3])
torch.Size([16])


In [17]:
criteration = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

In [21]:
epochs = 10

for i in range(epochs):
    running_loss = 0.0
    start_time = time.time()
    
    for (inputs, labels) in training_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        net.train()
        optimizer.zero_grad()
        
        outputs = net(inputs)
        loss = criteration(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    end_time = time.time() - start_time
    print(f"Epoch: {i + 1}, Loss: {running_loss / len(training_loader)}, Time: {end_time}")

Epoch: 1, Loss: 2.303135599154653, Time: 127.4416024684906
Epoch: 2, Loss: 1.7920728398602146, Time: 126.68796110153198
Epoch: 3, Loss: 0.4561358319798004, Time: 131.35995316505432
Epoch: 4, Loss: 0.3252240495561664, Time: 132.03863072395325
Epoch: 5, Loss: 0.27952998027459647, Time: 128.03232312202454
Epoch: 6, Loss: 0.25193177486088736, Time: 126.86922931671143
Epoch: 7, Loss: 0.22905942150699432, Time: 126.96793103218079
Epoch: 8, Loss: 0.2046036737432866, Time: 129.89544439315796
Epoch: 9, Loss: 0.1842307909838617, Time: 127.72252225875854
Epoch: 10, Loss: 0.16326205704663035, Time: 127.61417484283447


In [22]:
total_predictions = 0
total_correct = 0

for data in test_loader:
    inputs, labels = data
    inputs = inputs.to(device)
    labels = labels.to(device)
    
    outputs = torch.argmax(net(inputs), dim=1)
    check = (outputs - labels) == 0
    
    total_correct += len(check[check == True])
    total_predictions += len(check)

print(f"Correct percentage: {total_correct / total_predictions * 100}%")

Correct percentage: 91.29%
