In [31]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import FashionMNIST
from torch.utils.data import random_split
import numpy as np
import matplotlib.pyplot as plt

In [57]:
# Preprocessing shared by both splits: convert each image to a tensor, then
# normalize its single channel with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

# Both splits are downloaded into ./data if missing and use the same transform.
train_set = FashionMNIST(root='./data', train=True, download=True, transform=transform)
test_set = FashionMNIST(root='./data', train=False, download=True, transform=transform)

In [26]:
class Inception(nn.Module):
    """Four parallel branches whose outputs are concatenated along dim 1.

    Every branch keeps the spatial size of its input (1x1 convs, 3x3 conv with
    padding 1, 5x5 conv with padding 2, 3x3 max-pool with stride 1 / padding 1),
    so the branch outputs can be stacked channel-wise.

    Args:
        c1: output channels of the 1x1 conv in branch 1.
        c2: indexable pair; ``c2[0]`` channels for the 1x1 conv and ``c2[1]``
            channels for the following 3x3 conv in branch 2.
        c3: indexable pair; ``c3[0]`` channels for the 1x1 conv and ``c3[1]``
            channels for the following 5x5 conv in branch 3.
        c4: output channels of the 1x1 conv after the max-pool in branch 4.
        **kwargs: forwarded to ``nn.Module.__init__``.
    """

    def __init__(self, c1, c2, c3, c4, **kwargs):
        super().__init__(**kwargs)

        # Branch 1: 1x1 conv.
        self.b1_1 = nn.LazyConv2d(c1, kernel_size=1)

        # Branch 2: 1x1 conv followed by 3x3 conv.
        self.b2_1 = nn.LazyConv2d(c2[0], kernel_size=1)
        self.b2_2 = nn.LazyConv2d(c2[1], kernel_size=3, padding=1)

        # Branch 3: 1x1 conv followed by 5x5 conv.
        self.b3_1 = nn.LazyConv2d(c3[0], kernel_size=1)
        self.b3_2 = nn.LazyConv2d(c3[1], kernel_size=5, padding=2)

        # Branch 4: 3x3 max-pool followed by 1x1 conv.
        self.b4_1 = nn.MaxPool2d(kernel_size=(3, 3), stride=1, padding=1)
        self.b4_2 = nn.LazyConv2d(c4, kernel_size=1)

    def forward(self, x):
        """Apply all four branches to ``x`` and concatenate them on dim 1."""
        out1 = F.relu(self.b1_1(x))

        mid2 = F.relu(self.b2_1(x))
        out2 = F.relu(self.b2_2(mid2))

        mid3 = F.relu(self.b3_1(x))
        out3 = F.relu(self.b3_2(mid3))

        pooled = self.b4_1(x)
        out4 = F.relu(self.b4_2(pooled))

        return torch.cat([out1, out2, out3, out4], dim=1)

class GGNet(nn.Module):
    """Convolutional classifier built from a conv/pool stem and Inception blocks.

    Layout: a stem of three convolutions and two max-pools, then nine
    ``Inception`` blocks in three groups (2, 5, 2) separated by stride-2
    max-pools, then global average pooling, flatten and a linear classifier.

    Args:
        num_cls: number of output features of the final linear layer.
    """

    def __init__(self, num_cls=10):
        super().__init__()

        # Stem: 7x7 stride-2 conv, pool, 1x1 conv, 3x3 conv, pool.
        self.layer1 = nn.Sequential(
            nn.LazyConv2d(64, kernel_size=7, stride=2, padding=3),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(3, 3), stride=2, padding=1),
            nn.LazyConv2d(64, kernel_size=1),
            nn.ReLU(),
            nn.LazyConv2d(192, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(3, 3), stride=2, padding=1),
        )

        # First Inception group.
        self.inception1 = nn.Sequential(
            Inception(64, (96, 128), (16, 32), 32),
            Inception(128, (128, 192), (32, 96), 64),
        )
        self.maxpool1 = nn.MaxPool2d(kernel_size=(3, 3), stride=2, padding=1)

        # Second Inception group.
        self.inception2 = nn.Sequential(
            Inception(192, (96, 208), (16, 48), 64),
            Inception(160, (112, 224), (24, 64), 64),
            Inception(128, (128, 256), (24, 64), 64),
            Inception(112, (144, 288), (32, 64), 64),
            Inception(256, (160, 320), (32, 128), 128),
        )
        self.maxpool2 = nn.MaxPool2d(kernel_size=(3, 3), stride=2, padding=1)

        # Third Inception group.
        self.inception3 = nn.Sequential(
            Inception(256, (160, 320), (32, 128), 128),
            Inception(384, (192, 384), (48, 128), 128),
        )

        # Head: collapse each channel to 1x1, flatten, project to num_cls.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = nn.Flatten()
        self.fc = nn.LazyLinear(num_cls)

    def forward(self, x):
        """Pass ``x`` through every stage in order and return the class scores."""
        stages = (
            self.layer1,
            self.inception1,
            self.maxpool1,
            self.inception2,
            self.maxpool2,
            self.inception3,
            self.avgpool,
            self.flatten,
            self.fc,
        )
        for stage in stages:
            x = stage(x)
        return x

## Hyperparameters

In [59]:
# --- Tunable constants -------------------------------------------------------
EPOCHS = 1
LR = 0.001
BATCH_SIZE = 16
MOMENTUM = 0.9
SEED = 42
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Split sizes are derived from the dataset currently bound to `train_set`.
# NOTE: the data-split cell below rebinds `train_set`, so these sizes are only
# meaningful when this cell runs before that split.
TRAIN_RATIO = 0.75
TRAIN_SIZE = int(TRAIN_RATIO * len(train_set))
VAL_SIZE = len(train_set) - TRAIN_SIZE

# Seed the global RNG so weight initialization and loader shuffling repeat
# across Restart & Run All.
torch.manual_seed(SEED)

# --- Model, loss, optimizer --------------------------------------------------
model = GGNet().to(DEVICE)

# GGNet is built from lazy modules whose parameters are uninitialized until the
# first forward pass. Do one dry run (after moving to DEVICE) on a single
# sample so all parameters exist before the optimizer is attached.
with torch.no_grad():
    model(train_set[0][0].unsqueeze(0).to(DEVICE))

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=LR,
    momentum=MOMENTUM
)
    



## Data loaders

In [44]:
# Split the training data into train/validation subsets with a fixed seed.
# This cell rebinds `train_set`; random_split returns Subset objects, so the
# guard keeps a second execution from splitting the already-split subset.
if not isinstance(train_set, torch.utils.data.Subset):
    train_set, val_set = random_split(
        train_set,
        [TRAIN_SIZE, VAL_SIZE],
        generator=torch.Generator().manual_seed(42)
    )

In [None]:
# One loader per split, all with the same batch size; only the training
# loader shuffles its samples.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=BATCH_SIZE)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE)

## For plotting

In [50]:
# Metric containers (one independent empty list per metric), intended for
# plotting after training.
history = {
    metric: []
    for metric in ('train_loss', 'val_loss', 'train_acc', 'val_acc')
}

## Training

In [60]:
import time


def _run_epoch(loader, train):
    """Run one full pass over ``loader`` and return ``(loss, accuracy)``.

    Args:
        loader: DataLoader yielding ``(inputs, labels)`` batches.
        train: if True, back-propagate and step the optimizer on every batch;
            if False, only evaluate (gradients disabled).

    Returns:
        Tuple ``(mean_loss, accuracy)`` where ``mean_loss`` is the criterion
        value averaged over batches and ``accuracy`` is the fraction (0-1) of
        samples in ``loader.dataset`` that were classified correctly.
    """
    model.train(train)  # switches between train and eval mode
    loss_sum = 0.0
    n_correct = 0

    with torch.set_grad_enabled(train):
        for inputs, labels in loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if train:
                # Clear stale gradients, compute new ones, then update weights.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            n_correct += (outputs.argmax(1) == labels).sum().item()

    return loss_sum / len(loader), n_correct / len(loader.dataset)


start_time = time.perf_counter()
for epoch in range(EPOCHS):
    train_loss, train_acc = _run_epoch(train_loader, train=True)
    val_loss, val_acc = _run_epoch(val_loader, train=False)

    # Append so that `history` keeps one entry per epoch.
    history['train_loss'].append(train_loss)
    history['val_loss'].append(val_loss)
    history['train_acc'].append(train_acc)
    history['val_acc'].append(val_acc)

    # Accuracies are fractions; the `%` format spec renders them as percentages.
    print(f"Epoch: {epoch + 1}/{EPOCHS}")
    print(f"Train's loss: {train_loss}, train's accuracy: {train_acc:.2%}")
    print(f"Val's loss: {val_loss}, val's accuracy: {val_acc:.2%}")

end_time = time.perf_counter()
print('Finished training')
print(f'Training took {end_time - start_time} secs')

TypeError: img should be Tensor Image. Got <class 'PIL.Image.Image'>