In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from collections import OrderedDict

# Fashion-MNIST train/test splits, stored under ./data and downloaded on
# first use. transform=ToTensor() is applied to every sample on access.
training_data = datasets.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
test_data = datasets.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())

# Mini-batches of 64; only the training loader reshuffles its samples.
train_dataloader = DataLoader(dataset=training_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(dataset=test_data, batch_size=64)



In [None]:
class NeuralNetwork(nn.Module):
    """Convolutional stack -> flatten -> fully connected stack.

    Args:
        fc_layers: iterable of ``(name, module)`` pairs for the fully
            connected part, applied after flattening.
        conv_layers: iterable of ``(name, module)`` pairs for the
            convolutional part, applied first.

    Both iterables are wrapped in an ``OrderedDict`` so every sub-module is
    registered under the name it was given.
    """

    def __init__(self, fc_layers, conv_layers):
        super().__init__()
        named_conv = OrderedDict(conv_layers)
        named_fc = OrderedDict(fc_layers)
        self.convolution_stack = nn.Sequential(named_conv)
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(named_fc)

    def forward(self, x):
        """Return the output of the fully connected stack for input ``x``."""
        features = self.flatten(self.convolution_stack(x))
        return self.linear_relu_stack(features)


In [None]:
def _window_output_hw(previous_size, cfg):
    """Return ``(height, width)`` after sliding a conv/pool window over the input.

    Applies ``(dim + 2 * p_size - k_size) // s_size + 1`` to the height
    (``previous_size[1]``) and width (``previous_size[2]``).
    """
    kernel, stride, padding = cfg['k_size'], cfg['s_size'], cfg['p_size']
    height = (previous_size[1] + 2 * padding - kernel) // stride + 1
    width = (previous_size[2] + 2 * padding - kernel) // stride + 1
    return height, width


def find_layer(previous_size, ind, l_s_list, l_a_list):
    """Build the layer at position ``ind`` and compute its output size.

    Args:
        previous_size: per-sample size of the layer's input, without the batch
            dimension: ``(channels, height, width)`` for 'conv2d' / 'maxpool',
            ``(features,)`` for 'linear', any sequence of ints for 'flatten'.
        ind: index of the layer inside ``l_s_list`` / ``l_a_list``.
        l_s_list: per-layer settings. A dict with 'n_channel', 'k_size',
            's_size', 'p_size' for 'conv2d'; a dict with 'k_size', 's_size',
            'p_size' for 'maxpool'; an int (output features) for 'linear'.
            The entry is not read for 'relu' and 'flatten'.
        l_a_list: per-layer kind names; supported values are 'flatten',
            'conv2d', 'maxpool', 'linear' and 'relu'.

    Returns:
        ``(current_size, act_name, act)``: the output size, a name of the form
        ``<kind><ind + 1>``, and the constructed ``nn.Module``.

    Raises:
        ValueError: if the layer kind is not supported (this includes
            'conv1d' and 'conv3d', which are not implemented).
    """
    name = l_a_list[ind]
    # Layer names are 1-based so they read naturally in the printed model.
    act_name = f"{name}{ind + 1}"

    if name == 'flatten':
        # Product over every dimension, so inputs of any rank are accepted.
        flat_size = 1
        for dim in previous_size:
            flat_size *= dim
        return (flat_size,), act_name, nn.Flatten()

    if name == 'conv2d':
        cfg = l_s_list[ind]
        act = nn.Conv2d(previous_size[0], cfg['n_channel'], cfg['k_size'],
                        stride=cfg['s_size'], padding=cfg['p_size'])
        height, width = _window_output_hw(previous_size, cfg)
        return (cfg['n_channel'], height, width), act_name, act

    if name == 'maxpool':
        cfg = l_s_list[ind]
        act = nn.MaxPool2d(cfg['k_size'], stride=cfg['s_size'], padding=cfg['p_size'])
        height, width = _window_output_hw(previous_size, cfg)
        # Pooling leaves the channel count untouched.
        return (previous_size[0], height, width), act_name, act

    if name == 'linear':
        out_features = l_s_list[ind]
        return (out_features,), act_name, nn.Linear(previous_size[0], out_features)

    if name == 'relu':
        # Element-wise activation: the size passes through unchanged.
        return previous_size, act_name, nn.ReLU()

    # Fail loudly instead of returning None, which would only surface later
    # as an unpacking error in the caller.
    raise ValueError(
        f"Unsupported layer type {name!r} at index {ind}; "
        "expected one of 'flatten', 'conv2d', 'maxpool', 'linear', 'relu'."
    )





def model_layer(previous_size, layer_size_list, layer_activation_list):
    """Build a list of named layers, threading the size through each one.

    Args:
        previous_size: per-sample input size of the first layer.
        layer_size_list: per-layer settings, one entry per layer (see
            ``find_layer`` for the expected entry of each layer kind).
        layer_activation_list: per-layer kind names, same length as
            ``layer_size_list``.

    Returns:
        ``(final_size, defined_layer)`` where ``defined_layer`` is a list of
        ``(name, module)`` pairs in application order and ``final_size`` is
        the output size of the last layer (``previous_size`` unchanged when
        both lists are empty).

    Raises:
        ValueError: if the two lists differ in length.
    """
    # A length mismatch would otherwise drop trailing layers silently or
    # surface as a bare IndexError deep inside find_layer.
    if len(layer_size_list) != len(layer_activation_list):
        raise ValueError(
            "layer_size_list and layer_activation_list must have the same length, "
            f"got {len(layer_size_list)} and {len(layer_activation_list)}"
        )

    defined_layer = []
    for i in range(len(layer_activation_list)):
        previous_size, act_name, act = find_layer(previous_size, i, layer_size_list, layer_activation_list)
        defined_layer.append((act_name, act))

    return previous_size, defined_layer



In [None]:
# Pull one batch to read the per-sample input size; slicing off the first
# entry of the shape drops the batch dimension.
train_features, train_labels = next(iter(train_dataloader))
previous_size = train_features.shape[1:]
print(previous_size)

# Convolutional stack: entry i of layer_size_list1 configures entry i of
# layer_activation_list1 ('relu' needs no settings, hence None).
layer_activation_list1 = ['conv2d', 'relu', 'maxpool', 'conv2d', 'relu', 'maxpool']
layer_size_list1 = [
    {'n_channel': 32, 'k_size': 3, 's_size': 1, 'p_size': 1},
    None,
    {'k_size': 2, 's_size': 2, 'p_size': 0},
    {'n_channel': 64, 'k_size': 3, 's_size': 1, 'p_size': 0},
    None,
    {'k_size': 2, 's_size': 2, 'p_size': 0},
]

previous_size1, convolution_layer = model_layer(
    previous_size, layer_size_list1, layer_activation_list1
)
print(previous_size1)
print(convolution_layer)


torch.Size([1, 28, 28])
(64, 6, 6)
[('conv2d1', Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))), ('relu2', ReLU()), ('maxpool3', MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)), ('conv2d4', Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))), ('relu5', ReLU()), ('maxpool6', MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False))]


In [None]:
# Number of features per sample once the convolution output is flattened:
# the product of every dimension in previous_size1.
val = 1
for ele in previous_size1:
    val *= ele

print(val)

2304


In [None]:

# Fully connected head: (layer kind, output width) per layer, in order.
# NOTE(review): there is no activation between these linear layers, so the
# three of them compose into a single affine map. Consider interleaving
# 'relu' entries (with None as their size) if a non-linear head is intended.
previous_size2 = (val,)
fc_spec = [('linear', 600), ('linear', 120), ('linear', 10)]
layer_activation_list = [kind for kind, _ in fc_spec]
layer_size_list = [width for _, width in fc_spec]
previous_size3, fully_connected_layer = model_layer(
    previous_size2, layer_size_list, layer_activation_list
)
print(fully_connected_layer)


[('linear1', Linear(in_features=2304, out_features=600, bias=True)), ('linear2', Linear(in_features=600, out_features=120, bias=True)), ('linear3', Linear(in_features=120, out_features=10, bias=True))]


In [None]:
# Both arguments are lists of (name, module) pairs, e.g.
# ('linear1', nn.Linear(28*28, 512)) or ('relu1', nn.ReLU()).
# Keyword arguments are used because the constructor takes the fully
# connected layers first and the convolution layers second.
model = NeuralNetwork(
    fc_layers=fully_connected_layer,
    conv_layers=convolution_layer,
)


In [None]:
# Step size handed to the optimizer.
learning_rate = 1e-3
# Mini-batch size. The DataLoaders are built with the same value (64) written
# as a literal, so keep the two in sync when changing either.
batch_size = 64
# Number of full train + test passes run by the training cell.
epochs = 5



In [None]:
def train_loop(dataloader, model, loss_fn, optimizer, log_interval=100):
    """Run one training pass over ``dataloader`` and print periodic progress.

    Args:
        dataloader: iterable yielding ``(X, y)`` batches; ``dataloader.dataset``
            must support ``len()``.
        model: module called as ``model(X)`` to produce predictions.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer updating the model's parameters.
        log_interval: print the loss every ``log_interval`` batches (must be a
            positive integer). Defaults to 100.

    Returns:
        None. Progress is written to stdout.
    """
    size = len(dataloader.dataset)
    # Set the model to training mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.train()

    # Running count of processed samples. Counting len(X) per batch keeps the
    # progress display correct for any batch size (including a short final
    # batch) without relying on a notebook-level `batch_size` variable.
    seen = 0
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        # Clear gradients so they do not accumulate into the next batch.
        optimizer.zero_grad()

        seen += len(X)
        if batch % log_interval == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    # Set the model to evaluation mode - important for batch normalization and dropout layers
    # Unnecessary in this situation but added for best practices
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients are computed during test mode
    # also serves to reduce unnecessary gradient computations and memory usage for tensors with requires_grad=True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")




In [None]:
# Loss and optimizer for the run; Adam updates every parameter of `model`
# with the step size taken from `learning_rate`.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


# One epoch = one training pass over train_dataloader followed by an
# evaluation pass over test_dataloader.
for t in range(epochs):
    # t is 0-based; print a 1-based epoch number.
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")



Epoch 1
-------------------------------
loss: 2.307269  [   64/60000]
loss: 0.584017  [ 6464/60000]
loss: 0.402192  [12864/60000]
loss: 0.697057  [19264/60000]
loss: 0.543448  [25664/60000]
loss: 0.392037  [32064/60000]
loss: 0.410415  [38464/60000]
loss: 0.300309  [44864/60000]
loss: 0.394690  [51264/60000]
loss: 0.293399  [57664/60000]
Test Error: 
 Accuracy: 87.0%, Avg loss: 0.365496 

Epoch 2
-------------------------------
loss: 0.295698  [   64/60000]
loss: 0.402438  [ 6464/60000]
loss: 0.115554  [12864/60000]
loss: 0.294770  [19264/60000]
loss: 0.361781  [25664/60000]
loss: 0.287294  [32064/60000]
loss: 0.158949  [38464/60000]
loss: 0.113525  [44864/60000]
loss: 0.438683  [51264/60000]
loss: 0.201499  [57664/60000]
Test Error: 
 Accuracy: 88.4%, Avg loss: 0.323657 

Epoch 3
-------------------------------
loss: 0.285046  [   64/60000]
loss: 0.286256  [ 6464/60000]
loss: 0.284902  [12864/60000]
loss: 0.256451  [19264/60000]
loss: 0.272612  [25664/60000]
loss: 0.185140  [32064/600