# PW03 - Group 4
* Florian Bär
* Matthias Egli
* Manuel Vogel
* Adrian Willi

In [1]:
import torch
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torchsummary import summary
import numpy as np
import matplotlib.pyplot as plt

### Loading Data

In [2]:
training_data = datasets.mnist.FashionMNIST(root="data", train=True, download=True, transform=ToTensor())
test_data = datasets.mnist.FashionMNIST(root="data", train=False, download=True, transform=ToTensor())

In [3]:
training_data, validation_data = torch.utils.data.random_split(training_data, [50000, 10000])

In [4]:
# 50000 10000 10000
print(len(training_data),len(validation_data),len(test_data))

50000 10000 10000


### MLP with Dropout Regularisation

Use different dropout rates for the input layer (`p_in`) and hidden layers (`p_hidden`). 

In [5]:
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Dropout(),
    torch.nn.Linear(784, 200),
    torch.nn.Dropout(),
    torch.nn.Sigmoid(),
    torch.nn.Linear(200, 10)
)

from torchsummary import summary
summary(model, (1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1                  [-1, 784]               0
           Dropout-2                  [-1, 784]               0
            Linear-3                  [-1, 200]         157,000
           Dropout-4                  [-1, 200]               0
           Sigmoid-5                  [-1, 200]               0
            Linear-6                   [-1, 10]           2,010
Total params: 159,010
Trainable params: 159,010
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.02
Params size (MB): 0.61
Estimated Total Size (MB): 0.63
----------------------------------------------------------------


### Training

Implement the training / evaluation loop

Remember and return training / validation cost and accuracy per epoch. 

In [6]:
def train_eval(model, lr, nepochs, nbatch, training_data, validation_data):
    # finally return the sequence of per epoch values
    cost_hist = []
    cost_hist_test = []
    acc_hist = []
    acc_hist_test = []

    cost_ce = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    ### YOUR CODE START ###
    
    # epoch: current epoch
    # cost, cost_test, acc, acc_test: cost and acurracy (for training, validation set) per epoch     
    
    training_loader = DataLoader(training_data, batch_size=nbatch)
    validation_loader = DataLoader(validation_data, batch_size=nbatch)
    
    for epoch in range(nepochs):

        training_cost = 0
        correct = 0
        for inputs, targets in training_loader:
            optimizer.zero_grad()
            predictions = model(inputs)
            cost = cost_ce(predictions, targets)
            cost.backward()
            optimizer.step()
            training_cost += cost.item()
            correct += (torch.argmax(predictions, dim=1) == targets).sum()
        
        cost = training_cost / len(training_data)
        acc = correct / len(training_data)

        validation_cost = 0
        correct = 0
        for inputs, targets in validation_loader:
            predictions = model(inputs)
            cost = cost_ce(predictions, targets)
            validation_cost += cost.item()
            correct += (torch.argmax(predictions, dim=1) == targets).sum()

        cost_test = validation_cost / len(validation_data)
        acc_test = correct / len(validation_data)
        
        print("Epoch %i: %f, %f, %f, %f"%(epoch, cost, acc, cost_test, acc_test))

        ### YOUR CODE END ###
        
        cost_hist.append(cost.data)
        cost_hist_test.append(cost_test)
        acc_hist.append(acc)
        acc_hist_test.append(acc_test)
    return cost_hist, cost_hist_test, acc_hist, acc_hist_test

### Analyse Different Settings

Start with a baseline model: 200 units in a single hidden layer; batch size 64; properly tuned learning rate, no dropout.

Then play with different model complexities and dropout rates and compare them on the basis of the validation set.

Estimate also the variance error by the difference between validation and training loss / accuracy.

Finally, identify a favourite combination (model complexity, dropout rate) and compute the test accuracy. 


#### Find suitable baseline

In [7]:
nbatch = 64
nepochs = [20, 20, 30, 50, 100, 200, 200, 150]
lr = [0.5, 0.25, 0.1, 0.05, 0.01, 0.01, 0.25, 0.5]

complexity = 200
drop_p = 0

costs = {"train":[],"test":[]}
accs =  {"train":[],"test":[]}

for i in range(len(nepochs)):
    print("#"*50)
    print("nepochs: ", nepochs[i])
    print("lr: ", lr[i])
    print("#"*50)

    # model with given complexity and dropout
    model = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Dropout(p=drop_p),
        torch.nn.Linear(784, complexity),
        torch.nn.Dropout(p=drop_p),
        torch.nn.Sigmoid(),
        torch.nn.Linear(complexity, 10)
    )

    cost_hist, cost_hist_test, acc_hist, acc_hist_test = train_eval(model, lr[i], nepochs[i], nbatch, training_data, validation_data)
    costs["train"].append(cost_hist)    
    costs["test"].append(cost_hist_test)
    accs["train"].append(acc_hist)    
    accs["test"].append(acc_hist_test)    

##################################################
nepochs:  20
lr:  0.5
##################################################
Epoch 0: 0.384789, 0.753800, 0.009458, 0.783700
Epoch 1: 0.268968, 0.833440, 0.007909, 0.816700
Epoch 2: 0.231855, 0.850340, 0.007248, 0.831400
Epoch 3: 0.213903, 0.861080, 0.006936, 0.837800
Epoch 4: 0.201337, 0.868420, 0.006759, 0.839300
Epoch 5: 0.191936, 0.873940, 0.006651, 0.841600
Epoch 6: 0.183862, 0.877960, 0.006565, 0.843700
Epoch 7: 0.177618, 0.881220, 0.006480, 0.846600
Epoch 8: 0.172510, 0.884660, 0.006386, 0.849100
Epoch 9: 0.167571, 0.888220, 0.006285, 0.852300
Epoch 10: 0.162678, 0.890820, 0.006185, 0.855300
Epoch 11: 0.157806, 0.893700, 0.006090, 0.856900
Epoch 12: 0.152878, 0.896240, 0.005999, 0.860100
Epoch 13: 0.148215, 0.898880, 0.005907, 0.861700
Epoch 14: 0.143778, 0.900940, 0.005815, 0.863700
Epoch 15: 0.139348, 0.903500, 0.005725, 0.866200
Epoch 16: 0.134948, 0.905740, 0.005636, 0.867100
Epoch 17: 0.130671, 0.908040, 0.005552, 0.870100
Epoc

Epoch 36: 0.283210, 0.841200, 0.007166, 0.839100
Epoch 37: 0.280320, 0.842140, 0.007133, 0.840100
Epoch 38: 0.277589, 0.842780, 0.007102, 0.841000
Epoch 39: 0.275002, 0.843400, 0.007071, 0.841600
Epoch 40: 0.272547, 0.844160, 0.007042, 0.842500
Epoch 41: 0.270212, 0.845120, 0.007015, 0.842800
Epoch 42: 0.267988, 0.845740, 0.006988, 0.843100
Epoch 43: 0.265863, 0.846540, 0.006962, 0.843600
Epoch 44: 0.263831, 0.847020, 0.006937, 0.843500
Epoch 45: 0.261883, 0.847360, 0.006912, 0.844500
Epoch 46: 0.260013, 0.847880, 0.006889, 0.844900
Epoch 47: 0.258214, 0.848460, 0.006866, 0.845900
Epoch 48: 0.256481, 0.849040, 0.006844, 0.846000
Epoch 49: 0.254809, 0.849480, 0.006823, 0.846800
Epoch 50: 0.253194, 0.849960, 0.006802, 0.847000
Epoch 51: 0.251631, 0.850420, 0.006782, 0.847600
Epoch 52: 0.250116, 0.850880, 0.006763, 0.847700
Epoch 53: 0.248646, 0.851060, 0.006744, 0.848300
Epoch 54: 0.247219, 0.851600, 0.006725, 0.848800
Epoch 55: 0.245831, 0.852200, 0.006707, 0.849200
Epoch 56: 0.244479, 

Epoch 101: 0.208979, 0.865620, 0.006197, 0.859100
Epoch 102: 0.208393, 0.865860, 0.006189, 0.859300
Epoch 103: 0.207816, 0.866180, 0.006182, 0.859400
Epoch 104: 0.207250, 0.866440, 0.006174, 0.859900
Epoch 105: 0.206694, 0.866720, 0.006166, 0.859800
Epoch 106: 0.206147, 0.866960, 0.006159, 0.860300
Epoch 107: 0.205609, 0.867080, 0.006151, 0.860300
Epoch 108: 0.205081, 0.867580, 0.006144, 0.860400
Epoch 109: 0.204561, 0.867740, 0.006137, 0.860900
Epoch 110: 0.204049, 0.868000, 0.006129, 0.861000
Epoch 111: 0.203546, 0.868400, 0.006122, 0.861300
Epoch 112: 0.203051, 0.868620, 0.006115, 0.861700
Epoch 113: 0.202563, 0.868780, 0.006108, 0.861600
Epoch 114: 0.202083, 0.868900, 0.006101, 0.861800
Epoch 115: 0.201610, 0.869180, 0.006094, 0.862200
Epoch 116: 0.201144, 0.869440, 0.006088, 0.862500
Epoch 117: 0.200685, 0.869420, 0.006081, 0.862800
Epoch 118: 0.200232, 0.869740, 0.006074, 0.863300
Epoch 119: 0.199785, 0.869920, 0.006068, 0.863500
Epoch 120: 0.199345, 0.870220, 0.006061, 0.864000


Epoch 64: 0.142941, 0.944980, 0.005236, 0.888500
Epoch 65: 0.143265, 0.945520, 0.005246, 0.888300
Epoch 66: 0.143612, 0.946260, 0.005256, 0.888300
Epoch 67: 0.143981, 0.946860, 0.005266, 0.887800
Epoch 68: 0.144374, 0.947520, 0.005277, 0.887500
Epoch 69: 0.144787, 0.948300, 0.005289, 0.887500
Epoch 70: 0.145216, 0.949160, 0.005301, 0.887600
Epoch 71: 0.145658, 0.950140, 0.005313, 0.887900
Epoch 72: 0.146107, 0.950800, 0.005325, 0.887700
Epoch 73: 0.146557, 0.951520, 0.005338, 0.887900
Epoch 74: 0.147002, 0.952380, 0.005351, 0.887900
Epoch 75: 0.147434, 0.953120, 0.005365, 0.888300
Epoch 76: 0.147845, 0.953820, 0.005378, 0.888300
Epoch 77: 0.148228, 0.954320, 0.005392, 0.888400
Epoch 78: 0.148573, 0.955060, 0.005407, 0.888300
Epoch 79: 0.148871, 0.955640, 0.005421, 0.888400
Epoch 80: 0.149113, 0.956200, 0.005435, 0.888200
Epoch 81: 0.149289, 0.956880, 0.005450, 0.888100
Epoch 82: 0.149391, 0.957580, 0.005465, 0.887800
Epoch 83: 0.149410, 0.958360, 0.005480, 0.887400
Epoch 84: 0.149339, 

Epoch 27: 0.118030, 0.925420, 0.005384, 0.879600
Epoch 28: 0.119663, 0.926820, 0.005378, 0.880000
Epoch 29: 0.121910, 0.928200, 0.005374, 0.880800
Epoch 30: 0.124722, 0.929700, 0.005373, 0.881200
Epoch 31: 0.128036, 0.931300, 0.005375, 0.882000
Epoch 32: 0.131786, 0.932660, 0.005380, 0.881600
Epoch 33: 0.135918, 0.933840, 0.005388, 0.882500
Epoch 34: 0.140407, 0.935220, 0.005398, 0.883000
Epoch 35: 0.145228, 0.936680, 0.005409, 0.882800
Epoch 36: 0.150338, 0.938100, 0.005421, 0.883400
Epoch 37: 0.155664, 0.939380, 0.005433, 0.884400
Epoch 38: 0.161120, 0.940440, 0.005444, 0.884800
Epoch 39: 0.166664, 0.941440, 0.005456, 0.885500
Epoch 40: 0.172298, 0.942400, 0.005467, 0.885900
Epoch 41: 0.178048, 0.943400, 0.005477, 0.886100
Epoch 42: 0.183919, 0.944880, 0.005488, 0.886400
Epoch 43: 0.189860, 0.946000, 0.005501, 0.887100
Epoch 44: 0.195789, 0.947060, 0.005517, 0.887100
Epoch 45: 0.201642, 0.948020, 0.005534, 0.888100
Epoch 46: 0.207388, 0.949160, 0.005553, 0.888000
Epoch 47: 0.213015, 

KeyboardInterrupt: 

Results:


| nepochs | lr | Acc (Train) | Acc (Test) |
| :-: | :-: | :-: | :-: |
| 20 | 0.5 | 91.24% | 86.86% |
| 20 | 0.25 | 89.58% | 86.1% |
| 30 | 0.1 | 88.77% | 86.07% |
| 50 | 0.05 | 88.47% | 86.56 % |
| 100 | 0.1 | 86.62%| 86% | 
| 200 | 0.1 | 88.25% | 87.12% |
| **100** | **0.25** | **96.79%** | **88.6%** | 
| 200 | 0.25 | 99.68% | 88.86% |
| 150 | 0.5 | 99.77%| 88.87% | 

#### Increase complexity

In [None]:
nbatch = 64
nepochs = 100
lr = 0.25

complexity = [200, 300, 500, 750, 1000, 750, 500]
drop_p = [0, 0.01, 0.05, 0.1, 0.25, 0.05, 0.05]

costs = {"train":[],"test":[]}
accs =  {"train":[],"test":[]}

for i in range(len(drop_p)):
    print("########")
    print("Dropout: ", drop_p[i])
    print("Complexity: ", complexity[i])
    print("########")

    # model with given complexity and dropout
    model = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Dropout(p=drop_p[i]),
        torch.nn.Linear(784, complexity[i]),
        torch.nn.Dropout(p=drop_p[i]),
        torch.nn.Sigmoid(),
        torch.nn.Linear(complexity[i], 10)
    )

    cost_hist, cost_hist_test, acc_hist, acc_hist_test = train_eval(model, lr, nepochs, nbatch, training_data, validation_data)
    costs["train"].append(cost_hist)    
    costs["test"].append(cost_hist_test)
    accs["train"].append(acc_hist)    
    accs["test"].append(acc_hist_test)    

Best setting when increasing complexity:
* nepochs = 100
* lr = 0.25


| dropout | complexity | Acc (Train) | Acc (Test) |
| :-: | :-: | :-: | :-: |
| **0** | **200** | **9677%** | **8946%** |
| 0.01 | 300 | 9483% | 8913% |
| 0.05 | 500 | 9211% | 8894% |
| 0.1 | 750 | 9049% |  8807% |
| 0.25 | 1000 | 8759%| 8621% | 
| 0.05 | 750 | 9212% | 8853% |
| 0.005 | 200 | 9562% | 8939% |

### Suitable Output Plots

Possibly adjust to fit your needs...

In [None]:
colors = ["b--","r--","m--","g--","y--"]
colors_test = ["b-","r-","m-","g-","y-"]
plt.figure(1, figsize=(12,8))
for i in range(len(drop_p)):
    plt.plot(torch.arange(nepochs), costs["train"][i], colors[i], label="train "+str(drop_p[i]))
    plt.plot(torch.arange(nepochs), costs["test"][i], colors_test[i], label="test "+str(drop_p[i]))
plt.xlabel("Epoch", fontsize=18)
plt.xlim(0,nepochs)
plt.ylim(0,0.5)
plt.title("Cross-Entropy Cost", fontsize=18)
plt.legend(bbox_to_anchor = (1.05, 0.6))
plt.figure(2, figsize=(12,8))
for i in range(len(drop_p)):
    acc = np.array(accs["train"][i])
    acc_test = np.array(accs["test"][i])
    plt.plot(torch.arange(nepochs), acc, colors[i], label="train "+str(drop_p[i]))
    plt.plot(torch.arange(nepochs), acc_test, colors_test[i], label="test "+str(drop_p[i]))
plt.xlabel("Epoch", fontsize=18)
plt.xlim(0,nepochs)
plt.ylim(0.9,1.0)
plt.title("Accuracy", fontsize=18)
plt.legend(bbox_to_anchor = (1.05, 0.6))

#### Decrease complexity

In [None]:
nbatch = 64
nepochs = 100
lr = 0.25

complexity = [200, 100, 50, 50, 20]
drop_p = [0, 0.01, 0.01, 0.005, 0.01]

costs = {"train":[],"test":[]}
accs =  {"train":[],"test":[]}

for i in range(len(drop_p)):
    print("########")
    print("Dropout: ", drop_p[i])
    print("Complexity: ", complexity[i])
    print("########")

    # model with given complexity and dropout
    model = torch.nn.Sequential(
        torch.nn.Flatten(),
        torch.nn.Dropout(p=drop_p[i]),
        torch.nn.Linear(784, complexity[i]),
        torch.nn.Dropout(p=drop_p[i]),
        torch.nn.Sigmoid(),
        torch.nn.Linear(complexity[i], 10)
    )

    cost_hist, cost_hist_test, acc_hist, acc_hist_test = train_eval(model, lr, nepochs, nbatch, training_data, validation_data)
    costs["train"].append(cost_hist)    
    costs["test"].append(cost_hist_test)
    accs["train"].append(acc_hist)    
    accs["test"].append(acc_hist_test)    

Best setting when decreasing complexity:
* nepochs = 100
* lr = 0.25


| dropout | complexity | Acc (Train) | Acc (Test) |
| :-: | :-: | :-: | :-: |
| **0** | **200** | **96.88%** | **89.17%** |
| 0.01 | 100 | 94.48% | 88.73% |
| 0.01 | 50 | 93.24% | 88.35% |
| 0.005 | 50 | 93.98% |  88.48% |
| 0.01 | 20 | 90.49%| 86.69% | 

### Suitable Output Plots

Possibly adjust to fit your needs...

In [None]:
colors = ["b--","r--","m--","g--","y--"]
colors_test = ["b-","r-","m-","g-","y-"]
plt.figure(1, figsize=(12,8))
for i in range(len(drop_p)):
    plt.plot(torch.arange(nepochs), costs["train"][i], colors[i], label="train "+str(drop_p[i]))
    plt.plot(torch.arange(nepochs), costs["test"][i], colors_test[i], label="test "+str(drop_p[i]))
plt.xlabel("Epoch", fontsize=18)
plt.xlim(0,nepochs)
plt.ylim(0,0.5)
plt.title("Cross-Entropy Cost", fontsize=18)
plt.legend(bbox_to_anchor = (1.05, 0.6))
plt.figure(2, figsize=(12,8))
for i in range(len(drop_p)):
    acc = np.array(accs["train"][i])
    acc_test = np.array(accs["test"][i])
    plt.plot(torch.arange(nepochs), acc, colors[i], label="train "+str(drop_p[i]))
    plt.plot(torch.arange(nepochs), acc_test, colors_test[i], label="test "+str(drop_p[i]))
plt.xlabel("Epoch", fontsize=18)
plt.xlim(0,nepochs)
plt.ylim(0.9,1.0)
plt.title("Accuracy", fontsize=18)
plt.legend(bbox_to_anchor = (1.05, 0.6))