In [1]:
import pandas as pd
import torch
import numpy as np
import utils
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

from utils import *
# Reset random state before anything else runs. seed_everything is pulled in
# by the star-import from utils; presumably it seeds torch/numpy/random with a
# fixed default seed -- TODO confirm against utils.
seed_everything()

### Siema!
Warsztat KNUM 05.04.2019, autorstwa Mateusza Olko i Julii Bazińskiej. 

Do zrobienia:
1. Zaimplementuj early stopping. Trzymaj licznik, od ilu epok loss nie spada, jeśli przekroczy daną liczbę epok, to kończ trening.
2. Zmniejsz lub zwiększ learning rate. Czy loss nadal spada? Szybciej czy wolniej?
3. Przetestuj inne wartości batch size.
4. Wypróbuj głębsze modele (np. 3, 4 albo 8 warstw). Zobacz, czy zachodzi overfitting.
5. Dodaj regularyzację L2. Dodaje się ją do optimizera za pomocą argumentu `weight_decay` w konstruktorze. Czy nadal zachodzi overfitting? Czy sieć uczy się tak samo szybko?
6. Powiększ jeszcze sieć. Jakie są wyniki? Do powiększonej sieci dodaj warstwę dropout, wypróbuj go z różnymi wartościami prawdopodobieństwa wyzerowania aktywacji neuronu.


In [2]:
class BaselineRegressor(nn.Module):
    '''
    Smallest regressor in this notebook: one hidden layer.
    Layout: input [13] -> Linear + ReLU [13] -> Linear [1] output
    '''

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict keys; layers are created in
        # this order so seeded weight initialisation stays reproducible.
        self.layer1 = nn.Linear(13, 13)
        self.layer2 = nn.Linear(13, 1)

    def forward(self, x):
        '''
        Run the network on ``x`` (last dimension must be 13) and return
        one value per input row.
        '''
        hidden = torch.relu(self.layer1(x))
        return self.layer2(hidden)
    

class DeeperRegressor(nn.Module):
    '''
    Regressor with two hidden layers.
    Layout: input [13] -> Linear + ReLU [13] -> Linear + ReLU [6] -> Linear [1] output
    '''

    def __init__(self):
        super().__init__()
        # Attribute names double as state_dict keys; layers are created in
        # this order so seeded weight initialisation stays reproducible.
        self.layer1 = nn.Linear(13, 13)
        self.layer2 = nn.Linear(13, 6)
        self.layer3 = nn.Linear(6, 1)

    def forward(self, x):
        '''
        Run the network on ``x`` (last dimension must be 13) and return
        one value per input row.
        '''
        first = torch.relu(self.layer1(x))
        second = torch.relu(self.layer2(first))
        return self.layer3(second)



class DropoutRegressor(nn.Module):
    '''
    Wider regressor with a dropout layer in front of the output layer.
    It is deliberately larger than the other models in this notebook so
    that dropout has something to regularise.
    Layout: input [13] -> Linear + ReLU [30] -> Linear + ReLU [40] -> Dropout -> Linear [1] output
    '''

    def __init__(self, dropout_prob=0.1):
        '''
        :param dropout_prob: Probability with which each activation entering
                             the output layer is zeroed during training
        '''
        super().__init__()
        # Attribute names double as state_dict keys; layers are created in
        # this order so seeded weight initialisation stays reproducible.
        self.layer1 = nn.Linear(13, 30)
        self.layer2 = nn.Linear(30, 40)
        self.dropout = nn.Dropout(p=dropout_prob)
        self.last = nn.Linear(40, 1)

    def forward(self, x):
        '''
        Run the network on ``x`` (last dimension must be 13) and return
        one value per input row. Dropout is only active in train mode.
        '''
        features = torch.relu(self.layer2(torch.relu(self.layer1(x))))
        return self.last(self.dropout(features))


In [3]:
def train(
    model,
    batch_size=32,
    learning_rate=3e-3,
    n_epochs=5,
    max_overfit=3,
    l2_reg=0.0001
):
    '''
    Train ``model`` with Adam + MSE loss and early stopping.

    The data comes from ``load_data()``, which must return a
    ``(train_set, val_set)`` pair of datasets yielding ``(x, y)`` samples
    (the Boston housing data according to the original notebook notes).
    After every epoch the model is evaluated on the validation set; the
    weights of the epoch with the lowest validation loss are remembered and
    loaded back into ``model`` before returning.

    :param model: Model instance to be trained (modified in place)
    :param batch_size: Size of batch for both training and validation
    :param learning_rate: Learning rate for optimizer (Adam)
    :param n_epochs: Maximum number of training epochs
    :param max_overfit: Number of consecutive non-improving epochs that is
                        tolerated; training stops once this is exceeded.
    :param l2_reg: L2 regularisation strength, passed to Adam as
                   ``weight_decay``
    :return: (model holding the best weights, validation loss of those
             weights). With ``n_epochs=0`` the model is returned untouched
             together with ``float('inf')``.
    '''
    train_set, val_set = load_data()

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size)

    optimizer = optim.Adam(
        model.parameters(),
        lr=learning_rate,
        weight_decay=l2_reg,
    )
    criterion = nn.MSELoss()

    best_loss = float('inf')
    best_state = None
    overfit_epochs = 0

    for epoch in range(n_epochs):

        # train loop
        model.train()
        for x, y in train_loader:
            optimizer.zero_grad()
            y = y.view(-1, 1)
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()

        # evaluation loop
        model.eval()
        loss_sum = 0.0
        n_samples = 0
        with torch.no_grad():
            for x, y in val_loader:
                y = y.view(-1, 1)
                loss = criterion(model(x), y)
                # criterion averages within a batch, so weight each batch by
                # its size: a shorter final batch must not be over-counted.
                loss_sum += loss.item() * y.size(0)
                n_samples += y.size(0)

        avg_loss = loss_sum / n_samples
        print(f"Epoch: {epoch} loss: {avg_loss}")

        # Early stopping.
        if avg_loss < best_loss:
            best_loss = avg_loss
            # Snapshot by value: the live parameters keep changing in later
            # epochs, so holding a reference to the model would not preserve
            # the best weights.
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            overfit_epochs = 0
        else:
            overfit_epochs += 1

        if overfit_epochs > max_overfit:
            break

    # Roll back to the best epoch so the returned model matches the returned loss.
    if best_state is not None:
        model.load_state_dict(best_state)

    return model, best_loss


In [4]:
# Reseed before building the network so its initial weights are repeatable
# (seed_everything comes from utils -- presumably it seeds the RNGs; confirm).
seed_everything()

# Experiment: DeeperRegressor, small batches, up to 100 epochs.
model, loss = train(
    DeeperRegressor(),
    batch_size=5, learning_rate=1e-3,
    n_epochs=100, max_overfit=10,
)

print(model, loss)

Epoch: 0 loss: 715.7615961990957
Epoch: 1 loss: 248.62603735736036
Epoch: 2 loss: 94.42329238249562
Epoch: 3 loss: 65.02235133816878
Epoch: 4 loss: 49.35225559970526
Epoch: 5 loss: 50.858955646124414
Epoch: 6 loss: 46.91896990528257
Epoch: 7 loss: 44.87835257072149
Epoch: 8 loss: 44.570107234744576
Epoch: 9 loss: 41.106691904894014
Epoch: 10 loss: 42.416149086839575
Epoch: 11 loss: 40.95740541698426
Epoch: 12 loss: 41.89474813581452
Epoch: 13 loss: 39.99258488182008
Epoch: 14 loss: 47.58584262817864
Epoch: 15 loss: 36.096802902972605
Epoch: 16 loss: 34.515557739678336
Epoch: 17 loss: 34.43123363134429
Epoch: 18 loss: 34.27638534485825
Epoch: 19 loss: 32.39877250250869
Epoch: 20 loss: 33.81286440871832
Epoch: 21 loss: 29.67312345354576
Epoch: 22 loss: 27.829016963327966
Epoch: 23 loss: 26.777523164674054
Epoch: 24 loss: 33.405438746054344
Epoch: 25 loss: 29.198411374580203
Epoch: 26 loss: 26.882384142537756
Epoch: 27 loss: 25.44167337455149
Epoch: 28 loss: 28.1333375164843
Epoch: 29 los

In [5]:
# Reseed before building the network so its initial weights are repeatable
# (seed_everything comes from utils -- presumably it seeds the RNGs; confirm).
seed_everything()

# Experiment: DropoutRegressor (default dropout probability), same
# hyperparameters as the DeeperRegressor run for a like-for-like comparison.
model, loss = train(
    DropoutRegressor(),
    batch_size=5, learning_rate=1e-3,
    n_epochs=100, max_overfit=10,
)

print(model, loss)

Epoch: 0 loss: 52.41513259767547
Epoch: 1 loss: 47.20830421748124
Epoch: 2 loss: 44.47108831931287
Epoch: 3 loss: 50.8652522995716
Epoch: 4 loss: 40.03758842550864
Epoch: 5 loss: 39.19388864922711
Epoch: 6 loss: 39.12170406401627
Epoch: 7 loss: 35.86967529274347
Epoch: 8 loss: 35.134203077301265
Epoch: 9 loss: 42.69626974120854
Epoch: 10 loss: 36.5405582442997
Epoch: 11 loss: 49.00527691277932
Epoch: 12 loss: 31.12774113031823
Epoch: 13 loss: 46.36176950349583
Epoch: 14 loss: 39.26652367659441
Epoch: 15 loss: 22.821628315242258
Epoch: 16 loss: 22.823993436933502
Epoch: 17 loss: 21.893306487188564
Epoch: 18 loss: 22.800854660394624
Epoch: 19 loss: 21.66941151844235
Epoch: 20 loss: 21.6598980774091
Epoch: 21 loss: 18.490000564282337
Epoch: 22 loss: 23.767613125598338
Epoch: 23 loss: 18.31912462636242
Epoch: 24 loss: 18.659377954606935
Epoch: 25 loss: 19.348938864047135
Epoch: 26 loss: 19.20742992341049
Epoch: 27 loss: 24.18692925783593
Epoch: 28 loss: 18.33774029269932
Epoch: 29 loss: 20