### Урок 3. Dataset, Dataloader, BatchNorm, Dropout, Оптимизация

    Создать Dataset для загрузки данных (sklearn.datasets.fetch_california_housing)
    Обернуть его в Dataloader
    Написать архитектуру сети, которая предсказывает стоимость недвижимости. Сеть должна включать BatchNorm слои и Dropout (или НЕ включать, но нужно обосновать)
    
    Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели

    train-test разделение нужно сделать с помощью sklearn random_state=13, test_size = 0.25


In [42]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import sklearn.datasets
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_california_housing


RANDOM_STATE = 13

In [56]:
class CaliforniaHousingDataset(torch.utils.data.Dataset):
    def __init__(self, train=True):
        self.dataset = sklearn.datasets.fetch_california_housing(return_X_y=True)
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.dataset[0], self.dataset[1], 
                                                            random_state=RANDOM_STATE, test_size=0.25)
        
        self.suffix = '_train' if train else '_test'
        self.X = torch.Tensor(eval(f'self.X{self.suffix}'))
        self.y = torch.Tensor(eval(f'self.y{self.suffix}'))
        
    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx, :], self.y[idx]

In [57]:
train_data = CaliforniaHousingDataset()
test_data = CaliforniaHousingDataset(False)

In [74]:
len(train_data), len(test_data)

(15480, 5160)

In [75]:
train_loader = torch.utils.data.DataLoader(train_data,
                                           batch_size=32,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(test_data,
                                          batch_size=8,
                                          shuffle=False)

In [89]:
class Perceptron(nn.Module):
    def __init__(self, input_dim, output_dim, activation="relu"):
        super().__init__()
        self.fc = nn.Linear(input_dim, output_dim)
        self.activation = activation
        
    def forward(self, x):
        x = self.fc(x)
        if self.activation == "relu":
            return F.relu(x)
        if self.activation == "sigmoid":
            return F.sigmoid(x)
        raise RuntimeError
        

class FeedForward(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.fc1 = Perceptron(input_dim, 4*hidden_dim)
        self.dp = nn.Dropout(0.25)
        self.bn = nn.BatchNorm1d(4*hidden_dim)
        self.fc2 = Perceptron(4*hidden_dim, 2*hidden_dim)
        self.fc3 = Perceptron(2*hidden_dim, 1, "relu")
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.dp(x)
        x = self.bn(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

In [92]:
net = FeedForward(train_loader.dataset.X_train.shape[1], 32)

criterion = nn.MSELoss()

In [93]:
def train_function():

    num_epochs = 25

    for epoch in range(num_epochs):
        running_loss, running_items = 0.0, 0.0
        net.train()

        for i, data in enumerate(train_loader):
            inputs, labels = data[0], data[1]

            # обнуляем градиент
            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()

            # выводим статистику о процессе обучения
            running_loss += loss.item()
            running_items += len(labels)

            # выводим статистику о процессе обучения
            if i % 200 == 0:    # печатаем каждые 200 шагов
                net.eval()
                test_running_loss, test_running_total = 0.0, 0.0

                for j, data in enumerate(test_loader):

                    test_outputs = net(data[0])
                    test_running_total += len(data[1])
                    test_loss = criterion(test_outputs.squeeze(), data[1]) 
                    test_running_loss += test_loss.item()

                print(f'Epoch [{epoch + 1}/{num_epochs}]. ' \
                      f'Step [{i + 1}/{len(train_loader)}]. ' \
                      f'Loss: {running_loss / running_items:.3f}. '\
                      f'Test loss: {test_running_loss / test_running_total:.3f}')

                running_loss, running_items = 0.0, 0.0


    print('Training is finished!\n')

In [96]:
%%time

optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
train_function()

Epoch [1/25]. Step [1/484]. Loss: 0.040. Test loss: 0.057
Epoch [1/25]. Step [201/484]. Loss: 0.014. Test loss: 0.057
Epoch [1/25]. Step [401/484]. Loss: 0.014. Test loss: 0.059
Epoch [2/25]. Step [1/484]. Loss: 0.032. Test loss: 0.062
Epoch [2/25]. Step [201/484]. Loss: 0.014. Test loss: 0.055
Epoch [2/25]. Step [401/484]. Loss: 0.014. Test loss: 0.059
Epoch [3/25]. Step [1/484]. Loss: 0.034. Test loss: 0.059
Epoch [3/25]. Step [201/484]. Loss: 0.016. Test loss: 0.056
Epoch [3/25]. Step [401/484]. Loss: 0.014. Test loss: 0.055
Epoch [4/25]. Step [1/484]. Loss: 0.039. Test loss: 0.056
Epoch [4/25]. Step [201/484]. Loss: 0.013. Test loss: 0.056
Epoch [4/25]. Step [401/484]. Loss: 0.014. Test loss: 0.070
Epoch [5/25]. Step [1/484]. Loss: 0.046. Test loss: 0.055
Epoch [5/25]. Step [201/484]. Loss: 0.014. Test loss: 0.056
Epoch [5/25]. Step [401/484]. Loss: 0.014. Test loss: 0.066
Epoch [6/25]. Step [1/484]. Loss: 0.048. Test loss: 0.069
Epoch [6/25]. Step [201/484]. Loss: 0.014. Test loss

In [97]:
%%time

optimizer = torch.optim.RMSprop(net.parameters(), lr=0.001)
train_function()

Epoch [1/25]. Step [1/484]. Loss: 0.047. Test loss: 0.129
Epoch [1/25]. Step [201/484]. Loss: 0.018. Test loss: 0.055
Epoch [1/25]. Step [401/484]. Loss: 0.015. Test loss: 0.056
Epoch [2/25]. Step [1/484]. Loss: 0.042. Test loss: 0.058
Epoch [2/25]. Step [201/484]. Loss: 0.015. Test loss: 0.058
Epoch [2/25]. Step [401/484]. Loss: 0.015. Test loss: 0.062
Epoch [3/25]. Step [1/484]. Loss: 0.023. Test loss: 0.056
Epoch [3/25]. Step [201/484]. Loss: 0.015. Test loss: 0.055
Epoch [3/25]. Step [401/484]. Loss: 0.014. Test loss: 0.055
Epoch [4/25]. Step [1/484]. Loss: 0.035. Test loss: 0.055
Epoch [4/25]. Step [201/484]. Loss: 0.014. Test loss: 0.052
Epoch [4/25]. Step [401/484]. Loss: 0.015. Test loss: 0.065
Epoch [5/25]. Step [1/484]. Loss: 0.046. Test loss: 0.055
Epoch [5/25]. Step [201/484]. Loss: 0.014. Test loss: 0.066
Epoch [5/25]. Step [401/484]. Loss: 0.015. Test loss: 0.055
Epoch [6/25]. Step [1/484]. Loss: 0.049. Test loss: 0.054
Epoch [6/25]. Step [201/484]. Loss: 0.014. Test loss

In [98]:
%%time

optimizer = torch.optim.SGD(net.parameters(), lr=0.001)
train_function()

Epoch [1/25]. Step [1/484]. Loss: 0.060. Test loss: 0.298
Epoch [1/25]. Step [201/484]. Loss: 0.037. Test loss: 0.119
Epoch [1/25]. Step [401/484]. Loss: 0.030. Test loss: 0.110
Epoch [2/25]. Step [1/484]. Loss: 0.036. Test loss: 0.154
Epoch [2/25]. Step [201/484]. Loss: 0.030. Test loss: 0.138
Epoch [2/25]. Step [401/484]. Loss: 0.027. Test loss: 0.133
Epoch [3/25]. Step [1/484]. Loss: 0.045. Test loss: 0.112
Epoch [3/25]. Step [201/484]. Loss: 0.029. Test loss: 0.128
Epoch [3/25]. Step [401/484]. Loss: 0.027. Test loss: 0.104
Epoch [4/25]. Step [1/484]. Loss: 0.037. Test loss: 0.107
Epoch [4/25]. Step [201/484]. Loss: 0.028. Test loss: 0.102
Epoch [4/25]. Step [401/484]. Loss: 0.027. Test loss: 0.120
Epoch [5/25]. Step [1/484]. Loss: 0.099. Test loss: 0.099
Epoch [5/25]. Step [201/484]. Loss: 0.027. Test loss: 0.134
Epoch [5/25]. Step [401/484]. Loss: 0.027. Test loss: 0.127
Epoch [6/25]. Step [1/484]. Loss: 0.032. Test loss: 0.101
Epoch [6/25]. Step [201/484]. Loss: 0.026. Test loss

#### Вывод: 
    в данном примере лучший результат в совокупности оценка-время показал оптимизатор RMSprop. 