# Dataset, Dataloader, BatchNorm, Dropout, Оптимизация

# Домашнее задание

Будем практиковаться на датасете недвижимости (sklearn.datasets.fetch_california_housing или  https://www.kaggle.com/c/avito-demand-prediction)

Ваша задача:
1. Создать Dataset для загрузки данных
2. Обернуть его в Dataloader
3. Написать архитектуру сети, которая предсказывает стоимость недвижимости (или которая предсказывает число показов соответственно) на основании числовых данных. Можно сделать генерацию новых признаков. Сеть должна включать BatchNorm слои и Dropout (или НЕ включать, но нужно обосновать)
4. Сравните сходимость Adam, RMSProp и SGD, сделайте вывод по качеству работы модели

train-test разделение нужно сделать с помощью sklearn random_state=13, test_size = 0.25


In [36]:
import torch

In [37]:
import torch.nn as nn

In [38]:
import pandas as pd
import numpy as np

In [39]:
from torch.utils.data import DataLoader, Dataset

In [40]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

In [41]:
from sklearn.preprocessing import StandardScaler

In [42]:
# Работа с данными

In [43]:
housedata=fetch_california_housing()

In [44]:
housedata

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),
 'frame': None,
 'target_names': ['MedHouseVal'],
 'feature_names': ['MedInc',
  'HouseAge',
  'AveRooms',
  'AveBedrms',
  'Population',
  'AveOccup',
  'Latitude',
  'Longitude'],
 'DESCR': '.. _california_housing_dataset:\n

In [45]:
# Разделим на тестовые и тренеровочные данные
X_train,X_test,y_train,y_test=train_test_split(housedata.data,housedata.target,test_size=0.25,random_state=13)

In [46]:
# Нормализуем данные и подготовим их для дальнейшего использования в нашем dstaset
scale = StandardScaler()
X_train_s = scale.fit_transform(X_train)
X_test_s = scale.transform(X_test)

In [47]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [48]:
train_x = torch.from_numpy(X_train_s.astype(np.float32)).to(DEVICE)
train_y = torch.from_numpy(y_train.astype(np.float32)).to(DEVICE)

test_x = torch.from_numpy(X_test_s.astype(np.float32)).to(DEVICE)
test_y = torch.from_numpy(y_test.astype(np.float32)).to(DEVICE)

In [49]:
class RangeDataset(Dataset):
    def __init__(self, *init_datasets):
        assert all(init_datasets[0].size(0) == init_dataset.size(0) for init_dataset in init_datasets), "Несоотвутствует размерность среди dataset"
        self._base_datasets = init_datasets

    def __len__(self):
        return self._base_datasets[0].size(0)

    def __getitem__(self, idx):
        return tuple(base_dataset[idx] for base_dataset in self._base_datasets)

In [50]:
train_dataset = RangeDataset(train_x, train_y)
test_dataset = RangeDataset(test_x, test_y)

In [51]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=0, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True, num_workers=0, drop_last=True)

In [52]:
class My_Net(nn.Module):
    def __init__(self) -> None:
        super(My_Net, self).__init__()
        self.block_1 = nn.Sequential(
            nn.Linear(in_features=8, out_features=64, bias=True),
            nn.Dropout(0.3),
            nn.BatchNorm1d(64),
            nn.ReLU())
        self.block_2 = nn.Sequential(
            nn.Linear(in_features=64, out_features=32, bias=True),
            nn.Dropout(0.3),
            nn.BatchNorm1d(32),
            nn.ReLU())
        self.block_3 = nn.Sequential(
            nn.Linear(in_features=32, out_features=16),
            nn.Dropout(0.3),
            nn.BatchNorm1d(16),
            nn.ReLU())    
        self.predict = nn.Sequential(
            nn.Linear(in_features=16, out_features=1, bias=True),
#             nn.BatchNorm1d(1),
            nn.Sigmoid())

    def forward(self, inp):
        out = self.block_1(inp)
        out = self.block_2(out)
        out = self.block_3(out)
        out = self.predict(out)
        return out[:, 0]

In [53]:
EPOCHES = 10

In [54]:
def train_loop(train_loader, test_loader, net, optimizer):
    loss_fn = nn.MSELoss()
    best_acc = {'train': None, 'test': None}
    net.train()
    for epoch in range(EPOCHES):
        running_loss, running_items, running_right = 0.0, 0.0, 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            outputs = net(inputs)
            loss = loss_fn(outputs, labels)

        # обнуляем градиент
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # выводим статистику о процессе обучения
        running_loss += loss.item()
        running_items += len(labels)

        if i % 150 == 0 or (i + 1) == len(train_loader):    # печатаем каждые 300 mini-batches
            net.eval()

            test_loss, test_running_total, test_loss  = 0.0, 0.0, 0.0
            for y, (out_test, lbl_test) in enumerate(test_loader):
                test_outputs = net(out_test)
                test_loss += loss_fn(test_outputs, lbl_test)
                test_running_total += len(lbl_test)

            res_loss_train = running_loss / running_items
            res_loss_test = test_loss / test_running_total

            if best_acc['train'] is None or res_loss_train < best_acc['train']:
                best_acc['train'] = res_loss_train

            if best_acc['test'] is None or res_loss_test < best_acc['test']:
                best_acc['test'] = res_loss_train

            print(f'Epoch [{epoch + 1}/{EPOCHES}]. ' \
                  f'Step [{i + 1}/{len(train_loader)}]. ' \
                  f'Loss: {res_loss_train:.3f}. '\
                  f'Test acc: {res_loss_test:.3f}.')

            running_loss, running_items = 0.0, 0.0
            net.train()
    print(f"Best acc train: {best_acc['train']:.3f}. Best acc test: {best_acc['test']:.3f}")
    print('Training is finished!')

### РАБОТА НЕЙРОННОЙ СЕТИ С РАЗНЫМИ ОПТИМИЗАТОРАМИ

In [55]:
from torch.optim import Adam, RMSprop, SGD

### ADAM

In [56]:
net = My_Net().to(DEVICE)
optimizer = Adam(net.parameters(), lr=0.001,betas=(0.9, 0.999))

In [62]:
%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [483/483]. Loss: 0.152. Test acc: 0.117.
Epoch [2/10]. Step [483/483]. Loss: 0.077. Test acc: 0.116.
Epoch [3/10]. Step [483/483]. Loss: 0.081. Test acc: 0.116.
Epoch [4/10]. Step [483/483]. Loss: 0.089. Test acc: 0.116.
Epoch [5/10]. Step [483/483]. Loss: 0.099. Test acc: 0.116.
Epoch [6/10]. Step [483/483]. Loss: 0.088. Test acc: 0.117.
Epoch [7/10]. Step [483/483]. Loss: 0.166. Test acc: 0.116.
Epoch [8/10]. Step [483/483]. Loss: 0.155. Test acc: 0.116.
Epoch [9/10]. Step [483/483]. Loss: 0.116. Test acc: 0.116.
Epoch [10/10]. Step [483/483]. Loss: 0.140. Test acc: 0.116.
Best acc train: 0.077. Best acc test: 0.077
Training is finished!
Wall time: 6.29 s


### SGD

In [59]:
net = My_Net().to(DEVICE)
optimizer = SGD(net.parameters(), lr=0.001)

In [63]:
%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [483/483]. Loss: 0.075. Test acc: 0.116.
Epoch [2/10]. Step [483/483]. Loss: 0.085. Test acc: 0.117.
Epoch [3/10]. Step [483/483]. Loss: 0.137. Test acc: 0.116.
Epoch [4/10]. Step [483/483]. Loss: 0.119. Test acc: 0.116.
Epoch [5/10]. Step [483/483]. Loss: 0.085. Test acc: 0.116.
Epoch [6/10]. Step [483/483]. Loss: 0.084. Test acc: 0.116.
Epoch [7/10]. Step [483/483]. Loss: 0.101. Test acc: 0.115.
Epoch [8/10]. Step [483/483]. Loss: 0.152. Test acc: 0.116.
Epoch [9/10]. Step [483/483]. Loss: 0.102. Test acc: 0.116.
Epoch [10/10]. Step [483/483]. Loss: 0.140. Test acc: 0.116.
Best acc train: 0.075. Best acc test: 0.075
Training is finished!
Wall time: 6.27 s


### RMSprop

In [64]:
net = My_Net().to(DEVICE)
optimizer = RMSprop(net.parameters(), lr=0.001, alpha=0.99)

In [69]:
%%time
train_loop(train_loader, test_loader, net, optimizer)

Epoch [1/10]. Step [483/483]. Loss: 0.061. Test acc: 0.102.
Epoch [2/10]. Step [483/483]. Loss: 0.130. Test acc: 0.102.
Epoch [3/10]. Step [483/483]. Loss: 0.081. Test acc: 0.102.
Epoch [4/10]. Step [483/483]. Loss: 0.095. Test acc: 0.100.
Epoch [5/10]. Step [483/483]. Loss: 0.121. Test acc: 0.102.
Epoch [6/10]. Step [483/483]. Loss: 0.079. Test acc: 0.099.
Epoch [7/10]. Step [483/483]. Loss: 0.075. Test acc: 0.101.
Epoch [8/10]. Step [483/483]. Loss: 0.102. Test acc: 0.103.
Epoch [9/10]. Step [483/483]. Loss: 0.092. Test acc: 0.101.
Epoch [10/10]. Step [483/483]. Loss: 0.063. Test acc: 0.099.
Best acc train: 0.061. Best acc test: 0.061
Training is finished!
Wall time: 6.26 s


### Вывод: Лучший результат (по точности) показала нейронная сеть с оптимизизатором Adam, немного хуже нейронная сеть с оптимизизатором SGD, самый худший результат нейронная сеть с оптимизизатором RMSprop. Сети с RMSprop понадобилось больше попыток прогона для схождения результатов тренировочного и тестового датасета. Для задач подобного рода лучше приспособлены оптимизаторы ADAM и SGD   