#### _Нейронные сети и глубокое обучение / ДЗ №2 / Упражнение 2_

* **Датасет Бостон**
* Взять за основу конфигурацию нейронной сети (5,1) 
* Подбирая число нейронной в слоях, число слоев (не более двух), функцию активации, способ инициализации весов, число эпох, использование нормализации данных, скорость обучения и др.
* Использовать train_test_split с test_size=20%, для создания обучающей и проверочной выборки
* Приблизиться к бейзлайну MSE = 20000 (4500 $, если извлечь корень) на проверочной выборке

In [1]:
import matplotlib.pyplot as plt
import numpy as np

import warnings
# warnings.filterwarnings('ignore')

In [2]:
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD
from keras.wrappers.scikit_learn import KerasRegressor

Using TensorFlow backend.


In [3]:
import sys
print(f'python {sys.version}')
import tensorflow as tf
print(f'tensorflow {tf.__version__}')
import keras
print(f'keras {keras.__version__}')

python 3.6.8 |Anaconda custom (64-bit)| (default, Feb 21 2019, 18:30:04) [MSC v.1916 64 bit (AMD64)]
tensorflow 1.12.0
keras 2.2.4


In [4]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())
print(keras.backend.tensorflow_backend._get_available_gpus())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 1518047213949839434
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 1437731635
locality {
  bus_id: 1
  links {
  }
}
incarnation: 7572537124024569213
physical_device_desc: "device: 0, name: GeForce MX150, pci bus id: 0000:01:00.0, compute capability: 6.1"
]
['/job:localhost/replica:0/task:0/device:GPU:0']


In [5]:
from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

### Загрузка данных

In [6]:
boston = datasets.load_boston()

X = boston['data']
y = boston['target']

X.shape, y.shape

((506, 13), (506,))

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

[ d.shape for d in [X_train, y_train, X_test, y_test] ]

[(404, 13), (404,), (102, 13), (102,)]

### Функция создания модели 

In [8]:
def build_model(input_dim, n_units, activation='tanh', kernel_initializer='glorot_uniform', lr=0.01):
    model = Sequential()
    for layer_units in n_units:
        model.add(Dense(layer_units, input_dim=input_dim, activation=activation, kernel_initializer=kernel_initializer))
        input_dim = layer_units
    model.add(Dense(1, input_dim=input_dim, activation='linear'))    
    sgd = SGD(lr=lr, momentum=0.9, nesterov=True)
    model.compile(loss='mean_squared_error', optimizer=sgd)
    return model

Попробуем какую-нибудь модель:

In [9]:
model = build_model(X_train.shape[1], np.array([20, 15]))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 20)                280       
_________________________________________________________________
dense_2 (Dense)              (None, 15)                315       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 16        
Total params: 611
Trainable params: 611
Non-trainable params: 0
_________________________________________________________________


In [10]:
model.fit(X_train, y_train, epochs=200, verbose=0)

model.evaluate(X_test, y_test, verbose=0)

64.6820618872549

Попробуем применить стандартизацию к исходным данным:

In [11]:
scaler = StandardScaler()

model.fit(scaler.fit_transform(X_train), y_train, epochs=200, verbose=0)

model.evaluate(scaler.transform(X_test), y_test, verbose=0)

44.26659647623698

Уже относительно неплохо, но попробуем улучшить (с помощю обычного перебора по сетке):

### Подбор гиперпараметров модели

In [12]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('mlp', KerasRegressor(build_model, input_dim=X_train.shape[1], verbose=0))
])

tree_grid_params = {
    'mlp__n_units': [ np.array(x) for x in [(10,), (50,), (10, 10), (10, 30), (30, 30)] ],
    'mlp__activation': ['sigmoid', 'tanh'],
    'mlp__kernel_initializer': ['glorot_uniform', 'he_normal'],
    'mlp__lr': [0.01],
    'mlp__epochs': [50, 100, 200]
}

In [13]:
%%time

gs_model = GridSearchCV(pipeline, tree_grid_params, scoring='neg_mean_squared_error', cv=3, verbose=0, iid=True)

gs_model.fit(X_train, y_train)

Wall time: 20min 8s


In [14]:
gs_model.best_params_

{'mlp__activation': 'sigmoid',
 'mlp__epochs': 100,
 'mlp__kernel_initializer': 'glorot_uniform',
 'mlp__lr': 0.01,
 'mlp__n_units': array([10, 30])}

In [15]:
print(f'Best train MSE: {-gs_model.best_score_}')

Best train MSE: 10.796922215626699


In [16]:
scaler = gs_model.best_estimator_.steps[0][1]

model = gs_model.best_estimator_.steps[1][1].model

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_472 (Dense)            (None, 10)                140       
_________________________________________________________________
dense_473 (Dense)            (None, 30)                330       
_________________________________________________________________
dense_474 (Dense)            (None, 1)                 31        
Total params: 501
Trainable params: 501
Non-trainable params: 0
_________________________________________________________________


In [17]:
print(f'Test MSE: {model.evaluate(scaler.transform(X_test), y_test, verbose=0):.3f}')

Test MSE: 13.856


Попробуем модель с теми же пареметрами ещё раз на другом разбиении:

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

scaler = StandardScaler()

model = build_model(X_train.shape[1], np.array([10, 30]), activation='sigmoid', kernel_initializer='glorot_uniform', lr=0.01)

model.fit(scaler.fit_transform(X_train), y_train, epochs=100, verbose=0)

model.evaluate(scaler.transform(X_test), y_test, verbose=0)

9.49492468553431