---

## Import Libraries

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import albumentations # Library to do augmentation on images
import time
import os
import PIL
from PIL import Image

# Graphs
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Sk Learn
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV

# Preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

#SciKit Learn
from sklearn.model_selection import train_test_split
from sklearn import metrics, model_selection

# Metrics
from sklearn.metrics import mean_squared_error

---

## Loading Dataset

In [None]:
# Read the housing CSV from the relative input directory, then preview the first rows.
housing_csv_path = "../input/bostonhoustingmlnd/housing.csv"
df = pd.read_csv(housing_csv_path)
df.head()

In [None]:
# Summarize the frame: column names, non-null counts and dtypes.
df.info()

---

## Split the Dataset

In [None]:
# Separate the regression target (MEDV) from the feature columns.
# X is built with a non-mutating drop so `df` keeps its MEDV column: the cell
# can be re-run without a KeyError, and the loaded frame stays intact.
y = df["MEDV"]
X = df.drop(columns="MEDV")

# Sanity check: X and y must have the same number of rows.
X.shape, y.shape

In [None]:
# Shuffle and split into train/test partitions (test_size=0.2); the fixed
# random_state keeps the split reproducible between runs.
split_options = {"test_size": 0.2, "random_state": 42, "shuffle": True}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Show the shape of every partition as a quick sanity check.
X_train.shape, y_train.shape, X_test.shape, y_test.shape

---

## Preprocessing

In [None]:
# Min Max version: fitted on the raw training features, *before* X_train and
# X_test are rebound to their standardized versions below. Previously this
# scaler was fitted on the StandardScaler output, chaining the two transforms.
scaler2 = MinMaxScaler()
Xm_train = scaler2.fit_transform(X_train)
Xm_test = scaler2.transform(X_test)  # training statistics only: no test-set leakage

# Standard version: statistics are learned from the training split only.
# X_train / X_test are rebound to the standardized arrays because the cells
# that follow consume them under these names.
# NOTE: re-running this cell without re-running the split cell operates on the
# already-standardized arrays.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [None]:
# Preview only the first rows of each scaled matrix rather than the whole arrays.
X_train[:5], Xm_train[:5]

---

## Training

In [None]:
# Grid-search space: single-hidden-layer widths crossed with four activations.
params = [
    dict(
        hidden_layer_sizes=[(width,) for width in (1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 50, 100)],
        activation=["identity", "logistic", "tanh", "relu"],
    )
]

In [None]:
# Base regressor shared by every grid point; the search only varies the
# entries listed in `params`.
base_regressor = MLPRegressor(
    solver='lbfgs',
    alpha=0.0001,
    max_iter=10000,
    max_fun=15000,
    random_state=0,
)

# Exhaustive search over `params` using all available cores (n_jobs=-1).
model = GridSearchCV(estimator=base_regressor, param_grid=params, n_jobs=-1, verbose=8)

# Fit on the standardized training features; the fitted search is the cell output.
model.fit(X_train, y_train)

In [None]:
# Hyper-parameter combination selected by the grid search on the standardized features.
model.best_params_

**Notes**
* 1st Version:
    * 'activation': 'relu', 
    * 'hidden_layer_sizes': 10 --> 1, 2, 3, 5, 10
* 2nd Version:
    * 'activation': 'relu', 
    * 'hidden_layer_sizes': 10 --> 5, 10, 40, 80, 100
* 3rd Version with R^2 score!
    * 'activation': 'relu', 
    * 'hidden_layer_sizes': 5  --> 1, 2, 3, 5, 8, 10, 12, 15, 20
* 4th Version with R^2 score!
    * 'activation': 'relu', 
    * 'hidden_layer_sizes': 5 --> 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 50, 100
* 5th Version:
    * Removed the explicit R^2 `scoring` argument from the GridSearch to verify which score it uses by default
    * Kept the same parameters as 4th Version.
    * Kept the same `best_score_`

In [None]:
# Mean cross-validated score of the best parameter combination. No `scoring`
# argument is passed to the search, so the estimator's default scorer is used.
model.best_score_

### Training the MinMax Version

In [None]:
# Grid-search space for the min-max-scaled features: single-layer widths plus
# two two-layer architectures, crossed with four activations.
params_m = [
    dict(
        hidden_layer_sizes=(
            [(width,) for width in (1, 4, 5, 7, 10, 59, 60, 61, 62, 63, 64, 65)]
            + [(32, 16), (64, 32)]
        ),
        activation=["identity", "logistic", "tanh", "relu"],
    )
]

In [None]:
# Base regressor for the min-max experiment; the search only varies the
# entries listed in `params_m`.
base_regressor_m = MLPRegressor(
    solver='lbfgs',
    alpha=0.0001,
    max_iter=10000,
    max_fun=15000,
    random_state=0,
)

# Exhaustive search over `params_m` using all available cores (n_jobs=-1).
model_m = GridSearchCV(estimator=base_regressor_m, param_grid=params_m, n_jobs=-1, verbose=8)

# Fit on the min-max-scaled training features; the fitted search is the cell output.
model_m.fit(Xm_train, y_train)

In [None]:
# Hyper-parameter combination selected by the grid search on the min-max-scaled features.
model_m.best_params_

In [None]:
# Mean cross-validated score of the best parameter combination. No `scoring`
# argument is passed to the search, so the estimator's default scorer is used.
model_m.best_score_

**Notes**
* 1st Version:
    * 'activation': 'relu', 
    * 'hidden_layer_sizes': 50 --> 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 50, 100
* 2nd Version:
    * 'activation': 'relu', 
    * 'hidden_layer_sizes': 60
* 3rd Version:
    * 'activation': 'relu', 
    * 'hidden_layer_sizes': 62  --> 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, (32, 16), (64, 32)

---

## Performance on the Training Set

In [None]:
# In-sample predictions of the fitted grid search on the standardized training features.
y_train_pred = model.predict(X_train)

In [None]:
# Root-mean-squared error on the training set.
# `mean_squared_error(..., squared=False)` is deprecated since scikit-learn 1.4
# and removed in 1.6; taking the square root of the MSE works on every version.
RMSE_train = np.sqrt(mean_squared_error(y_train, y_train_pred))
RMSE_train

### Graph

In [None]:
# Least-squares line through the (actual, predicted) pairs. The fit is computed
# once and unpacked; the original ran the identical np.polyfit call twice.
model_lin_train = np.polyfit(y_train, y_train_pred, 1)
a, b = model_lin_train  # slope, intercept (np.polyfit returns the highest degree first)
predict_train = np.poly1d(model_lin_train)
y_lin_train = predict_train(y_train)

# Squared Pearson correlation between targets and predictions, i.e. the R^2 of
# the line fitted above.
# NOTE(review): this is not the same quantity as sklearn's
# r2_score(y_true, y_pred); confirm which one the report should show.
corr_train = np.corrcoef(y_train, y_train_pred)[0, 1]
R2_train = corr_train ** 2
R2_train

### Standard Scaler

In [None]:
# Predicted vs. actual values on the training set (standard-scaled model):
# scatter of the pairs, the fitted line in orange, and y = x in red as reference.
fig, ax = plt.subplots(figsize=(16, 9))

ax.scatter(y_train, y_train_pred)
ax.plot(y_train, y_lin_train, 'orange', label='y = {:.2}x + {:.6}'.format(a, b))
ax.plot(y_train, y_train, color='red')

ax.set_title('Boston Housing Training set, R$^2$ = {:.3}'.format(R2_train), size=25)
ax.set_xlabel('y_train', size=20)
ax.set_ylabel('y_pred_train', size=20)

ax.legend()
plt.show()

### Min Max Scaler

In [None]:
# In-sample predictions of the grid search fitted on the min-max-scaled features.
ym_train_pred = model_m.predict(Xm_train)

# Root-mean-squared error on the training set.
# `mean_squared_error(..., squared=False)` is deprecated since scikit-learn 1.4
# and removed in 1.6; taking the square root of the MSE works on every version.
RMSE_m_train = np.sqrt(mean_squared_error(y_train, ym_train_pred))
RMSE_m_train

In [None]:
# Least-squares line through the (actual, predicted) pairs of the min-max model.
# The fit is computed once and unpacked; the original ran the identical
# np.polyfit call twice.
model_m_lin_train = np.polyfit(y_train, ym_train_pred, 1)
c, d = model_m_lin_train  # slope, intercept (np.polyfit returns the highest degree first)
predict_train_m = np.poly1d(model_m_lin_train)
ym_lin_train = predict_train_m(y_train)

# Squared Pearson correlation between targets and predictions, i.e. the R^2 of
# the line fitted above.
# NOTE(review): this is not the same quantity as sklearn's
# r2_score(y_true, y_pred); confirm which one the report should show.
corr_train_m = np.corrcoef(y_train, ym_train_pred)[0, 1]
R2_train_m = corr_train_m ** 2
R2_train_m

In [None]:
# Predicted vs. actual values on the training set (min-max-scaled model):
# scatter of the pairs, the fitted line in orange, and y = x in red as reference.
fig, ax = plt.subplots(figsize=(16, 9))

ax.scatter(y_train, ym_train_pred)
ax.plot(y_train, ym_lin_train, 'orange', label='y = {:.2}x + {:.6}'.format(c, d))
ax.plot(y_train, y_train, color='red')

ax.set_title('Boston Housing Training set, R$^2$ = {:.3}'.format(R2_train_m), size=25)
ax.set_xlabel('y_train', size=20)
ax.set_ylabel('y_pred_train', size=20)

ax.legend()
plt.show()

---

## Performance on the Test Set

### Standard Scaled Version

In [None]:
# Out-of-sample predictions of the grid search fitted on the standardized features.
y_test_pred = model.predict(X_test)

# Root-mean-squared error on the held-out test set.
# `mean_squared_error(..., squared=False)` is deprecated since scikit-learn 1.4
# and removed in 1.6; taking the square root of the MSE works on every version.
RMSE_test = np.sqrt(mean_squared_error(y_test, y_test_pred))
RMSE_test

In [None]:
# Least-squares line through the (actual, predicted) pairs of the test set.
# The fit is computed once and unpacked; the original ran the identical
# np.polyfit call twice.
model_lin_test = np.polyfit(y_test, y_test_pred, 1)
e, f = model_lin_test  # slope, intercept (np.polyfit returns the highest degree first)
predict_test = np.poly1d(model_lin_test)
y_lin_test = predict_test(y_test)

# Squared Pearson correlation between targets and predictions, i.e. the R^2 of
# the line fitted above.
# NOTE(review): this is not the same quantity as sklearn's
# r2_score(y_true, y_pred); confirm which one the report should show.
corr_test = np.corrcoef(y_test, y_test_pred)[0, 1]
R2_test = corr_test ** 2

# Predicted vs. actual values: fitted line in orange, y = x in red as reference.
plt.figure(figsize=(16, 9))
plt.scatter(y_test, y_test_pred)
plt.plot(y_test, y_lin_test, 'orange', label='y = {:.2}x + {:.6}'.format(e, f))
plt.plot(y_test, y_test, color='red')

plt.title('Boston Housing Test set, R$^2$ = {:.4}'.format(R2_test), size=25)
plt.xlabel('y_test', size=20)
plt.ylabel('y_pred_test', size=20)

plt.legend()
plt.show()

### Min Max Scaled Version

In [None]:
# Out-of-sample predictions of the grid search fitted on the min-max-scaled features.
ym_test_pred = model_m.predict(Xm_test)

# Root-mean-squared error on the held-out test set.
# `mean_squared_error(..., squared=False)` is deprecated since scikit-learn 1.4
# and removed in 1.6; taking the square root of the MSE works on every version.
RMSE_m_test = np.sqrt(mean_squared_error(y_test, ym_test_pred))
RMSE_m_test

In [None]:
# Least-squares line through the (actual, predicted) pairs of the test set for
# the min-max model. The fit is computed once and unpacked; the original ran
# the identical np.polyfit call twice.
model_lin_m_test = np.polyfit(y_test, ym_test_pred, 1)
g, h = model_lin_m_test  # slope, intercept (np.polyfit returns the highest degree first)
predict_m_test = np.poly1d(model_lin_m_test)
ym_lin_test = predict_m_test(y_test)

# Squared Pearson correlation between targets and predictions, i.e. the R^2 of
# the line fitted above.
# NOTE(review): this is not the same quantity as sklearn's
# r2_score(y_true, y_pred); confirm which one the report should show.
corr_m_test = np.corrcoef(y_test, ym_test_pred)[0, 1]
R2_m_test = corr_m_test ** 2

# Predicted vs. actual values: fitted line in orange, y = x in red as reference.
plt.figure(figsize=(16, 9))
plt.scatter(y_test, ym_test_pred)
plt.plot(y_test, ym_lin_test, 'orange', label='y = {:.2}x + {:.6}'.format(g, h))
plt.plot(y_test, y_test, color='red')

plt.title('Boston Housing Test set, R$^2$ = {:.4}'.format(R2_m_test), size=25)
plt.xlabel('y_test', size=20)
plt.ylabel('y_pred_test', size=20)

plt.legend()
plt.show()

---

## Conclusion

* Our best model achieves an RMSE of 62k on the test set.
* The best model appears to be the one trained on the features scaled with MinMaxScaler.