In [21]:
import pandas as pd
import numpy as np

from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import mean_squared_error

#### load data

In [6]:
df = pd.read_csv("../data/processed/cleaned_train.csv")

In [7]:
df = df.drop("Unnamed: 0", axis=1)

In [10]:
df.shape

(74111, 108)

#### train-test split

In [36]:
from sklearn.model_selection import train_test_split

y = df.log_price
X = df.drop('log_price', axis=1)

r_state = 2021
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=r_state)

### Neural Network

In [14]:
mlp = MLPRegressor(max_iter=200, early_stopping=True, n_iter_no_change=5)
# parameter
layers = [(200,), (220,), (300,), (200, 200), (120, 120)]

param_dist_mlp = dict(hidden_layer_sizes=layers, 
                      learning_rate=['constant','adaptive'], 
                      alpha=[0.0001, 0.002 ,0.05])

# randomized search
rand_mlp = RandomizedSearchCV(mlp, param_dist_mlp, random_state=0, 
                              n_iter=10, cv=3, verbose=0, n_jobs=-1)

In [15]:
best_model_mlp = rand_mlp.fit(X_train, y_train)

In [16]:
print('Best hidden_layer_sizes:', best_model_mlp.best_estimator_.get_params()['hidden_layer_sizes'])
print('Best learning_rate:', best_model_mlp.best_estimator_.get_params()['learning_rate'])
print('Best alpha:', best_model_mlp.best_estimator_.get_params()['alpha'])

Best hidden_layer_sizes: (220,)
Best learning_rate: constant
Best alpha: 0.05


In [19]:
best_mlp = MLPRegressor(hidden_layer_sizes=(220,), 
                         learning_rate='constant', alpha=0.05, 
                         max_iter=200, early_stopping=True, 
                         n_iter_no_change=5).fit(X_train, y_train)

In [22]:
mean_squared_error(y_train, best_mlp.predict(X_train))

0.3247441717438138

#### original price

In [37]:
y_train_exp = np.exp(y_train)

In [38]:
best_model_mlp = rand_mlp.fit(X_train, y_train_exp)

In [39]:
print('Best hidden_layer_sizes:', best_model_mlp.best_estimator_.get_params()['hidden_layer_sizes'])
print('Best learning_rate:', best_model_mlp.best_estimator_.get_params()['learning_rate'])
print('Best alpha:', best_model_mlp.best_estimator_.get_params()['alpha'])

Best hidden_layer_sizes: (120, 120)
Best learning_rate: constant
Best alpha: 0.05


In [40]:
best_mlp_exp = MLPRegressor(hidden_layer_sizes=(120, 120), 
                         learning_rate='constant', alpha=0.05, 
                         max_iter=200, early_stopping=True, 
                         n_iter_no_change=5).fit(X_train, y_train_exp)

In [41]:
mean_squared_error(y_train_exp, best_mlp.predict(X_train))

52413.31628472358