In [7]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from warnings import filterwarnings
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
filterwarnings('ignore')

# Prepare

In [3]:
# Data
df = pd.read_csv("../datas/Hitters.csv")
df = df.dropna()

# Dummie
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df["Salary"]
X = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1)
X = pd.concat([X, dms[["League_N", "Division_W", "NewLeague_N"]]], axis=1)

# Test Train
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state=42)

# Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)

# Model

In [4]:
mlp_model = MLPRegressor(hidden_layer_sizes=(100,20,10,5)).fit(X_train_scaled, y_train)

# Predict

In [5]:
y_pred = mlp_model.predict(X_test_scaled)
np.sqrt(mean_squared_error(y_test, y_pred))

355.24979264866465

# Model Tuning

In [6]:
# Cross Validition
mlp_params = {'alpha': [0.1, 0.01,0.02,0.005],
             'hidden_layer_sizes': [(20,20),(100,50,150),(300,200,150)],
             'activation': ['relu','logistic']}

mlp_cv_model = GridSearchCV(mlp_model, mlp_params, cv=10).fit(X_train_scaled, y_train)
params=pd.Series(mlp_cv_model.best_params_)

# Tuned Model
mlp_tuned = MLPRegressor(alpha=params[1], hidden_layer_sizes=params[2], activation=params[0]).fit(X_train_scaled, y_train)
y_pred = mlp_tuned.predict(X_test_scaled)

# Print Values
print(f"{params}\n"
      f"MSE: {np.sqrt(mean_squared_error(y_test, y_pred))}")

activation                      relu
alpha                           0.02
hidden_layer_sizes    (100, 50, 150)
dtype: object
MSE: 349.6816206268958
