In [3]:
# import standard libraries
import numpy as np
import os
import pandas as pd

# import required libraries
from sklearn import model_selection, neural_network, metrics

In [4]:
# Environment setup: switch the working directory to the data folder, then
# load the train and test splits from there (train first, test second).
os.chdir("../data")
df_train, df_test = (
    pd.read_csv(csv_name)
    for csv_name in ('2.0-sh-train-data.csv', '2.0-sh-test-data.csv')
)

In [5]:
# Preview the first rows of the training frame as a sanity check on the load.
df_train.head()

Unnamed: 0,eps,liquidity,profitability,productivity,leverageRatio,marketBookRatio,tobinsQ,bUmd,excessReturnFromRiskModel,bHml,alpha,bSmb,bMkt,totalVolatility,outputReturn
0,-0.132617,-1.38611,-0.302013,-0.470921,0.021371,-0.135922,-0.378154,-0.493098,0.569098,-0.822272,-0.425148,-0.137759,-0.696877,-0.519608,-1.9868
1,-0.132617,-1.38611,-0.302013,-0.470921,0.021371,-0.135922,-0.378154,-0.42543,0.487661,-0.749793,-0.27987,-0.203535,-0.688845,-0.564741,8.7838
2,-0.132617,-1.38611,-0.302013,-0.470921,0.021371,-0.135922,-0.378154,-0.416329,-0.25188,-0.737266,-0.231444,-0.235516,-0.693531,-0.576062,8.1988
3,-0.132617,-1.38611,-0.302013,-0.470921,0.021371,-0.135922,-0.378154,-0.37834,-0.846599,-0.715567,-0.296012,-0.240132,-0.661736,-0.568825,8.6207
4,-0.132617,-1.38611,-0.302013,-0.470921,0.021371,-0.135922,-0.378154,-0.287721,-1.645899,-0.678657,-0.376722,-0.243759,-0.689515,-0.569304,1.0582


In [6]:
# Preview the first rows of the test frame as a sanity check on the load.
df_test.head()

Unnamed: 0,eps,liquidity,profitability,productivity,leverageRatio,marketBookRatio,tobinsQ,bUmd,excessReturnFromRiskModel,bHml,alpha,bSmb,bMkt,totalVolatility,outputReturn
0,0.01303,1.918967,0.096819,0.277844,-0.030231,0.841679,1.277414,-0.89198,0.028556,-2.531554,2.399696,-0.025824,0.475835,1.017043,1.049
1,0.01303,1.918967,0.096819,0.277844,-0.030231,0.841679,1.277414,-0.850826,-0.957059,-2.513881,2.415838,-0.016427,0.498593,1.011446,-1.3064
2,0.01303,1.918967,0.096819,0.277844,-0.030231,0.841679,1.277414,-0.843703,-1.07086,-2.509631,2.383555,-0.044287,0.493907,1.009428,9.8097
3,0.01303,1.918967,0.096819,0.277844,-0.030231,0.841679,1.277414,-0.838163,1.294803,-2.470931,2.448122,-0.045441,0.511311,1.011269,7.4158
4,0.01303,1.918967,0.096819,0.277844,-0.030231,0.841679,1.277414,-0.718261,-0.877508,-2.487709,2.464264,-0.082203,0.62644,1.006756,21.4339


In [7]:
# Split the training frame into the feature matrix (every column except
# "outputReturn") and the 1-D target vector ("outputReturn").
# Columns are selected by label rather than by a boolean mask over the column
# names: a missing or misspelled target column then raises a KeyError instead
# of silently producing an empty target.
# Tip: for a quick smoke test, subsample df_train / df_test with .head(5000)
# before building the arrays.
X_train = np.array(df_train.drop(columns="outputReturn"))
y_train = np.array(df_train["outputReturn"])

In [8]:
# Display the (rows, features) shape of the training feature matrix.
X_train.shape

(94729, 14)

In [9]:
# Display the shape of the training target vector (expected to be 1-D, one
# entry per training row).
y_train.shape

(94729,)

In [10]:
# Split the test frame into the feature matrix (every column except
# "outputReturn") and the 1-D target vector ("outputReturn").
# Columns are selected by label rather than by a boolean mask over the column
# names: a missing or misspelled target column then raises a KeyError instead
# of silently producing an empty target.
X_test = np.array(df_test.drop(columns="outputReturn"))
y_test = np.array(df_test["outputReturn"])

In [11]:
# Display the (rows, features) shape of the test feature matrix.
X_test.shape

(26841, 14)

In [12]:
# Display the shape of the test target vector (expected to be 1-D, one entry
# per test row).
y_test.shape

(26841,)

In [13]:
# Hyper-parameter grid for the MLP regressor search. Every combination of the
# lists below becomes one candidate model.
hidden_layer_sizes = [(50,), (50, 50), (100,), (100, 100)]
activation = ['identity', 'logistic', 'tanh', 'relu']
# MLPRegressor only consults the `learning_rate` schedule when solver='sgd'.
# The estimator searched here is built with the default solver, so sweeping
# 'constant' / 'invscaling' / 'adaptive' would fit every candidate three times
# with effectively identical settings. The key is kept (the results table
# reads best_params_['learning_rate']) but pinned to the default schedule,
# which cuts the grid from 960 to 320 candidates.
learning_rate = ['constant']
learning_rate_init = [0.001, 0.01, 0.1, 0.2, 0.3]
max_iter = [100, 200, 400, 800]

# Keys must match MLPRegressor constructor argument names.
parameters = dict(hidden_layer_sizes=hidden_layer_sizes,
                  activation=activation,
                  learning_rate=learning_rate,
                  learning_rate_init=learning_rate_init,
                  max_iter=max_iter)

In [14]:
# Exhaustive grid search over `parameters`, using GridSearchCV's default
# cross-validation splitter and the estimator's default scoring.
# - random_state pins the MLP's weight initialisation and batch shuffling, so
#   the selected parameters and the reported metrics are reproducible.
# - n_jobs=-1 evaluates candidates on all available cores; the individual fits
#   are independent, so parallelism does not change which candidate wins.
reg = model_selection.GridSearchCV(
    estimator=neural_network.MLPRegressor(random_state=42),
    param_grid=parameters,
    n_jobs=-1,
)

In [None]:
# Run the grid search on the training split (this is the expensive cell).
reg.fit(X=X_train, y=y_train)



In [None]:
# Predict the test-set targets with the fitted search object.
predicted = reg.predict(X=X_test)

In [None]:
# Show the predicted values for the test set.
predicted

In [None]:
# Show the actual test targets for comparison with the predictions.
y_test

In [None]:
# Hyper-parameter combination selected by the grid search.
reg.best_params_

In [None]:
# Mean absolute error of the predictions on the test set.
metrics.mean_absolute_error(y_true=y_test, y_pred=predicted)

In [None]:
# Mean squared error of the predictions on the test set.
metrics.mean_squared_error(y_true=y_test, y_pred=predicted)

In [None]:
# Median absolute error of the predictions on the test set.
metrics.median_absolute_error(y_true=y_test, y_pred=predicted)

In [None]:
# R2 (coefficient of determination) of the predictions on the test set.
metrics.r2_score(y_true=y_test, y_pred=predicted)

In [None]:
# Summarise the selected hyper-parameters and the test-set scores in a
# one-row table. The mapping keeps each column label next to its value; its
# insertion order defines the column order.
best_params = reg.best_params_
summary_row = {
    'Activation Function': best_params['activation'],
    'Hidden Layers Size': best_params['hidden_layer_sizes'],
    'Learning Rate': best_params['learning_rate'],
    'Initial Learning Rate': best_params['learning_rate_init'],
    'Maximum Iteration': best_params['max_iter'],
    'Mean Absolute Error': metrics.mean_absolute_error(y_test, predicted),
    'Mean Squared Error': metrics.mean_squared_error(y_test, predicted),
    'Median Absolute Error': metrics.median_absolute_error(y_test, predicted),
    'R2 Score': metrics.r2_score(y_test, predicted),
}
col_names = list(summary_row)
table = pd.DataFrame(columns=col_names)
table.loc[0] = list(summary_row.values())
table

In [None]:
# Persist the summary table without the index column. The relative path
# resolves against the current working directory, which the earlier
# os.chdir("../data") call moved to the data folder.
table.to_csv(path_or_buf='model-output-1.csv', index=False)