In [1]:
import pandas as pd
import numpy as np

# Load the training table and discard its 'uniqueid' column in one step.
df = pd.read_csv("train.csv").drop(columns=["uniqueid"])

In [2]:
# Keep only the rows whose 'year' is 2005 or 2015.
df_5 = df[df["year"].isin([2005, 2015])]

# Target: the 'health' column. Features: every other column, minus any
# column that has no values at all within this subset.
y_5 = df_5["health"]
X_5 = df_5.drop(columns=["health"]).dropna(axis=1, how="all")

In [4]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler

# Naive preprocessing: numerical and categorical columns are treated alike.
# Missing entries are filled with the column mean, then every feature is
# min-max scaled (MinMaxScaler's default output range).

imp = SimpleImputer(missing_values=np.nan, strategy="mean")
scaler = MinMaxScaler()
X_5 = scaler.fit_transform(imp.fit_transform(X_5))

In [5]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

# Train a neural network with one hidden layer on the 2005/2015 data and tune
# its initial learning rate and L2 penalty (alpha) with a grid search.

# Hidden-layer width: floor((number of input features + 5) / 2).
# The feature count is read from the array shape rather than from the length
# of an arbitrary row, so it does not depend on row 1 existing.
# NOTE(review): the 5 is presumably the number of target classes -- confirm.
size_hl = (X_5.shape[1] + 5) // 2

model = MLPClassifier(
    random_state=20803652,
    hidden_layer_sizes=(size_hl,),  # trailing comma: a real one-element tuple
    max_iter=1000,
)
param_grid = {
    "learning_rate_init": [0.001, 0.01, 0.1, 0.2],
    "alpha": [0, 0.0001, 0.001, 0.01],
}

# Candidates are ranked by negative log loss, so scores closer to 0 are better.
# NOTE(review): X_5 was imputed and scaled using all rows before this search,
# so each validation fold has already influenced the preprocessing statistics.
# Moving the imputer and scaler into a Pipeline passed to GridSearchCV would
# remove that leakage.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring="neg_log_loss",
    n_jobs=-1,
    verbose=1,
)
grid_result = grid.fit(X_5, y_5)
print(grid_result.best_score_)
print(grid_result.best_params_)

Fitting 5 folds for each of 16 candidates, totalling 80 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 out of  80 | elapsed:  1.3min remaining:  1.7min
[Parallel(n_jobs=-1)]: Done  80 out of  80 | elapsed:  1.9min finished


-1.385594227464966
{'alpha': 0, 'learning_rate_init': 0.1}


In [6]:
# Keep only the rows whose 'year' is 2007 or 2017.
df_7 = df[df["year"].isin([2007, 2017])]

# Target: the 'health' column. Features: every other column, minus any
# column that has no values at all within this subset.
y_7 = df_7["health"]
X_7 = df_7.drop(columns=["health"]).dropna(axis=1, how="all")

In [8]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler

# Naive preprocessing: numerical and categorical columns are treated alike.
# Missing entries are filled with the column mean, then every feature is
# min-max scaled (MinMaxScaler's default output range).

imp = SimpleImputer(missing_values=np.nan, strategy="mean")
scaler = MinMaxScaler()
X_7 = scaler.fit_transform(imp.fit_transform(X_7))

In [9]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

# Train a neural network with one hidden layer on the 2007/2017 data and tune
# its initial learning rate and L2 penalty (alpha) with a grid search.

# Hidden-layer width: floor((number of input features + 5) / 2).
# The feature count is read from the array shape rather than from the length
# of an arbitrary row, so it does not depend on row 1 existing.
# NOTE(review): the 5 is presumably the number of target classes -- confirm.
size_hl = (X_7.shape[1] + 5) // 2

model = MLPClassifier(
    random_state=20803652,
    hidden_layer_sizes=(size_hl,),  # trailing comma: a real one-element tuple
    max_iter=1000,
)
param_grid = {
    "learning_rate_init": [0.001, 0.01, 0.1, 0.2],
    "alpha": [0, 0.0001, 0.001, 0.01],
}

# Candidates are ranked by negative log loss, so scores closer to 0 are better.
# NOTE(review): X_7 was imputed and scaled using all rows before this search,
# so each validation fold has already influenced the preprocessing statistics.
# Moving the imputer and scaler into a Pipeline passed to GridSearchCV would
# remove that leakage.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring="neg_log_loss",
    n_jobs=-1,
    verbose=1,
)
grid_result = grid.fit(X_7, y_7)
print(grid_result.best_score_)
print(grid_result.best_params_)

Fitting 5 folds for each of 16 candidates, totalling 80 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 out of  80 | elapsed:  1.3min remaining:  1.7min
[Parallel(n_jobs=-1)]: Done  80 out of  80 | elapsed:  1.9min finished


-1.3855417612324135
{'alpha': 0, 'learning_rate_init': 0.1}


In [10]:
# Keep only the rows whose 'year' is 2009 or 2019.
df_9 = df[df["year"].isin([2009, 2019])]

# Target: the 'health' column. Features: every other column, minus any
# column that has no values at all within this subset.
y_9 = df_9["health"]
X_9 = df_9.drop(columns=["health"]).dropna(axis=1, how="all")

In [11]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler

# Naive preprocessing: numerical and categorical columns are treated alike.
# Missing entries are filled with the column mean, then every feature is
# min-max scaled (MinMaxScaler's default output range).

imp = SimpleImputer(missing_values=np.nan, strategy="mean")
scaler = MinMaxScaler()
X_9 = scaler.fit_transform(imp.fit_transform(X_9))

In [12]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV

# Train a neural network with one hidden layer on the 2009/2019 data and tune
# its initial learning rate and L2 penalty (alpha) with a grid search.

# Hidden-layer width: floor((number of input features + 5) / 2).
# The feature count is read from the array shape rather than from the length
# of an arbitrary row, so it does not depend on row 1 existing.
# NOTE(review): the 5 is presumably the number of target classes -- confirm.
size_hl = (X_9.shape[1] + 5) // 2

model = MLPClassifier(
    random_state=20803652,
    hidden_layer_sizes=(size_hl,),  # trailing comma: a real one-element tuple
    max_iter=1000,
)
param_grid = {
    "learning_rate_init": [0.001, 0.01, 0.1, 0.2],
    "alpha": [0, 0.0001, 0.001, 0.01],
}

# Candidates are ranked by negative log loss, so scores closer to 0 are better.
# NOTE(review): X_9 was imputed and scaled using all rows before this search,
# so each validation fold has already influenced the preprocessing statistics.
# Moving the imputer and scaler into a Pipeline passed to GridSearchCV would
# remove that leakage.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring="neg_log_loss",
    n_jobs=-1,
    verbose=1,
)
grid_result = grid.fit(X_9, y_9)
print(grid_result.best_score_)
print(grid_result.best_params_)

Fitting 5 folds for each of 16 candidates, totalling 80 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 64 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 out of  80 | elapsed:   28.4s remaining:   38.4s
[Parallel(n_jobs=-1)]: Done  80 out of  80 | elapsed:   44.6s finished


-1.4034524025713755
{'alpha': 0.01, 'learning_rate_init': 0.1}
