In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import sklearn as skl
import numpy as np

import seaborn as sns
# Apply seaborn's styling with font elements scaled by a factor of 2.
sns.set(font_scale=2)

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import sklearn.decomposition
import sklearn.random_projection
import sklearn.neural_network
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [None]:
# Load the training table; the path is relative to the notebook's working directory.
train = pd.read_csv('../data/training.csv')

# Preview the first rows as the cell output.
train.head()

In [None]:
# Feature columns are the ones whose name starts with 'm'; the remainder of
# each name is parsed as a float and kept in `wavenumbers`.
data_columns = [column for column in train.columns if column.startswith('m')]
wavenumbers = [float(column.lstrip('m')) for column in data_columns]

# Columns used as regression targets.
output_columns = ["Ca","P","pH","SOC","Sand"]

# `.values` replaces DataFrame.as_matrix(), which was deprecated in pandas 0.23
# and removed in pandas 1.0; it returns the same NumPy array on old and new versions.
X = train[data_columns].values
y = train[output_columns].values

In [None]:
def MCWRMSE(model, X, y):
    """Return the negated mean column-wise RMSE of ``model`` on ``(X, y)``.

    The RMSE is computed separately for each column of ``y`` (reduction over
    axis 0) and the per-column values are then averaged. The sign is flipped
    so the result can be used as a 'score', i.e. larger means better.

    Parameters
    ----------
    model : object exposing ``predict(X)``
    X : inputs forwarded unchanged to ``model.predict``
    y : reference values compared against the predictions

    Returns
    -------
    Negative of the mean of the per-column root-mean-square errors.
    """
    residuals = y - model.predict(X)
    per_column_mse = np.mean(np.square(residuals), axis=0)
    per_column_rmse = np.sqrt(per_column_mse)
    return -np.mean(per_column_rmse)

In [None]:
# Hyper-parameter grid for the MLPRegressor search: each key is an estimator
# argument and each list holds the candidate values to try.
param_grid = {
    # One tuple per architecture. A one-element tuple needs the trailing
    # comma: `(100)` is just the int 100, not a tuple.
    'hidden_layer_sizes': [(100, 10, 5), (100,), (10,)],
    # NOTE(review): both tolerances are extremely small; presumably the intent
    # is to keep training until max_iter rather than stop early -- confirm.
    'tol': [1e-64, 1e-16],
    'early_stopping': [True, False],
    'batch_size': [16, 32, 128],
    'max_iter': [10000],
    # Same values as the original `10e-5` / `10e-1` literals, written in
    # normalized form: 10e-5 is 1e-4 (not 1e-5) and 10e-1 is 1.0.
    'alpha': [1e-4, 1.0],
    'learning_rate_init': [1e-4, 1.0],
    'activation': ['relu', 'logistic'],
    'beta_1': [0.9, 0.95, 0.99],
    'beta_2': [0.9, 0.99, 0.999],
}

In [None]:
# Number of hyper-parameter combinations in the grid: the product of the
# lengths of the candidate lists.
grid_size = np.prod(list(map(len, param_grid.values())))
print(grid_size)

In [None]:
# Cross-validated search over every combination in `param_grid`, ranking the
# candidates with the custom MCWRMSE scorer (called as scorer(estimator, X, y)).
grid_searcher = GridSearchCV(
    sklearn.neural_network.MLPRegressor(),
    param_grid=param_grid,
    scoring=MCWRMSE,
)

In [None]:
# Fixed seed so the PCA projection and the train/test partition are identical
# on every Restart & Run All.
RANDOM_STATE = 0

# Project X onto its first 100 principal components (no whitening).
# NOTE(review): the PCA is fitted on all rows before the split, so the held-out
# rows influence the projection; consider fitting it on the training rows only
# (e.g. inside a Pipeline passed to the grid search).
transformed_X = sklearn.decomposition.PCA(
    n_components=100, whiten=False, random_state=RANDOM_STATE
).fit_transform(X)

# Hold out 20% of the rows as a test set.
X_train, X_test, y_train, y_test = train_test_split(
    transformed_X, y, test_size=0.2, random_state=RANDOM_STATE
)

In [None]:
# Run the grid search on the training split only (X_test / y_test are not passed in).
grid_searcher.fit(X_train,y_train)

In [None]:
# Best mean cross-validated score found by the search. The MCWRMSE scorer returns
# the negated error, so this value is <= 0 and closer to 0 is better.
grid_searcher.best_score_