In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.metrics import roc_auc_score

In [2]:
X = np.genfromtxt('data/X_train.txt', delimiter=None)
Y = np.genfromtxt('data/Y_train.txt', delimiter=None)

# Shuffle rows of X and Y in the same way
s = np.arange(X.shape[0])
np.random.seed(0)
np.random.shuffle(s)

X = X[s]
Y = Y[s]

print(X.shape)

(200000, 14)


In [3]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
print(X_train.shape)

(160000, 14)


In [4]:
# Scale the data, fitting only on training data
scaler = StandardScaler().fit(X_train)
#scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [7]:
clf = MLPClassifier(
    alpha=10.0**-8,
    hidden_layer_sizes=(100, 100, 100),
)

parameters = {
}

grid = GridSearchCV(clf, parameters, scoring='roc_auc', n_jobs=-1, verbose=True)
grid.fit(X, Y)

print(grid.best_score_)
print(grid.best_params_)

Fitting 3 folds for each of 1 candidates, totalling 3 fits


[Parallel(n_jobs=-1)]: Done   3 out of   3 | elapsed:  2.2min finished


0.6413389248691371
{}


In [8]:
predictions = grid.predict_proba(X)
score = roc_auc_score(Y, predictions[:,1])

print(score)

0.5592770051115261


## Scores

```
layers=(100,100,100) => 0.732873616429
layers=(100,50,50,100) => 0.724782415565
```

# Ready to submit?

Run this code and submit the `Y_submit.txt` file generated.

In [None]:
X = np.genfromtxt('data/X_train.txt', delimiter=None)
Y = np.genfromtxt('data/Y_train.txt', delimiter=None)

X_submit = np.genfromtxt('data/X_test.txt', delimiter=None)

# Shuffle rows of X and Y in the same way
s = np.arange(X.shape[0])
np.random.seed(0)
np.random.shuffle(s)

X = X[s]
Y = Y[s]

print X.shape

# Scale the data, fitting only on training data
scaler = StandardScaler().fit(X)
X = scaler.transform(X)
X_submit = scaler.transform(X_submit)

In [None]:
clf = MLPClassifier(
    alpha=10.0**-8,
    hidden_layer_sizes=(100, 100, 100)
)
clf.fit(X, Y)
predictions = clf.predict(X_submit)

In [None]:
Y_submit = np.vstack((np.arange(X_submit.shape[0]), predictions)).T
np.savetxt('Y_submit.txt', Y_submit, '%d, %.2f', header='ID,Prob1',comments='',delimiter=',')

In [None]:
print(predictions)