In [1]:
import numpy as np
import pandas as pd 
%matplotlib inline
import datetime
import matplotlib.pyplot as plt
import sklearn
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from scipy.stats import zscore
from imblearn.over_sampling import SMOTE
from scipy.stats import itemfreq
from time import time

In [2]:
def createSubmission(filename, y):
    fo = open( filename , 'w' )
    fo.write("ImageId,PredictedClass\n")
    for i in range(y.shape[0]):
        fo.write(str(i)+","+str(y[i])+"\n")
    fo.close()

def nnPredict(model,x, y):
    ypredict= model.predict(x)
    score= f1_score(y , ypredict, average='macro');
    return score

In [3]:
xin= np.load("./X_train.npy")
yin= np.load("./y_train.npy")
xout= np.load("./X_test.npy")

In [4]:
#scale data
def normalize(x):
    r=zscore(x, axis=0)
    return r

In [6]:
def overSampling(x, y):
    sm = SMOTE(kind='svm')
    x_resampled, y_resampled = sm.fit_sample(x, y)
    return (x_resampled, y_resampled)

In [7]:
xin= np.load("./X_train.npy")
yin= np.load("./y_train.npy")
x= xin
y= yin

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0)
for _ in range(1):
    x_train,y_train = overSampling(x_train, y_train)
print(x_train.shape)
print(x.shape)
print(itemfreq(yin))
print(itemfreq(y_train))

x_train= normalize(x_train)
x_test= normalize(x_test)

(785, 1850)
(966, 1850)
[[  0  51]
 [  1 183]
 [  2  85]
 [  3 395]
 [  4  86]
 [  5  60]
 [  6 106]]
[[  0 143]
 [  1 136]
 [  2  57]
 [  3 277]
 [  4  62]
 [  5  44]
 [  6  66]]


In [22]:
nn = MLPClassifier(hidden_layer_sizes=(1851,400,),
                   activation= 'tanh',
                   solver= 'lbfgs',
                   alpha= 2,
                   max_iter= 500,
                   learning_rate_init= 0.001,
                   verbose= True,
                   early_stopping= True,
                   validation_fraction= 0.3,
                   tol= 0.0001
                  )

nn.fit(x_train,y_train)
y_p= nn.predict(x_train)
print('Performance in sample:\n')
print(classification_report(y_train, y_p))
print()
print(f1_score(y_train, y_p, average= None))
print()
y_pred = nn.predict(x_test)
print("Detailed classification report:")
print()
print(classification_report(y_test, y_pred))
print()
print(f1_score(y_test, y_pred, average= None))
print()
score= nnPredict(nn, x_test, y_test)
print("F1 _macro:" + str(score))

Performance in sample:

             precision    recall  f1-score   support

          0       1.00      1.00      1.00       143
          1       1.00      1.00      1.00       136
          2       1.00      1.00      1.00        57
          3       1.00      1.00      1.00       277
          4       1.00      1.00      1.00        62
          5       1.00      1.00      1.00        44
          6       1.00      1.00      1.00        66

avg / total       1.00      1.00      1.00       785


[ 1.  1.  1.  1.  1.  1.  1.]

Detailed classification report:

             precision    recall  f1-score   support

          0       0.68      0.76      0.72        17
          1       0.71      0.74      0.73        47
          2       0.77      0.82      0.79        28
          3       0.92      0.92      0.92       118
          4       0.83      0.79      0.81        24
          5       0.73      0.69      0.71        16
          6       0.89      0.82      0.86        40

avg /

In [23]:
x_in= xin
y_in= yin
for _ in range(1):
    x_in,y_in = overSampling(x_in, y_in)
    

In [25]:
NEURAL_NETWORK_ARCH = ((1851,400,))
DATE = '10_30_2'

nn = MLPClassifier(hidden_layer_sizes=(1851,400,),
                   activation= 'tanh',
                   solver= 'lbfgs',
                   alpha= 2,
                   max_iter= 500,
                   learning_rate_init= 0.001,
                   verbose= True,
                   early_stopping= True,
                   validation_fraction= 0.3,
                   tol= 0.00001
                  )
x_in= normalize(x_in)
nn.fit(x_in,y_in)

# Run the trained neural network on the test set and write results to a csv file
filename = './submission_nn_' + DATE + '_' + str(NEURAL_NETWORK_ARCH) + '.csv'
x_out= normalize(xout)
y_out = nn.predict(x_out)
createSubmission(filename, y_out)

In [None]:
#Score on Kaggle achived: 0.84472 with tol= 0.0001 |  with 0.83851 tol= 0.00001
