In [95]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score,StratifiedKFold,GridSearchCV
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense,Activation,Dropout
from keras.wrappers.scikit_learn import KerasClassifier

In [49]:
# Read the raw train / test tables.
train = pd.read_csv('data/wh18_Train.csv')
test = pd.read_csv('data/wh18_Test.csv')

# Boolean masks selecting the label column in each table.
train_is_label = train.columns == 'Class'
test_is_label = test.columns == 'Class'

# y_train is kept as a one-column DataFrame (not a Series).
y_train = train.loc[:, train_is_label]

# The scaler is fit on the training features only and then reused on the
# test features, so no test statistics are used for standardisation.
sc = StandardScaler()
X_train = train.loc[:, ~train_is_label]
X_train = pd.DataFrame(sc.fit_transform(X_train), columns=X_train.columns)
X_test = test.loc[:, ~test_is_label]
X_test = pd.DataFrame(sc.transform(X_test), columns=X_test.columns)

In [103]:
def build_model(dropout_rate=0.0,neurons=24,activation='sigmoid',init_mode='he_normal',input_dim=None):
    """Build and compile a binary classifier with a single hidden layer.

    Architecture: Dense(neurons) -> Dropout(dropout_rate) -> Dense(1, sigmoid),
    compiled with the adam optimizer, binary cross-entropy loss and the
    accuracy metric.

    Parameters
    ----------
    dropout_rate : float
        Rate handed to the Dropout layer placed after the hidden layer.
    neurons : int
        Number of units in the hidden Dense layer.
    activation : str
        Activation function of the hidden layer.
    init_mode : str
        Kernel initializer of the hidden layer.
    input_dim : int, optional
        Number of input features. When left as None the width of the
        notebook-level ``X_train`` is used, which is what the function
        always did before this parameter existed.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    if input_dim is None:
        # Backward-compatible default: rely on the training frame defined in
        # an earlier cell. Pass input_dim explicitly to avoid that dependency.
        input_dim = X_train.shape[1]

    model = Sequential()
    model.add(Dense(units=neurons,
                    activation=activation,
                    input_dim=input_dim,
                    kernel_initializer=init_mode))
    model.add(Dropout(dropout_rate))
    # Single sigmoid unit: the output is the probability of the positive class.
    model.add(Dense(units=1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [105]:
# scikit-learn compatible wrapper around build_model; the keyword arguments
# other than build_fn are forwarded to the underlying Keras fit call.
classifier = KerasClassifier(
    build_fn=build_model,
    epochs=20,
    batch_size=20,
    shuffle=False,
    verbose=0,
)

In [80]:
# Candidate values for each build_model hyper-parameter.
init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']

# Dropout rates 0.0 .. 0.9. A rate of 1.0 is excluded on purpose: it would
# drop every hidden activation. Built from integers so the values are exact
# one-decimal floats rather than accumulated float steps.
dropout_rate = [round(0.1 * step, 1) for step in range(10)]

# Hidden layer sizes 4 .. 28. Zero is excluded: a Dense layer needs at least
# one unit.
neurons = list(range(4, 32, 4))

param_grid = dict(init_mode=init_mode,
                  dropout_rate=dropout_rate,
                  activation=activation,
                  neurons=neurons)

# NOTE: this grid has 8 * 10 * 8 * 7 = 4480 combinations, each fitted on
# 3 folds; narrow the lists above for a quicker search.
kf = StratifiedKFold(n_splits=3)
grid = GridSearchCV(estimator=classifier, param_grid=param_grid, n_jobs=-1, cv=kf, scoring='roc_auc')
grid_result = grid.fit(X_train, y_train)
print(grid_result.best_params_)
print(grid_result.best_score_)

{'init_mode': 'he_normal'}
0.9006279550827423


In [93]:
from sklearn.model_selection import cross_val_score,StratifiedKFold
# Mean ROC AUC of the wrapped classifier over 5 stratified folds.
kf = StratifiedKFold(n_splits=5)
scores = cross_val_score(
    classifier,
    X_train,
    y_train,
    cv=kf,
    scoring='roc_auc',
)
print(scores.mean())

0.8542271027184819


In [86]:
# Refit on the whole training set, then write the predictions for the test
# features as a single 'Class' column (the DataFrame index is written too).
classifier.fit(X_train, y_train)
(
    pd.DataFrame(classifier.predict(X_test), columns=['Class'])
    .to_csv('data/submission.csv')
)

In [66]:
# Count the rows per label to see how balanced the two classes are.
y_train.loc[:, 'Class'].value_counts()

0    1620
1     143
Name: Class, dtype: int64