# Play with keras model

In [27]:
%pylab inline
import numpy as np
import csv
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.utils import np_utils
from sklearn import metrics
from sklearn.cross_validation import KFold
from sklearn.preprocessing import StandardScaler
from preprocess import loadPreData

'''
    This demonstrates how to reach a 0.80 ROC AUC score (local 4-fold validation)
    in the Kaggle Nile virus prediction challenge. 

    The model trains in a few seconds on CPU.
'''

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


'\n    This demonstrates how to reach a 0.80 ROC AUC score (local 4-fold validation)\n    in the Kaggle Nile virus prediction challenge. \n\n    The model trains in a few seconds on CPU.\n'

In [4]:
data = loadPreData()

Index([u'Address', u'Species', u'Block', u'Street', u'Trap',
       u'AddressNumberAndStreet', u'Latitude', u'Longitude',
       u'AddressAccuracy', u'NumMosquitos', u'WnvPresent'],
      dtype='object')
Index([u'Id', u'Address', u'Species', u'Block', u'Street', u'Trap',
       u'AddressNumberAndStreet', u'Latitude', u'Longitude',
       u'AddressAccuracy'],
      dtype='object')


In [43]:
X = data['train'].get_values().astype(float)
y = data['labels']

In [44]:
input_dim = X.shape[1]
output_dim = 1

In [47]:
def scoreAUC(y,probs):
    ps = np.linspace(0.,1.,num=100)
    prs = []
    nrs = []
    for p in ps:
        preds = probs[:,0]<p
        pr = np.sum((y & preds))/float(np.sum(y))
        nr = np.sum((1-y & 1-preds))/float(np.sum(1-y))
        nrs.append(nr)
        prs.append(pr)
    xs = 1-np.array(nrs)
    ys = np.array(prs)
    dxs = xs[1:] - xs[:-1]
    ays = .5*(ys[1:] + ys[:-1])
    auc = np.sum(ays*dxs)
    return {'score':auc,'fpr':xs,'tpr':ys}

In [38]:
def build_model(input_dim, output_dim):
    model = Sequential()
    model.add(Dense(32, input_dim=input_dim))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(32))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(output_dim))

    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
    return model

In [42]:
def testModel():    
    X = np.random.rand(5000,5)
    y = (X[:,0]<0.5)*(X[:,1]<0.9)*(X[:,2]<0.85)
    X_valid = np.random.rand(1000,5)
    y_valid = (X_valid[:,0]<0.5)*(X_valid[:,1]<0.9)*(X_valid[:,2]<0.85)
    model = build_model(5, 1)
    model.fit(X, y, nb_epoch=10, batch_size=32, validation_data=(X_valid, y_valid), verbose=True)

In [None]:
nb_folds = 4
kfolds = KFold(len(y), nb_folds)
av_roc = 0.
f = 0
for train, valid in kfolds:
    print('---'*20)
    print('Fold', f)
    print('---'*20)
    f += 1
    X_train = X[train]
    X_valid = X[valid]
    #Y_train = Y[train]
    y_train = y[train]
    #Y_valid = Y[valid]
    y_valid = y[valid]

    print("Building model...")
    model = build_model(input_dim, output_dim)

    print("Training model...")

    model.fit(X_train, y_train, nb_epoch=5, batch_size=32, validation_data=(X_valid, y_valid), verbose=True)
    valid_preds = model.predict_proba(X_valid, verbose=0)
    roc = metrics.roc_auc_score(y_valid, valid_preds)
    print("ROC:", roc)
    av_roc += roc

print('Average ROC:', av_roc/nb_folds)

------------------------------------------------------------
('Fold', 0)
------------------------------------------------------------
Building model...
Training model...
Train on 7879 samples, validate on 2627 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
('ROC:', 0.5)
------------------------------------------------------------
('Fold', 1)
------------------------------------------------------------
Building model...
Training model...
Train on 7879 samples, validate on 2627 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
('ROC:', 0.5)
------------------------------------------------------------
('Fold', 2)
------------------------------------------------------------
Building model...
Training model...
Train on 7880 samples, validate on 2626 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
('ROC:', 0.5)
------------------------------------------------------------
('Fold', 3)
------------------------------------------------------------
Building model...
Tr

array([[ 1.],
       [ 1.],
       [ 1.],
       ..., 
       [ 1.],
       [ 1.],
       [ 1.]], dtype=float32)