In [1]:
import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils

from keras import optimizers
from keras import initializers

Using TensorFlow backend.


# Getting data

In [3]:
df = pd.read_csv("HTRU_2.csv", header = None) #csv file doesn`t have headers

In [4]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,140.5625,55.683782,-0.234571,-0.699648,3.199833,19.110426,7.975532,74.242225,0
1,102.507812,58.88243,0.465318,-0.515088,1.677258,14.860146,10.576487,127.39358,0
2,103.015625,39.341649,0.323328,1.051164,3.121237,21.744669,7.735822,63.171909,0
3,136.75,57.178449,-0.068415,-0.636238,3.642977,20.95928,6.896499,53.593661,0
4,88.726562,40.672225,0.600866,1.123492,1.17893,11.46872,14.269573,252.567306,0


In [5]:
X = df.drop(8, axis=1)
y = df[8]

In [6]:
X

Unnamed: 0,0,1,2,3,4,5,6,7
0,140.562500,55.683782,-0.234571,-0.699648,3.199833,19.110426,7.975532,74.242225
1,102.507812,58.882430,0.465318,-0.515088,1.677258,14.860146,10.576487,127.393580
2,103.015625,39.341649,0.323328,1.051164,3.121237,21.744669,7.735822,63.171909
3,136.750000,57.178449,-0.068415,-0.636238,3.642977,20.959280,6.896499,53.593661
4,88.726562,40.672225,0.600866,1.123492,1.178930,11.468720,14.269573,252.567306
...,...,...,...,...,...,...,...,...
17893,136.429688,59.847421,-0.187846,-0.738123,1.296823,12.166062,15.450260,285.931022
17894,122.554688,49.485605,0.127978,0.323061,16.409699,44.626893,2.945244,8.297092
17895,119.335938,59.935939,0.159363,-0.743025,21.430602,58.872000,2.499517,4.595173
17896,114.507812,53.902400,0.201161,-0.024789,1.946488,13.381731,10.007967,134.238910


In [7]:
y

0        0
1        0
2        0
3        0
4        0
        ..
17893    0
17894    0
17895    0
17896    0
17897    0
Name: 8, Length: 17898, dtype: int64

# Build model

In [8]:
def create_model(activation, optimizer):
    kernel_init = initializers.TruncatedNormal(mean=0.0, stddev=0.5, seed=42)
    bias_init = initializers.Constant(value = 1e-3)
    model = Sequential()
    model.add(Dense(64, input_dim=8, activation=activation, kernel_initializer=kernel_init, bias_initializer=bias_init))
    model.add(Dense(1, activation=activation, kernel_initializer=kernel_init, bias_initializer=bias_init))
    model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['accuracy'])
    
    return model

In [9]:
model = create_model('sigmoid', 'Adam')

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)
print('Training set: X={}, y={}'.format(X_train.shape, y_train.shape))
print('Test set: X={}, y={}'.format(X_test.shape, y_test.shape))

Training set: X=(14318, 8), y=(14318,)
Test set: X=(3580, 8), y=(3580,)


# Training model

In [12]:
model.fit(X_train, y_train, epochs=20)

scores = model.evaluate(X, y)
print("\nAccuracy: %.2f%%" % (scores[1]*100))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Accuracy: 97.74%


In [13]:
cost, accuracy = model.evaluate(X_test, y_test)
print('Accuracy: %.2f%%' % (accuracy*100))

Accuracy: 97.68%


# GridSearchCV

In [14]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold

In [15]:
activations=['sigmoid', 'tanh', 'relu']
optimizers=['sgd', 'adam']
epochs=[10, 20, 30]

model_search = KerasClassifier(build_fn=create_model, verbose=0)

param_grid = dict(activation=activations, optimizer=optimizers, epochs=epochs)
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

grid_search = GridSearchCV(model_search, param_grid=param_grid, cv=kfold, n_jobs=-1)

In [16]:
grid_result = grid_search.fit(X_train, y_train)

In [17]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.975555 using {'activation': 'sigmoid', 'epochs': 10, 'optimizer': 'adam'}
