In [1]:
import pandas as pd
import numpy as np
import keras

Using TensorFlow backend.


In [2]:
# Read the raw CSV from a path relative to the notebook's working directory.
# header = None makes pandas number the columns (0..18 in the preview below)
# instead of taking names from the first row of the file.
dataset = pd.read_csv('data/Seismic-Bumps.csv', header = None)
# Display (rows, columns) as the cell output.
dataset.shape

(2584, 19)

In [3]:
dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,a,a,N,15180,48,-72,-72,a,0,0,0,0,0,0,0,0,0,0,0
1,a,a,N,14720,33,-70,-79,a,1,0,1,0,0,0,0,0,2000,2000,0
2,a,a,N,8050,30,-81,-78,a,0,0,0,0,0,0,0,0,0,0,0
3,a,a,N,28820,171,-23,40,a,1,0,1,0,0,0,0,0,3000,3000,0
4,a,a,N,12640,57,-63,-52,a,0,0,0,0,0,0,0,0,0,0,0


## Create X and Y

In [4]:
# Feature matrix: columns 0-17 as a NumPy array (object dtype, because string
# and numeric columns are mixed).
X = dataset.iloc[:, 0:18].values
# Target vector: the last column (index 18).
Y = dataset.iloc[:, 18].values


In [5]:
X.shape

(2584, 18)

In [6]:
Y.shape

(2584,)

In [7]:
X[0]

array(['a', 'a', 'N', 15180, 48, -72, -72, 'a', 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0], dtype=object)

In [8]:
Y[0]

0

## Preprocess the Data

In [9]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler

In [10]:
# Encoder used for the target labels only.
le_Y = LabelEncoder()

In [11]:
# Map the target values to consecutive integer class ids.
# NOTE(review): the first label displays as 0 both before and after this step,
# so the labels are presumably already 0/1 and this is effectively a no-op — confirm.
Y = le_Y.fit_transform(Y)
# Show the first encoded label.
Y[0]

0

In [12]:
def encoder_X(index):
    """Label-encode one column of the notebook-global feature matrix ``X`` in place.

    A fresh ``LabelEncoder`` is fitted on column ``index`` and that column is
    overwritten with the resulting integer codes. The fitted encoder is not
    kept and nothing is returned.
    """
    column_codes = LabelEncoder().fit_transform(X[:, index])
    X[:, index] = column_codes


In [13]:
# Positions of the string-valued columns of X (shown as 'a', 'a', 'N' and 'a'
# in the first row) that must become integers before one-hot encoding and scaling.
to_be_encoded_indices = [0, 1, 2, 7]

In [14]:
# Label-encode each categorical column in turn; encoder_X overwrites the
# corresponding column of the global X in place.
for x in to_be_encoded_indices:
    encoder_X(x)

In [15]:
X[0]

array([0, 0, 0, 15180, 48, -72, -72, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      dtype=object)

In [16]:
# One-hot encoder for column 7 of X (one of the label-encoded categorical columns).
# NOTE(review): the `categorical_features` argument was deprecated in scikit-learn 0.20
# and removed in 0.22; newer versions need `ColumnTransformer` for this.
ohe_X = OneHotEncoder(categorical_features = [7])

In [17]:
# Expand the selected column into indicator columns and convert the sparse
# result into a dense array.
X = ohe_X.fit_transform(X).toarray()
# Drop the first column — presumably the first indicator of the encoded feature,
# removed to avoid the dummy-variable trap (relies on the encoder placing the
# indicator columns first; verify).
X = X[:, 1:]
# Net effect per the displayed shape: 18 -> 19 columns.
X.shape

(2584, 19)

In [18]:
# Second one-hot encoder, reusing the name ohe_X.
# NOTE(review): index 3 refers to the column layout produced by the previous
# encoding step, not to column 3 of the original CSV — confirm it targets the
# intended categorical feature. `categorical_features` was also removed in
# scikit-learn 0.22 (use `ColumnTransformer` on newer versions).
ohe_X = OneHotEncoder(categorical_features = [3])

In [19]:
# Expand the second categorical column into indicator columns (dense array).
X = ohe_X.fit_transform(X).toarray()
# Drop the first column again — presumably one indicator of the newly encoded
# feature, to avoid the dummy-variable trap (verify the column order).
X = X[:, 1:]
# Net effect per the displayed shape: 19 -> 20 columns.
X.shape

(2584, 20)

In [20]:
# Scaler that standardizes each feature column (zero mean, unit variance).
sc_X = StandardScaler()


In [21]:
# Standardize every column of X; the encoded matrix is overwritten by the scaled one.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so test-set statistics leak into preprocessing. Prefer to split
# first, fit on the training rows only, and apply `transform` to the test rows.
X = sc_X.fit_transform(X)
# Show the first scaled sample.
X[0]


array([-0.76630515, -0.13757705, -0.29895828, -0.10838026, -0.73230209,
       -1.34374329, -0.32756058, -0.87207336, -1.05071094, -1.21145926,
       -0.62998398, -0.50225321, -0.51042346, -0.24273572, -0.06830542,
        0.        ,  0.        ,  0.        , -0.24332671, -0.22108685])

## Create Train and Test Data


In [22]:
from sklearn.model_selection import train_test_split

In [23]:
# Hold out 20% of the rows for testing; random_state pins the shuffle so the
# split is repeatable.
# NOTE(review): the split is not stratified although the positive class is rare
# (see the class counts displayed a few cells below) — consider `stratify = Y`.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 4)

In [24]:
X_train.shape

(2067, 20)

In [25]:
X_test.shape

(517, 20)

In [26]:
Y_train.shape

(2067,)

In [27]:
Y_test.shape

(517,)

In [28]:
# Class distribution of the training labels, rendered as a one-column table.
pd.DataFrame(Y_train)[0].value_counts().to_frame()

Unnamed: 0,0
0,1935
1,132


In [29]:
# Class distribution of the test labels, rendered as a one-column table.
pd.DataFrame(Y_test)[0].value_counts().to_frame()

Unnamed: 0,0
0,479
1,38


## Create and train the Neural Network Classifier

In [30]:

from keras.models import Sequential
from keras.layers import Dense

In [31]:
# Empty sequential model; the layers are appended in the following cell.
clf_ann = Sequential()

In [32]:
# Layer-width notes kept from tuning (presumably candidate hidden-layer sizes;
# the second line matches the architecture built below):
# 10 20 40 80 160
# 40 40 160

# Keras 2 argument names are used throughout (`units`, `kernel_initializer`,
# `epochs`). The Keras 1 spellings `output_dim`, `init` and `nb_epoch` only work
# through a deprecation shim that emits a warning on every call.

# First hidden layer: one input per feature column -> 40 ReLU units.
# The input width is read from the training matrix instead of being hard-coded,
# so it stays correct if the preprocessing changes the number of columns.
clf_ann.add(Dense(units = 40, kernel_initializer = 'uniform', activation = 'relu', input_dim = X_train.shape[1]))

# Second hidden layer: 40 ReLU units.
clf_ann.add(Dense(units = 40, kernel_initializer = 'uniform', activation = 'relu'))

# Third hidden layer: 160 ReLU units.
clf_ann.add(Dense(units = 160, kernel_initializer = 'uniform', activation = 'relu'))

# Output layer: a single sigmoid unit giving the probability of the positive class.
clf_ann.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))

# Compile the ANN for binary classification.
clf_ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Train the ANN on the training data; the returned History object is the cell output.
clf_ann.fit(X_train, Y_train, batch_size = 10, epochs = 100)

  """
  import sys
  if __name__ == '__main__':
  if sys.path[0] == '':


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x1ed1b50cc18>

## Test the Neural Network on the Test Data


In [33]:
# Predict a probability for every test sample and label it positive when that
# probability exceeds the 0.5 decision threshold.
Y_pred = clf_ann.predict(X_test) > 0.5

## Check the Accuracy

In [34]:
from sklearn.metrics import accuracy_score, confusion_matrix
# Fraction of test samples whose thresholded prediction matches the true label.
# NOTE(review): accuracy is a weak metric here — 479 of the 517 test labels are
# class 0, so always predicting 0 would already score about 0.927. Precision,
# recall or ROC-AUC for the positive class would be more informative.
accuracy_score(Y_test, Y_pred)

0.9129593810444874

In [35]:
# Confusion matrix (rows: true class, columns: predicted class). In the displayed
# result only 12 of the 38 positive test samples are detected.
confusion_matrix(Y_test, Y_pred)

array([[460,  19],
       [ 26,  12]], dtype=int64)