In [32]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from livelossplot import PlotLossesKeras
# import keras models
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from keras.utils import np_utils

In [33]:
# set random seed globally
my_seed = 21
from numpy.random import seed
seed(my_seed)
import tensorflow as tf
tf.random.set_seed(my_seed) 
# tensorflow version
print(tf.__version__)

2.6.0


In [34]:
df = pd.read_csv('1_Datasets/Maternal_Health_Risk_Data_Set.csv',
                 )
df

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,high risk
1,35,140,90,13.0,98.0,70,high risk
2,29,90,70,8.0,100.0,80,high risk
3,30,140,85,7.0,98.0,70,high risk
4,35,120,60,6.1,98.0,76,low risk
...,...,...,...,...,...,...,...
1009,22,120,60,15.0,98.0,80,high risk
1010,55,120,90,18.0,98.0,60,high risk
1011,35,85,60,19.0,98.0,86,high risk
1012,43,120,90,18.0,98.0,70,high risk


In [35]:
# encoding the target column
le = LabelEncoder()
label = le.fit_transform(df['RiskLevel'])
label

encoded_df = df.copy()
encoded_df.drop("RiskLevel", axis=1, inplace=True)
encoded_df["RiskLevel"] = label
encoded_df

Unnamed: 0,Age,SystolicBP,DiastolicBP,BS,BodyTemp,HeartRate,RiskLevel
0,25,130,80,15.0,98.0,86,0
1,35,140,90,13.0,98.0,70,0
2,29,90,70,8.0,100.0,80,0
3,30,140,85,7.0,98.0,70,0
4,35,120,60,6.1,98.0,76,1
...,...,...,...,...,...,...,...
1009,22,120,60,15.0,98.0,80,0
1010,55,120,90,18.0,98.0,60,0
1011,35,85,60,19.0,98.0,86,0
1012,43,120,90,18.0,98.0,70,0


In [36]:
# Set the total number of classes
nb_classes = len(encoded_df['RiskLevel'].unique())

# Creating target and features
X = encoded_df.drop(['RiskLevel'], axis=1)
y = encoded_df['RiskLevel']

# scale the variables
sc = StandardScaler() 
X_scaled = sc.fit_transform(X)

# Split into train and test set and normalize data
X_train, X_test, y_train, y_test = train_test_split(X_scaled,y, test_size = 0.2,stratify=y) #, random_state = 0)

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(811, 6)
(811,)
(203, 6)
(203,)


In [37]:
def buildSequentialModel():
    # Build a Sequential Model.
    model = Sequential()
    # model.add(Flatten(input_shape=(28, 28)))
    
    
    model.add(Dense(100, kernel_initializer='normal', activation='tanh'))
    model.add(Dropout(0.3))
    model.add(Dense(80, kernel_initializer='normal', activation='tanh'))
    model.add(Dropout(0.3))
    model.add(Dense(60, kernel_initializer='normal', activation='tanh'))
    model.add(Dropout(0.3))
    
    # Output Layer
    model.add(Dense(nb_classes, activation='softmax'))
   
    return model

Conventional Activation Functions to test
- Logistic regression hypothesis (Sigmoid)
- Hyperbolic Tangent (tanh) # rescaled sigmoid to (-1, +1)
- Rectified Linear Unit (ReLU)
- Gaussian Error Linear Unit (GELU) # smoothed ReLU
- Normalized Exponential Function (Softmax)

In [38]:
# instantiate model
model = buildSequentialModel()
# compile model
#model.compile(loss='categorical_crossentropy', optimizer='adam')
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
                loss=keras.losses.SparseCategoricalCrossentropy(),
                metrics=['accuracy'])

In [39]:
model.fit(X_train, y_train,
          batch_size=100, epochs=50, verbose=1,
          validation_data=(X_test, y_test))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x196b4ec0c10>

In [40]:
#scores = model.evaluate(X_test, Y_test, verbose=0)
score = model.evaluate(X_test, y_test, verbose=1)
print('Your CV accuracy score is:', score[1])

Your CV accuracy score is: 0.6305418610572815


In [41]:
# The predict_classes function outputs the highest probability class
# according to the trained classifier for each input example.
predicted_classes = np.argmax(model.predict(X_test), axis=-1)


# Identify which examples were correctly and incorrectly classified.
#correct_indices = np.nonzero(predicted_classes == y_test)[0]
#incorrect_indices = np.nonzero(predicted_classes != y_test)[0]

correct_indices = np.nonzero(predicted_classes == y_test.values)[0]
incorrect_indices = np.nonzero(predicted_classes != y_test.values)[0]


# Count the number of incorrectly classified examples of the considered test cases
len(incorrect_indices)
print("Your classifier got the following " + str(len(incorrect_indices)) + " items wrong out of " + str(X_test.shape[0]) + ":")
print(incorrect_indices)

Your classifier got the following 75 items wrong out of 203:
[  0   7   8   9  12  14  16  18  19  21  25  28  29  30  33  37  38  43
  44  45  48  51  53  57  60  61  62  63  66  67  71  73  75  76  77  78
  81  83  85  87  92  94  97 102 104 106 108 109 115 121 122 124 126 130
 131 135 137 138 142 144 146 150 152 164 167 168 170 175 180 186 188 191
 193 197 199]


In [42]:
import sklearn
sklearn.metrics.confusion_matrix(y_test, predicted_classes, labels=None, sample_weight=None, normalize=None)

array([[39,  6, 10],
       [ 4, 70,  7],
       [11, 37, 19]], dtype=int64)