# Steps involved in creating a neural network #

1) Define the model
2) Compile the model
3) Fit the model


In [1]:
# Import iris dataset #
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
iris = load_iris()
iris_df = pd.DataFrame(data=np.c_[iris['data'],iris['target']],
                    columns=iris['feature_names'] + ['target'])
iris_df = iris_df.loc[iris_df['target'] != 2]
feature_vector = iris_df.columns.str.contains('cm')
X = iris_df.loc[:,feature_vector]
Y = iris_df[['target']]

# Split the dataset into test and train datasets

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.60, random_state=0)

# """
# Objective - Convert the dataset to binary dataset
# 1. Convert it to a dataframe. Why? What is a dataframe? DONE
# 2. Filter target with !=2 and features with corresponding value. DONE
# 3. What is the current data type of iris variable? DONE
#     + Bunch data type by sklearn. A dictionary with two numpy arrays -- data and target.
#     Contains additional details about the dataset as well.
# 4. Qs
#     + Cross check the parameters in train_test_split()
# """


# Logistic regression using DNN #
    1. The model
    2. Confusion matrix
    3. Classification report

__The Model__

In [2]:
### Logistic regression using DNN ###
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.utils import np_utils

np.random.seed(7)
nb_classes = 4

# Defining model #

model = Sequential()
model.add(Dense(1,input_dim=4,activation='sigmoid'))
# model.add(Dense(1))
# model.add(Activation('softmax'))
# model.add(Dense(16,activation='sigmoid'))
# model.add(Dense(1,activation='sigmoid'))

# Compile the model #

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit the model #

# model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=200, batch_size=10)
model.fit(x_train, y_train, epochs=200, batch_size=10, verbose=0)
# model.fit(x_train, y_train, epochs=50, batch_size=1)
# How is the dataset distributed?
# model.fit(x_train, y_train, epochs=150, batch_size=10)
# model.summary()

# Evaluate the model #

scores = model.evaluate(x_test, y_test)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

# """
# Classification report and optimization
#     1. Adding an exhaustive classification report
#     2. Analyse the report
#     3. Improve the model
#     4. Cross check the existing code
# """


Using TensorFlow backend.



acc: 93.33%


**K fold cross validation**

In [9]:
# MLP for Pima Indians Dataset with 10-fold cross validation
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import StratifiedKFold
import numpy
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
# # load pima indians dataset
# dataset = numpy.loadtxt("pima-indians-diabetes.csv", delimiter=",")
# # split into input (X) and output (Y) variables
# X = dataset[:,0:8]
# Y = dataset[:,8]
# define 10-fold cross validation test harness
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
cvscores = []
# X = X.values
# Y = Y.values
# print(type(X))
for train, test in kfold.split(X, Y):
    # create model
    model = Sequential()
    model.add(Dense(1,input_dim=4,activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # Fit the model
    # model.fit(X[train], Y[train], epochs=150, batch_size=10, verbose=0)
    model.fit(X.iloc[train], Y.iloc[train], epochs=100, batch_size=10, verbose=0)
    # evaluate the model
    scores = model.evaluate(X.iloc[test], Y.iloc[test], verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)
print("%.2f%% (+/- %.2f%%)" % (numpy.mean(cvscores), numpy.std(cvscores)))

acc: 100.00%
acc: 100.00%
acc: 100.00%
acc: 80.00%
acc: 100.00%
acc: 20.00%
acc: 100.00%
acc: 100.00%
acc: 100.00%
acc: 100.00%
90.00% (+/- 24.08%)


__Confusion Matrix__

In [4]:
# Prediction #
y_pred = model.predict(x_test)
y_pred = (y_pred>0.5)
# Plot ROC curve. Sensitivity vs specificity
# Try changing the threshold value
# prediction.shape
# print(prediction)

# Creating the Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Remove the 3rd class #
# Loss function and response variable encoding(y_observed) #

[[32  0]
 [ 0 28]]


__Classification Report__

In [5]:
target_names = [iris.target_names[0], iris.target_names[1]]
print(classification_report(y_test, y_pred, target_names=target_names))

             precision    recall  f1-score   support

     setosa       1.00      1.00      1.00        32
 versicolor       1.00      1.00      1.00        28

avg / total       1.00      1.00      1.00        60



# SVM using DNN #

    1. The model
    2. Confusion matrix
    3. Classification report

In [6]:
### SVM using DNN ###

# Defining model #
np.random.seed(7)

model = Sequential()
model.add(Dense(1,input_dim=4,activation='linear'))
# model.add(Dense(1,activation='linear'))

# Compile the model #

model.compile(loss='categorical_hinge', optimizer='adam', metrics=['accuracy'])

# Fit the model #

model.fit(X, Y, epochs=60, batch_size=10)

# Evaluate the model #

scores = model.evaluate(X, Y)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60

acc: 81.00%


# Qs/Doubts #
1. Core methods to analyse
    + Sequential
    + add
    + compile
    + fit
    + evaluate
2. What exactly is sequential for?
3. Classification report
    + precision
    + recall
    + f1 score
    + support
