In [1]:
from numpy import loadtxt
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU,PReLU,ELU
from keras.layers import Dropout
import pandas as pd
import matplotlib.pyplot as plt
import csv
import numpy as np
import seaborn as sns
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
data_path = '/content/drive/MyDrive/SWE_428_data/nn_data.txt'
dataset = pd.read_csv(data_path, header=None)
dataset.shape

(768, 9)

In [3]:
unscaled_x = dataset.iloc[:,0:8]
unscaled_y = dataset.iloc[:,8]

In [4]:
# define the keras model
model = Sequential()
model.add(Dense(8, input_shape=(8,), activation='relu'))
model.add(Dense(6, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [5]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# fit the keras model on the dataset
model.fit(unscaled_x, unscaled_y, epochs=100, batch_size=10)

In [7]:
# evaluate the keras model
_, accuracy = model.evaluate(unscaled_x, unscaled_y)
print('Accuracy: %.2f' % (accuracy*100))

Accuracy: 69.01


In [8]:
from sklearn import preprocessing
min_max_scaling = preprocessing.MinMaxScaler().fit(dataset)
scaled_dataset = min_max_scaling.transform(dataset)


In [9]:
scaled_x = scaled_dataset[:,0:8]
scaled_y = scaled_dataset[:,8]

In [10]:
from sklearn.model_selection import StratifiedKFold as kfold
from sklearn.metrics import confusion_matrix , classification_report, precision_score, accuracy_score

In [11]:
kfold_splits = 10
folds = kfold(n_splits=kfold_splits)

In [12]:
precision_scores = []
accuracy_scores = []
     

## With K Fold validation

In [13]:
def ann_model_with_k_fold(train_input,train_output,test_input,test_output,epochs,batch_size):
  ann_model = Sequential()
  ann_model.add(Dense(8, input_shape=(8,), activation='relu'))
  ann_model.add(Dense(6, activation='relu'))
  ann_model.add(Dense(1, activation='sigmoid'))
  ann_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
  ann_model.fit(train_input, train_output, epochs=epochs, batch_size=batch_size)
  y_pred = (ann_model.predict(test_input) > 0.5).astype(int)
  precision_scores.append(precision_score(test_output, y_pred))
  accuracy_scores.append(accuracy_score(test_output, y_pred))

## Without K-Fold Validation

In [14]:
def ann_model_without_k_fold(X,y,epochs,batch_size) :
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)
    ann_model = Sequential();
    ann_model.add(Dense(8, input_shape=(8,), activation='relu'))
    ann_model.add(Dense(6, activation='relu'))
    ann_model.add(Dense(1, activation='sigmoid'))
    ann_model.compile( optimizer = 'adam' , loss = 'binary_crossentropy', metrics = ['accuracy'])
    ann_model.fit(X_train,y_train,epochs=epochs,batch_size = batch_size)
    y_pre = ann_model.predict(X_test);
    y_pred = [] ;
    for element in y_pre:
        if element > 0.5 :
            y_pred.append(1)
        else :
            y_pred.append(0)
    precisionScore = precision_score(y_test, y_pred)
    accuracyScore = accuracy_score(y_test, y_pred)
    return precisionScore,accuracyScore ;

In [None]:
u_precisionScore,u_accuracyScore = ann_model_without_k_fold(unscaled_x,unscaled_y,100,10)
s_precisionScore,s_accuracyScore = ann_model_without_k_fold(scaled_x,scaled_y,100,10)

In [16]:
print("Precision (Unscaled) :  %.2f%% " % (u_precisionScore))
print("Accuracy ((Unscaled) :  %.2f%% " % (u_accuracyScore))
print("Precision (scaled) :  %.2f%% " % (s_precisionScore))
print("Accuracy ((scaled) :  %.2f%% " % (s_accuracyScore))

Precision (Unscaled) :  0.70% 
Accuracy ((Unscaled) :  0.73% 
Precision (scaled) :  0.74% 
Accuracy ((scaled) :  0.75% 


In [None]:
for train_index, test_index in folds.split(scaled_x,scaled_y):
   X_train, X_test = scaled_x[train_index], scaled_x[test_index]
   y_train, y_test = scaled_y[train_index], scaled_y[test_index]
   ann_model_with_k_fold(X_train,y_train,X_test,y_test,100,10)

In [18]:
print("Avg Precision (K-Fold) :  %.2f%% (+/- %.2f%%)" % (np.mean(precision_scores), np.std(precision_scores)))
print("Avg Accuracy (K-Fold) : %.2f%% (+/- %.2f%%)" % (np.mean(accuracy_scores), np.std(accuracy_scores)))

Avg Precision (K-Fold) :  0.72% (+/- 0.08%)
Avg Accuracy (K-Fold) : 0.77% (+/- 0.04%)


# Summary

| Model | Precision | Accuracy |
|-------|-----------|----------|
| With K-Fold on Scaled Dataset | 0.72% (+/- 0.08%) | 0.77% (+/- 0.04%) |
| Without K-Fold on Unscaled Dataset | 0.70% | 0.73% |
| Without K-Fold on scaled Dataset | 0.74% | 0.75% |


