In [None]:
# Imports
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from keras import regularizers
from keras.layers import Dropout
from tensorflow.keras.optimizers import SGD
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

Preprocessing and scaling data

In [None]:
def scaling(dataframe):
    """Mean-normalize data: (x - mean) / (max - min), computed per column.

    Keeping the features small avoids large magnitudes in the e^(-z) term of
    the sigmoid function.

    Parameters
    ----------
    dataframe : pandas.DataFrame or pandas.Series
        Numeric data to scale.

    Returns
    -------
    Same type as ``dataframe``
        The scaled data. A column whose values are all equal has zero range;
        it is scaled to 0 instead of NaN (0/0).
    """
    value_range = dataframe.max() - dataframe.min()
    # A zero range would turn the whole column into NaN, so divide by 1 there.
    if np.isscalar(value_range):
        # Series input: max()/min() return plain scalars.
        if value_range == 0:
            value_range = 1
    else:
        value_range = value_range.replace(0, 1)
    return (dataframe - dataframe.mean()) / value_range
 
# Load the dataset from diabetes.csv (path is relative to the working directory)
df = pd.read_csv(r"diabetes.csv")

# Labels: the "Outcome" column reshaped into an (n_rows, 1) column vector
ycomplete = df["Outcome"].values.reshape(-1, 1)

# Features: every column except the last one, scaled by scaling()
# (presumably the last column is "Outcome" -- verify against the CSV)
data_scaled = scaling(df.iloc[:, :-1])

Part 1 - Logistic regression

In [None]:
# Work on a copy so data_scaled itself stays without the bias column,
# then append a constant 'Bias' column of ones as the last column.
data_scaled_cpy = data_scaled.copy()
data_scaled_cpy.insert(loc=len(data_scaled_cpy.columns), column='Bias', value=1)

def hypothesis(w, traindata):
    """Return the logistic-regression prediction sigmoid(traindata . w).

    Parameters
    ----------
    w : array-like
        Weights; combined with ``traindata`` via ``np.dot(traindata, w)``.
    traindata : array-like
        Input samples.

    Returns
    -------
    numpy.ndarray
        Sigmoid of the linear combination z, with values in [0, 1].
    """
    z = np.dot(traindata, w)
    # sigmoid(z) = exp(-log(1 + e^-z)). logaddexp(0, -z) evaluates
    # log(1 + e^-z) without overflowing for very negative z, where a direct
    # np.exp(-z) would overflow and emit a RuntimeWarning.
    return np.exp(-np.logaddexp(0, -z))

def validatingtesting(w,data,y_in):
    """Return the classification accuracy (in percent) of weights ``w`` on a dataset.

    Predictions from hypothesis() above 0.5 count as class 1, everything else
    as class 0. The accuracy is 100 * (1 - mean squared difference between
    the predicted classes and ``y_in``).
    """
    predicted_labels = np.where(hypothesis(w, data) > 0.5, 1, 0)
    # For 0/1 labels the squared difference is 1 exactly where they disagree.
    squared_errors = np.square(predicted_labels - y_in)
    error_rate = np.mean(squared_errors)
    return (1 - error_rate) * 100

def costcal(y,h):
    """Return the mean binary cross-entropy between labels ``y`` and predictions ``h``.

    Parameters
    ----------
    y : array-like
        Target labels (0 or 1).
    h : array-like
        Predicted probabilities, same shape as ``y``.

    Returns
    -------
    float
        sum(-y*log(h) - (1-y)*log(1-h)) / len(h).
    """
    # Keep probabilities strictly inside (0, 1): log(0) is -inf, and
    # 0 * -inf is NaN, which would poison the whole cost.
    eps = np.finfo(float).eps
    h = np.clip(h, eps, 1 - eps)
    cost = np.sum(- (y * np.log(h)) - ((1-y) * np.log(1 - h)))/len(h)
    return cost

def main():
    """Train logistic regression by gradient descent and report the results.

    Reads the module-level ``data_scaled_cpy`` (scaled features plus a bias
    column) and ``ycomplete`` (labels), splits them 60/20/20 into training,
    validation and test sets, runs ``epochs`` gradient-descent updates from
    zero-initialized weights, then prints the learned weights, plots training
    and validation accuracy and cost per epoch, and prints the final accuracy
    on all three sets.
    """
    # Splitting data into 60% training, 20% validation and 20% testing sets:
    # 40% is held out first, then that part is split in half.
    traindata, validatedata, y_train, y_validate = train_test_split(
        data_scaled_cpy.values, ycomplete, test_size=0.4, random_state=2)
    validatedata, testdata, y_validate, y_test = train_test_split(
        validatedata, y_validate, test_size=0.5, random_state=2)

    # Initializing weights: one per input column (bias column included),
    # sized from the data rather than a hard-coded count.
    w = np.zeros((traindata.shape[1], 1))

    # Hyperparameters
    alpha = 0.03
    epochs = 200

    h = hypothesis(w, traindata)
    valacc = []
    trainacc = []
    cost_train = []
    cost_val = []

    for _ in range(epochs):
        # Gradient step. The gradient X^T (h - y) is summed over the samples
        # (not averaged), so alpha scales the summed gradient.
        w = w - alpha * (((h - y_train).T).dot(traindata)).T
        h = hypothesis(w, traindata)

        cost_train.append(costcal(y_train, h))
        cost_val.append(costcal(y_validate, hypothesis(w, validatedata)))

        trainacc.append(validatingtesting(w, traindata, y_train))
        valacc.append(validatingtesting(w, validatedata, y_validate))

    # Only the final test accuracy is reported, so the test set is scored
    # once with the trained weights instead of on every epoch.
    test_accuracy = validatingtesting(w, testdata, y_test)

    print("Weights after training: ", w.T)

    # Name and size go in a single figure() call; two separate calls would
    # open an empty named figure and draw on a second, unnamed one.
    plt.figure("Accuracy vs Epochs", figsize=(10, 5))
    plt.plot(valacc, 'r', label = 'Validation')
    plt.plot(trainacc, 'g', label = 'Training')
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")
    plt.legend(loc = 'upper left')
    plt.show()

    plt.figure("Cost function vs Epochs", figsize=(10, 5))
    plt.plot(cost_train, 'r', label = 'Training')
    plt.plot(cost_val, 'g', label = 'Validation')
    plt.xlabel("Epochs")
    plt.ylabel("Cost function")
    plt.legend(loc = 'upper left')
    plt.show()

    print("Validation dataset accuracy:",valacc[-1])

    print("Training dataset accuracy:",trainacc[-1])

    print("Testing dataset accuracy:", test_accuracy)

# Entry point: call main() only when this code is executed directly
# (i.e. __name__ is "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()

## Part 2 - Neural Network
## Part 3 - Regularization and Comparison

In [None]:

# Defining training (60%), validation (20%) and testing (20%) datasets:
# 40% is held out first, then that part is split in half.
x_train, x_exp, y_train, y_exp= train_test_split(data_scaled.values,ycomplete,test_size=0.4,random_state=2)
x_val, x_test, y_val, y_test= train_test_split(x_exp, y_exp,test_size=0.5,random_state=2)


def build_model(flag, input_dim):
    """Build the 25-15-10-1 fully connected binary classifier for one variant.

    Parameters
    ----------
    flag : int
        Regularization variant:
        1 = none, 2 = L1 (0.01) on the hidden-layer kernels,
        3 = L2 (0.01) on the hidden-layer kernels,
        4 = dropout (0.2, 0.1, 0.2) after the three hidden layers.
    input_dim : int
        Number of input features.

    Returns
    -------
    Sequential
        The uncompiled model.

    Raises
    ------
    ValueError
        If ``flag`` is not 1, 2, 3 or 4 (an empty model would otherwise be
        built and only fail later, during training).
    """
    if flag not in (1, 2, 3, 4):
        raise ValueError("flag must be 1, 2, 3 or 4, got %r" % (flag,))

    def kernel_regularizer():
        # A fresh regularizer object per layer; None means no penalty.
        if flag == 2:
            return tf.keras.regularizers.l1(0.01)
        if flag == 3:
            return tf.keras.regularizers.l2(0.01)
        return None

    hidden_units = (25, 15, 10)
    dropout_rates = (0.2, 0.1, 0.2) if flag == 4 else (None, None, None)

    network = Sequential()
    for index, (units, rate) in enumerate(zip(hidden_units, dropout_rates)):
        layer_kwargs = {"activation": "relu", "kernel_regularizer": kernel_regularizer()}
        if index == 0:
            # The first hidden layer also declares the input size.
            layer_kwargs["input_dim"] = input_dim
        network.add(Dense(units, **layer_kwargs))  # Hidden layer.
        if rate is not None:
            network.add(tf.keras.layers.Dropout(rate))
    network.add(Dense(1, activation='sigmoid'))  # Output layer.(Since there is 1 class)
    return network


# Creating the model

#Please change flag manually to view results
# 1 = no regularization, 2 = L1, 3 = L2, 4 = dropout
flag = 4

model = build_model(flag, x_train.shape[1])

epoch = 250

# Equivalent of the legacy SGD `decay` argument, which current Keras
# optimizers no longer accept: lr = 0.08 / (1 + 0.008 * update_step).
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.08, decay_steps=1, decay_rate=0.008)

model.compile(optimizer=SGD(learning_rate=lr_schedule), loss='binary_crossentropy', metrics=['accuracy'])

# Training the model.
history = model.fit(x_train,y_train,validation_data=(x_val, y_val), epochs = epoch, batch_size = 16, verbose=0)

# Evaluating the model

loss_train, accuracy_train = model.evaluate(x_train, y_train, verbose=0)
print('\n Train data Loss:', loss_train, '\tTrain data Accuracy:', accuracy_train)
loss_valid, accuracy_valid = model.evaluate(x_val, y_val,verbose=0)
print('\n Validation data Loss:', loss_valid, '\tValidation data Accuracy:', accuracy_valid)
loss_test, accuracy_test = model.evaluate(x_test, y_test, verbose=0)
print('\n Test data Loss:', loss_test, '\tTest data Accuracy:', accuracy_test)

# Plot the accuracy and loss.

plt.figure("Accuracy vs Epochs")
plt.plot(history.history['accuracy'], 'r', label = 'Training')
plt.plot(history.history['val_accuracy'], 'g', label = 'Validation')
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend(loc = 'upper left')
plt.show()

plt.figure("Loss vs Epochs")
plt.plot(history.history['loss'], 'r', label = 'Training')
plt.plot(history.history['val_loss'], 'g', label = 'Validation')
plt.xlabel("Epochs")
plt.ylabel("Cost function")
plt.legend(loc = 'upper left')
plt.show()
