In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use("seaborn")
import math
import random
import time
from numpy import log, e

In [2]:
def sigmoid(z): 
    return 1 / (1 + e**(-z))
#     return 1 / (1 + e**(-z)) - 0.0000001

In [3]:
def cost(wt, X, Y):
    Y_pred = sigmoid(X@wt)
    predict_0 = 0
    predict_1 = 0
    for i in range(len(Y)):
        if(Y[i]==0): predict_0 += (1 - Y[i]) * log(1 - Y_pred[i])
        else: predict_1 += Y[i] * log(Y_pred[i])
    return -(predict_1 + predict_0)/len(X)
#     return -(predict_1 + predict_0)

In [4]:
def accuracy(wt,X,Y):
    Y_pred = sigmoid(X@wt)
#     for i in range(len(Y_pred)):
#         if Y_pred[i]>=0.5: Y_pred[i]=1
#         else: Y_pred[i]=0
    Y_pred[Y_pred>=0.5]=1
    Y_pred[Y_pred<0.5]=0
    Y_pred = np.array(Y_pred)
    correct_class = np.sum(Y_pred==Y)   
    return 100*(correct_class/len(Y_pred))

In [5]:
def fpr(Y_pred,Y):
    tp, tn, fp, fn = 0, 0, 0, 0
    
    for i in range(len(Y)):
        if Y_pred[i] and Y[i]:
            tp += 1
        elif Y_pred[i] and not Y[i]:
            fn += 1
        elif not Y_pred[i] and Y[i]:
            fp += 1
        else:
            tn += 1
#     acc = (tp + tn) / (tp + tn + fp + fn)
    recall = tp / (tp + fn)
    precision = tp / (tp + fp)
    Fscore = 2 * recall * precision / (recall + precision)
    return Fscore, recall, precision
        

In [6]:
def plot_graph_loss(x_plot,y_plot):
    fig = plt.figure(figsize=(8,5))
    plt.subplot(1,2,1)
    plt.plot(x_plot,y_plot)
    plt.xlabel("Epochs")
    plt.ylabel("Loss/Error")
    plt.title("Error vs Epoch for Gradient Descent")
    plt.tight_layout()
    plt.show()

In [7]:
def gd(X,Y):
    wt = np.zeros(X.shape[1])
    eta = 0.01
#     eta = 0.0000005
#     eta=50
    iterations = 15000
    N = len(X)
    loss = []
    epochs = []
    acc = []
    for itr in range(iterations+1):        
        Y_pred = sigmoid(X@wt)
        wt -= eta * ((Y_pred-Y)@X) / N
        if(itr%50 ==0):
            loss.append(cost(wt,X,Y))
            acc.append(accuracy(wt,X,Y))
#             print(loss[-1])
            epochs.append(itr)
    loss = np.array(loss)
    epochs = np.array(epochs)
    return wt,loss,epochs,acc

In [8]:
def sgd(X,Y):
    wt = np.zeros(X.shape[1])
    eta = 0.05
#     eta = 50
#     eta = 0.0000005
    iterations = 1000
    loss = []
    epochs = []
    acc = []
    N = len(X)
    for itr in range(iterations):
        Y_pred = np.array(X@wt)
        if(itr%50 == 0):
            loss.append(cost(wt,X,Y))
            acc.append(accuracy(wt,X,Y))
            epochs.append(itr)
        i=0
        for x,y in zip(X,Y):
            i=i+1
#             if((i*itr)%50 == 0):
#                 loss.append(cost(wt,X,Y))
#                 acc.append(accuracy(wt,X,Y))
#                 epochs.append(itr)
            t = x@wt
            y_pred = sigmoid(t)
            delta = y-y_pred
            wt+= eta*(delta*x) / N
#     loss.append(cost(wt,X,Y))
    loss = np.array(loss)
    epochs = np.array(epochs)
    return wt,loss,epochs,acc

In [9]:
def split(data):
    n=round(len(data)*0.7)
    np.random.shuffle(data)
    train = data[:n]
    test = data[n:]
    x_train, x_test, y_train, y_test = [], [], [], []
    for i in train:
        x_train.append(i[: (train.shape[1]-1)])
        y_train.append(i[(train.shape[1]-1)])
    
    for i in test:
        x_test.append(i[: (test.shape[1]-1)])
        y_test.append(i[(test.shape[1]-1)])
        
    x_train = np.array(x_train)
    m=np.mean(x_train,axis=0)
    s=np.std(x_train,axis=0)
    x_train=(x_train-m)/s
    ones=np.ones((x_train.shape[0],1))
    x_train = np.append(ones, x_train, axis=1)
    y_train = np.array(y_train)
    x_test = np.array(x_test)
    m=np.mean(x_test,axis=0)
    s=np.std(x_test,axis=0)
    x_test=(x_test-m)/s
    ones=np.ones((x_test.shape[0],1))
    x_test = np.append(ones, x_test, axis=1)
    y_test = np.array(y_test)
    return x_train, y_train, x_test, y_test

In [10]:
def predict(wt,X):
    Y_pred = sigmoid(X@wt)
    Y_pred[Y_pred>=0.5]=1
    Y_pred[Y_pred<0.5]=0
    Y_pred = np.array(Y_pred)
    return Y_pred

In [12]:
def main():
    start=time.time()
    data = pd.read_csv('dataset_LR.csv')
    data = np.array(data)
    atrgd,ftrgd,rtrgd,ptrgd,ategd,ftegd,rtegd,ptegd = 0,0,0,0,0,0,0,0
    atrsgd,ftrsgd,rtrsgd,ptrsgd,atesgd,ftesgd,rtesgd,ptesgd = 0,0,0,0,0,0,0,0
    iterations = 10
    for i in range(iterations):
        x_train, y_train, x_test, y_test = split(data)
    
        wt_gd, loss_gd, itr_gd, acc_gd_list = gd(x_train, y_train)
        Y_pred_train_gd = predict(wt_gd, x_train)
        Y_pred_test_gd = predict(wt_gd, x_test)
        
        wt_sgd, loss_sgd, itr_sgd, acc_sgd_list = sgd(x_train, y_train)
        Y_pred_train_sgd = predict(wt_sgd, x_train)
        Y_pred_test_sgd = predict(wt_sgd, x_test)
        
        acc_gd_train = accuracy(wt_gd,x_train,y_train)
        fscore_gd_train, recall_gd_train, precision_gd_train = fpr(Y_pred_train_gd,y_train)        
        acc_gd_test = accuracy(wt_gd,x_test,y_test)
        fscore_gd_test, recall_gd_test, precision_gd_test = fpr(Y_pred_test_gd,y_test)
        
        acc_sgd_train = accuracy(wt_sgd,x_train,y_train)
        fscore_sgd_train, recall_sgd_train, precision_sgd_train = fpr(Y_pred_train_sgd,y_train)        
        acc_sgd_test = accuracy(wt_sgd,x_test,y_test)
        fscore_sgd_test, recall_sgd_test, precision_sgd_test = fpr(Y_pred_test_sgd,y_test)
        
        atrgd += acc_gd_train
        ftrgd += fscore_gd_train
        rtrgd += recall_gd_train
        ptrgd += precision_gd_train
        
        ategd += acc_gd_test
        ftegd += fscore_gd_test
        rtegd += recall_gd_test
        ptegd += precision_gd_test
        
        atrsgd += acc_sgd_train
        ftrsgd += fscore_sgd_train
        rtrsgd += recall_sgd_train
        ptrsgd += precision_sgd_train
        
        atesgd += acc_sgd_test
        ftesgd += fscore_sgd_test
        rtesgd += recall_sgd_test
        ptesgd += precision_sgd_test
        
#         plot_graph_loss(itr_sgd,acc_sgd_list)
        
        print("Wt of gd:",wt_gd)
        print("Wt of sgd:",wt_sgd)
        
        print("##############################################################")
        
        print("Train Statistics using GD for ",i+1," iteration")
        
        print("Accuracy: ", "{0:.2f}".format(acc_gd_train))
        print("Fscore: ","{0:.2f}".format(fscore_gd_train))
        print("Recall: ","{0:.2f}".format(recall_gd_train))
        print("Precision: ","{0:.2f}".format(precision_gd_train))
        
        print("##############################################################")
        
        print("Test Statistics using GD for ",i+1," iteration")
#         print("Wt of gd:",wt_gd)
        print("Accuracy: ","{0:.2f}".format(acc_gd_test))
        print("Fscore: ","{0:.2f}".format(fscore_gd_test))
        print("Recall: ","{0:.2f}".format(recall_gd_test))
        print("Precision: ","{0:.2f}".format(precision_gd_test))
        
        print("##############################################################")
        
        print("Train Statistics using SGD for ",i+1," iteration")
        
        print("Accuracy: ","{0:.2f}".format(acc_sgd_train))
        print("Fscore: ","{0:.2f}".format(fscore_sgd_train))
        print("Recall: ","{0:.2f}".format(recall_sgd_train))
        print("Precision: ","{0:.2f}".format(precision_sgd_train))
        
        print("##############################################################")
        
        print("Test Statistics using SGD for ",i+1," iteration")
#         print("Wt of sgd:",wt_sgd)
        print("Accuracy: ","{0:.2f}".format(acc_sgd_test))
        print("Fscore: ","{0:.2f}".format(fscore_sgd_test))
        print("Recall: ","{0:.2f}".format(recall_sgd_test))
        print("Precision: ","{0:.2f}".format(precision_sgd_test))
        
        print("##############################################################")
        

    print("Average Train Statistics using GD")
    print("Average Accuracy: ", "{0:.2f}".format(atrgd/iterations))
    print("Average Fscore: ", "{0:.2f}".format(ftrgd/iterations))
    print("Average Recall: ", "{0:.2f}".format(rtrgd/iterations))
    print("Average Precision: ", "{0:.2f}".format(ptrgd/iterations))
    
    print("##############################################################")
    print("Average Test Statistics using GD")
    print("Average Accuracy: ","{0:.2f}".format(ategd/iterations))
    print("Average Fscore: ", "{0:.2f}".format(ftegd/iterations))
    print("Average Recall: ", "{0:.2f}".format(rtegd/iterations))
    print("Average Precision: ", "{0:.2f}".format(ptegd/iterations))
    
    print("##############################################################")
    print("Average Train Statistics using SGD")
    print("Average Accuracy: ", "{0:.2f}".format(atrsgd/iterations))
    print("Average Fscore: ", "{0:.2f}".format(ftrsgd/iterations))
    print("Average Recall: ", "{0:.2f}".format(rtrsgd/iterations))
    print("Average Precision: ", "{0:.2f}".format(ptrsgd/iterations))
    
    print("##############################################################")
    print("Average Train Statistics using SGD")
    print("Average Accuracy: ", "{0:.2f}".format(atesgd/iterations))
    print("Average Fscore: ", "{0:.2f}".format(ftesgd/iterations))
    print("Average Recall: ", "{0:.2f}".format(rtesgd/iterations))
    print("Average Precision: ", "{0:.2f}".format(ptesgd/iterations))

    end=time.time()
    print("Time taken is: ",(end-start))

In [13]:
main()
print('END')

Wt of gd: [-0.87073857 -3.9224043  -3.45094838 -3.2508374   0.31221243]
Wt of sgd: [-0.49840162 -2.85227815 -2.1838462  -1.96072342  0.18915943]
##############################################################
Train Statistics using GD for  1  iteration
Accuracy:  97.81
Fscore:  0.98
Recall:  0.96
Precision:  1.00
##############################################################
Test Statistics using GD for  1  iteration
Accuracy:  98.30
Fscore:  0.98
Recall:  0.97
Precision:  0.99
##############################################################
Train Statistics using SGD for  1  iteration
Accuracy:  97.29
Fscore:  0.97
Recall:  0.95
Precision:  0.99
##############################################################
Test Statistics using SGD for  1  iteration
Accuracy:  98.06
Fscore:  0.98
Recall:  0.96
Precision:  0.99
##############################################################
Wt of gd: [-0.93897588 -3.9067179  -3.42133874 -3.22232381  0.29637233]
Wt of sgd: [-0.55008191 -2.8732603  -2.15451