In [4]:
import pandas as pd
import numpy as np
from math import pow

class Logistic_Regression(object):
    def __init__(self,learning_rate,no_columns):
        self.lr = learning_rate
        self.no_columns = no_columns
        
        self.b = [0.0 for i in range(no_columns+1)]
        
    def sigmoid(self,val):
        return 1/(1+np.exp(-val))
    
    def sigmoid_derivative(self,val):
        return val * (1.0 - val)
    
    def predict_class(self,row):
        o = self.b[-1]
        for i in range(len(row)):
            o += row[i]*self.b[i]
        
        return self.sigmoid(o)
    
    def log_reg(self,X,y):
        
        total_error = 0
        for i in range(len(X)):
            o = self.predict_class(X[i])
            error = y[i] - o
            total_error += pow(error,2)
            
            for j in range(self.no_columns):
                self.b[j] += error * self.lr * self.sigmoid_derivative(o) * X[i][j]
                
            self.b[-1] += error * self.lr * self.sigmoid_derivative(o)
            
        total_error = total_error * 0.5/len(X)
        print("Error = ","{:5.4f}".format(total_error))
        return total_error
            
    def cal_acc(self,o,y):
        count = 0
        
        for i in range(len(o)):
            if(o[i] == y[i]):
                count += 1
        
        return count/len(o)
    
    def output_to_list(self,o,threshold):
        
        for i in range(len(o)):
            if o[i] > threshold:
                o[i] = 1
                
            else:
                o[i] = 0
        return o
    
    def train(self,epochs,X,y,threshold):
        
        mse = []
        for epoch in range(epochs):
            print("Epoch ", epoch + 1," : ",end=" ")
            mse.append(self.log_reg(X,y))
        
        print("\n")
            
        o = []
        for i in range(len(X)):
            o.append(self.predict_class(X[i]))
            
        o = self.output_to_list(o,threshold)
        
        acc = self.cal_acc(o,y)
        
        return acc, mse
        
        
        
    def test(self,X,y,threshold):
        
        o = []
        for i in range(len(X)):
            o.append(self.predict_class(X[i]))
            
        o = self.output_to_list(o,threshold)
        
        acc = self.cal_acc(o,y)
        
        return acc
        

In [5]:
def split_into_kfolds(df,kfolds):
    df_list = []
    
    size = len(df)
    count = int(size/kfolds)

    for i in range(kfolds):
        beg_index = i * count
        end_index = (i+1) * count
        if i == kfolds-1:
            end_index = size

        df_list.append(df[beg_index:end_index])
        
    return df_list

def normalize(x):
    result = x.copy()
    for feature_name in x.columns:
        max_value = x[feature_name].max()
        min_value = x[feature_name].min()
        result[feature_name] = (x[feature_name] - min_value) / (max_value - min_value)
    return result



In [6]:
%matplotlib tk

from sklearn.model_selection import train_test_split
from datetime import datetime
import matplotlib.pyplot as plt



startTime = datetime.now()

df = pd.read_csv('ILPD.csv')
df = normalize(df)

col = col = ['Age','Gender','TB','DB','Alkphos','Sgpt','Sgot','TP','ALB','A/G']
col_class = 'Selector'

df = df.sample(frac = 1)


kfolds = 5
threshold = 0.5
learning_rate = 8 * pow(10,-4)
epochs = 200

score_train = []
score_test = []

count_epochs = range(1,epochs+1)
mse = []

df_list = split_into_kfolds(df,kfolds)

LR = Logistic_Regression(learning_rate,len(col))

mse = []

for i in range(kfolds):
    print("Training on cross-validation ",i+1,"\n" + "\n")
    
    df_train = pd.DataFrame()
    
    for j in range(0,i):
        df_train = pd.concat([df_train,df_list[j]], ignore_index=True)
      
    for j in range(i+1,kfolds):
        df_train = pd.concat([df_train,df_list[j]], ignore_index=True)
        
    df_test = df_list[i]
    
    X_train = df_train[col]
    y_train = df_train[col_class]
    
    X_test = df_test[col]
    y_test = df_test[col_class]
    
    X_train, y_train, X_test, y_test = np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)
    
    x, mse = LR.train(epochs,X_train,y_train,threshold)
    
    score_train.append(x)
    
    score_test.append(LR.test(X_test,y_test,threshold))


    
plt.plot(count_epochs, mse, color='green', linestyle='solid', linewidth = 2, marker='o', markerfacecolor='blue', markersize=9) 
# setting x and y axis range 
plt.ylim(min(mse),max(mse)) 
plt.xlim(1,epochs) 
  
# naming the x axis 
plt.xlabel('Epoch number') 
# naming the y axis 
plt.ylabel('MSE') 
  
# giving a title to my graph 
plt.title('Logistic Regression') 
  
# function to show the plot 
plt.show() 

    
train ,test = train_test_split(df, test_size = 0.25)

X_test = test[col]
y_test = test[col_class]
X_test, y_test = np.array(X_test), np.array(y_test)

final_score = LR.test(X_test,y_test,threshold)
    


score_train_mean = sum(score_train)/len(score_train)
print("Training accuracy is : ", "{:5.4f}".format(score_train_mean))

score_test_mean = sum(score_test)/len(score_test)
print("Testing accuracy average of all folds is : ", "{:5.4f}".format(score_test_mean))

print("Final testing accuracy is : ","{:5.4f}".format(final_score))


print("\n")
print("Execution time in seconds = ", datetime.now() - startTime)


Training on cross-validation  1 


Epoch  1  :  Error =  0.1248
Epoch  2  :  Error =  0.1243
Epoch  3  :  Error =  0.1239
Epoch  4  :  Error =  0.1235
Epoch  5  :  Error =  0.1231
Epoch  6  :  Error =  0.1227
Epoch  7  :  Error =  0.1223
Epoch  8  :  Error =  0.1220
Epoch  9  :  Error =  0.1216
Epoch  10  :  Error =  0.1212
Epoch  11  :  Error =  0.1209
Epoch  12  :  Error =  0.1206
Epoch  13  :  Error =  0.1203
Epoch  14  :  Error =  0.1200
Epoch  15  :  Error =  0.1197
Epoch  16  :  Error =  0.1194
Epoch  17  :  Error =  0.1191
Epoch  18  :  Error =  0.1188
Epoch  19  :  Error =  0.1185
Epoch  20  :  Error =  0.1183
Epoch  21  :  Error =  0.1180
Epoch  22  :  Error =  0.1177
Epoch  23  :  Error =  0.1175
Epoch  24  :  Error =  0.1173
Epoch  25  :  Error =  0.1170
Epoch  26  :  Error =  0.1168
Epoch  27  :  Error =  0.1166
Epoch  28  :  Error =  0.1164
Epoch  29  :  Error =  0.1162
Epoch  30  :  Error =  0.1159
Epoch  31  :  Error =  0.1157
Epoch  32  :  Error =  0.1156
Epoch  33  :  

Epoch  69  :  Error =  0.1028
Epoch  70  :  Error =  0.1028
Epoch  71  :  Error =  0.1028
Epoch  72  :  Error =  0.1028
Epoch  73  :  Error =  0.1028
Epoch  74  :  Error =  0.1028
Epoch  75  :  Error =  0.1027
Epoch  76  :  Error =  0.1027
Epoch  77  :  Error =  0.1027
Epoch  78  :  Error =  0.1027
Epoch  79  :  Error =  0.1027
Epoch  80  :  Error =  0.1027
Epoch  81  :  Error =  0.1027
Epoch  82  :  Error =  0.1026
Epoch  83  :  Error =  0.1026
Epoch  84  :  Error =  0.1026
Epoch  85  :  Error =  0.1026
Epoch  86  :  Error =  0.1026
Epoch  87  :  Error =  0.1026
Epoch  88  :  Error =  0.1025
Epoch  89  :  Error =  0.1025
Epoch  90  :  Error =  0.1025
Epoch  91  :  Error =  0.1025
Epoch  92  :  Error =  0.1025
Epoch  93  :  Error =  0.1025
Epoch  94  :  Error =  0.1025
Epoch  95  :  Error =  0.1024
Epoch  96  :  Error =  0.1024
Epoch  97  :  Error =  0.1024
Epoch  98  :  Error =  0.1024
Epoch  99  :  Error =  0.1024
Epoch  100  :  Error =  0.1024
Epoch  101  :  Error =  0.1024
Epoch  1

Epoch  138  :  Error =  0.0999
Epoch  139  :  Error =  0.0999
Epoch  140  :  Error =  0.0999
Epoch  141  :  Error =  0.0999
Epoch  142  :  Error =  0.0999
Epoch  143  :  Error =  0.0999
Epoch  144  :  Error =  0.0999
Epoch  145  :  Error =  0.0999
Epoch  146  :  Error =  0.0999
Epoch  147  :  Error =  0.0999
Epoch  148  :  Error =  0.0999
Epoch  149  :  Error =  0.0999
Epoch  150  :  Error =  0.0999
Epoch  151  :  Error =  0.0998
Epoch  152  :  Error =  0.0998
Epoch  153  :  Error =  0.0998
Epoch  154  :  Error =  0.0998
Epoch  155  :  Error =  0.0998
Epoch  156  :  Error =  0.0998
Epoch  157  :  Error =  0.0998
Epoch  158  :  Error =  0.0998
Epoch  159  :  Error =  0.0998
Epoch  160  :  Error =  0.0998
Epoch  161  :  Error =  0.0998
Epoch  162  :  Error =  0.0998
Epoch  163  :  Error =  0.0998
Epoch  164  :  Error =  0.0998
Epoch  165  :  Error =  0.0998
Epoch  166  :  Error =  0.0998
Epoch  167  :  Error =  0.0998
Epoch  168  :  Error =  0.0998
Epoch  169  :  Error =  0.0998
Epoch  1

Epoch  6  :  Error =  0.0996
Epoch  7  :  Error =  0.0995
Epoch  8  :  Error =  0.0995
Epoch  9  :  Error =  0.0995
Epoch  10  :  Error =  0.0995
Epoch  11  :  Error =  0.0995
Epoch  12  :  Error =  0.0995
Epoch  13  :  Error =  0.0995
Epoch  14  :  Error =  0.0995
Epoch  15  :  Error =  0.0995
Epoch  16  :  Error =  0.0995
Epoch  17  :  Error =  0.0995
Epoch  18  :  Error =  0.0995
Epoch  19  :  Error =  0.0995
Epoch  20  :  Error =  0.0995
Epoch  21  :  Error =  0.0995
Epoch  22  :  Error =  0.0995
Epoch  23  :  Error =  0.0995
Epoch  24  :  Error =  0.0995
Epoch  25  :  Error =  0.0995
Epoch  26  :  Error =  0.0995
Epoch  27  :  Error =  0.0995
Epoch  28  :  Error =  0.0995
Epoch  29  :  Error =  0.0995
Epoch  30  :  Error =  0.0995
Epoch  31  :  Error =  0.0995
Epoch  32  :  Error =  0.0995
Epoch  33  :  Error =  0.0995
Epoch  34  :  Error =  0.0995
Epoch  35  :  Error =  0.0995
Epoch  36  :  Error =  0.0995
Epoch  37  :  Error =  0.0995
Epoch  38  :  Error =  0.0995
Epoch  39  :  