In [1]:
import numpy as np

class LogisticRegression_hand:
    def __init__(self, learning_rate=0.01, num_iterations=50000, fit_intercept=True):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.fit_intercept = fit_intercept

    def __intercept(self, X):
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)
    
    def __sigmoid_function(self, z):
        return 1 / (1 + np.exp(-z))
    
    def __loss(self, yp, y):
        return (-y * np.log(yp) - (1 - y) * np.log(1 - yp)).mean()
    
    def fit(self, X, y):
        if self.fit_intercept:
            X = self.__intercept(X)
        self.W = np.zeros(X.shape[1])
        
        for i in range(self.num_iterations):
            z = np.dot(X, self.W)
            yp = self.__sigmoid_function(z)
            gradient = np.dot(X.T, (yp - y)) / y.size
            self.W -= self.learning_rate * gradient
            z = np.dot(X, self.W)
            yp = self.__sigmoid_function(z)
            loss = self.__loss(yp, y)
    
    def predict_prob(self, X):
        if self.fit_intercept:
            X = self.__intercept(X)
        return self.__sigmoid_function(np.dot(X, self.W))
    
    def predict(self, X):
        return self.predict_prob(X).round()



In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv("/home/rootsyl/Stat/datasets/Heartdesease/heart.csv")
df.sample(frac=1)
df_train = df.iloc[:round(len(df)*.8)]
df_test = df.iloc[round(len(df)*.8):]

In [62]:
df_test.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
242,64,1,0,145,212,0,0,132,0,2.0,1,2,1,0
243,57,1,0,152,274,0,1,88,1,1.2,1,1,3,0
244,56,1,0,132,184,0,0,105,1,2.1,1,1,1,0
245,48,1,0,124,274,0,0,166,0,0.5,1,0,3,0
246,56,0,0,134,409,0,0,150,1,1.9,1,2,3,0


In [4]:
df_test_x = df_test.drop('target',axis=1)
df_test_y = df_test['target']
df_train_x = df_train.drop('target',axis=1)
df_train_y = df_train['target']

In [5]:
df_train_x

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
237,60,1,0,140,293,0,0,170,0,1.2,1,2,3
238,77,1,0,125,304,0,0,162,1,0.0,2,3,2
239,35,1,0,126,282,0,0,156,1,0.0,2,0,3
240,70,1,2,160,269,0,1,112,1,2.9,1,1,3


In [64]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(fit_intercept=True, solver='liblinear')
model_hand = LogisticRegression_hand()

In [65]:
model.fit(df_train_x, df_train_y)

LogisticRegression(solver='liblinear')

In [66]:
model_hand.fit(df_train_x,df_train_y)

  return (-y * np.log(yp) - (1 - y) * np.log(1 - yp)).mean()


In [67]:
[model.intercept_, model.coef_]

[array([0.47114019]),
 array([[ 0.01907146, -1.36958126,  0.79876303, -0.00581505, -0.00544029,
         -0.06274211,  0.8584912 ,  0.02718551, -0.70616312, -0.55173549,
          0.53143562, -0.60216379, -1.05632035]])]

In [68]:
model_hand.predict(df_test_x)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1.])

In [69]:
df_test_y.sum()

0

In [70]:
model.predict(df_test_x)

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1])

In [71]:
model.score(df_test_x, df_test_y)

0.639344262295082

In [72]:
from sklearn.metrics import confusion_matrix
confusion_matrix(df_test_y, model.predict(df_test_x))

array([[39, 22],
       [ 0,  0]])

In [73]:
confusion_matrix(df_test_y, model_hand.predict(df_test_x))

array([[59,  2],
       [ 0,  0]])

In [75]:
from sklearn.metrics import accuracy_score
accuracy_score(df_test_y, model_hand.predict(df_test_x))

0.9672131147540983