In [175]:
import numpy as np
import matplotlib as plt

# 1 - Creating Logistic Regression 
This class include sigmoid, relu, binary cross entropy and mean squared error implementation


In [176]:
class LogisticRegression():
    def __init__(self, learning_rate=0.01) -> None:
        self.w = None
        self.b = None

        self.learning_rate = learning_rate
        self.epsilon = 1e-6
        self.cost_track = {}

    def sigmoid(self, a):
        return 1 / (1 + np.exp(-a))

    def feed_forward(self):
        return self.sigmoid(
            np.dot(self.x_train, self.w) + self.b
        )

    def binary_crossentropy(self, y, y_pred): # also named as log loss 
        # clip because preventing log(0) error
        y_pred = np.clip(y_pred, self.epsilon, 1 - 1e-6)
        return - (1 / self.m) * np.sum((y * np.log(y_pred)) + ((1 - y) * np.log(1 - y_pred)))
        
    def back_propagation(self, y_pred):
        cost = self.binary_crossentropy(self.y_train, y_pred)
        dw = (1 / self.m) * np.dot(self.x_train.T, (y_pred - self.y_train))
        db = (1 / self.m) * np.sum(y_pred - self.y_train)

        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

        return cost

    def update_parameters(self, dz):
        self.w = self.w - self.learning_rate * dz
        self.b = self.b - self.learning_rate * dz    

    def save(self):
        # @TODO json save
        pass

    def load(self):
        # @TODO load weights from json
        pass

    def fit(self, x_train, y_train, epoch, learning_rate=0.01, print_cost=True):
        self.x_train = x_train
        self.y_train = y_train.reshape(-1, 1)
        (self.m, self.feature_count) = self.x_train.shape[0], self.x_train.shape[1]
        # initialize weights and bias
        self.w = np.random.random((x_train.shape[1], 1))
        self.b = np.random.random((1, 1))

        for e in range(1, epoch + 1):
            
            a = self.feed_forward()
            cost = self.back_propagation(a)
            print(f"Epoch: {e}, train_loss: {cost}, train_accuracy: {e}")
            self.cost_track.setdefault(e, cost)

        del self.x_train
        del self.y_train


    def evaluate(self, x_test, y_test):
        # @TODO plot and classification report
        pass


    def predict(self, x):
        # @TODO single data prediction
        pass

# 2 - Load Heart Disease Dataset

In [177]:
import pandas as pd

df = pd.read_csv("framingham.csv").dropna()

df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3656 entries, 0 to 4237
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   male             3656 non-null   int64  
 1   age              3656 non-null   int64  
 2   education        3656 non-null   float64
 3   currentSmoker    3656 non-null   int64  
 4   cigsPerDay       3656 non-null   float64
 5   BPMeds           3656 non-null   float64
 6   prevalentStroke  3656 non-null   int64  
 7   prevalentHyp     3656 non-null   int64  
 8   diabetes         3656 non-null   int64  
 9   totChol          3656 non-null   float64
 10  sysBP            3656 non-null   float64
 11  diaBP            3656 non-null   float64
 12  BMI              3656 non-null   float64
 13  heartRate        3656 non-null   float64
 14  glucose          3656 non-null   float64
 15  TenYearCHD       3656 non-null   int64  
dtypes: float64(9), int64(7)
memory usage: 485.6 KB


In [178]:
df.head()

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [179]:
df.tail()

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
4231,1,58,3.0,0,0.0,0.0,0,1,0,187.0,141.0,81.0,24.96,80.0,81.0,0
4232,1,68,1.0,0,0.0,0.0,0,1,0,176.0,168.0,97.0,23.14,60.0,79.0,1
4233,1,50,1.0,1,1.0,0.0,0,1,0,313.0,179.0,92.0,25.97,66.0,86.0,1
4234,1,51,3.0,1,43.0,0.0,0,0,0,207.0,126.5,80.0,19.71,65.0,68.0,0
4237,0,52,2.0,0,0.0,0.0,0,0,0,269.0,133.5,83.0,21.47,80.0,107.0,0


In [180]:
x = df.drop("TenYearCHD", axis=1).values
x

array([[  1.  ,  39.  ,   4.  , ...,  26.97,  80.  ,  77.  ],
       [  0.  ,  46.  ,   2.  , ...,  28.73,  95.  ,  76.  ],
       [  1.  ,  48.  ,   1.  , ...,  25.34,  75.  ,  70.  ],
       ...,
       [  1.  ,  50.  ,   1.  , ...,  25.97,  66.  ,  86.  ],
       [  1.  ,  51.  ,   3.  , ...,  19.71,  65.  ,  68.  ],
       [  0.  ,  52.  ,   2.  , ...,  21.47,  80.  , 107.  ]])

In [181]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

In [182]:
#creating model object

lrm = LogisticRegression()
lrm.fit(x_train=x_train, y_train=y_train, epoch=50, learning_rate=0.01)

Epoch: 1, train_loss: 11.75644114648338, train_accuracy: 1
Epoch: 2, train_loss: 2.059070411456922, train_accuracy: 2
Epoch: 3, train_loss: 2.059070411456922, train_accuracy: 3
Epoch: 4, train_loss: 2.059070411456922, train_accuracy: 4
Epoch: 5, train_loss: 10.761548717972614, train_accuracy: 5
Epoch: 6, train_loss: 2.059070411456922, train_accuracy: 6
Epoch: 7, train_loss: 2.059070411456922, train_accuracy: 7
Epoch: 8, train_loss: 2.059070411456922, train_accuracy: 8
Epoch: 9, train_loss: 2.059070411456922, train_accuracy: 9
Epoch: 10, train_loss: 2.059070411456922, train_accuracy: 10
Epoch: 11, train_loss: 3.3756097310802686, train_accuracy: 11
Epoch: 12, train_loss: 2.059070411456922, train_accuracy: 12
Epoch: 13, train_loss: 2.059070411456922, train_accuracy: 13
Epoch: 14, train_loss: 11.756028112355887, train_accuracy: 14
Epoch: 15, train_loss: 2.059070411456922, train_accuracy: 15
Epoch: 16, train_loss: 2.059070411456922, train_accuracy: 16
Epoch: 17, train_loss: 2.05907041145692

  return 1 / (1 + np.exp(-a))
