In [None]:
import numpy as np
import matplotlib as plt
import pickle
from sklearn.metrics import precision_score, accuracy_score

# 1 - Creating Logistic Regression 
This class includes sigmoid and binary cross-entropy (log loss) implementations; ReLU and mean squared error are not implemented yet.


In [None]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    The model keeps a weight column vector ``w`` of shape (n_features, 1) and
    a bias ``b`` of shape (1, 1). Both are ``None`` until ``fit`` or ``load``
    has been called. The loss recorded every 100th epoch is stored in
    ``cost_track`` (epoch number -> loss).
    """

    def __init__(self, learning_rate=0.01) -> None:
        """Create an untrained model.

        Args:
            learning_rate: Gradient-descent step size used by ``fit`` unless
                ``fit`` is given its own ``learning_rate``.
        """
        self.w = None
        self.b = None

        self.learning_rate = learning_rate
        # Probabilities are clipped to [epsilon, 1 - epsilon] before log().
        self.epsilon = 1e-15
        self.cost_track = {}

    def sigmoid(self, a):
        """Return the logistic function 1 / (1 + exp(-a)), element-wise."""
        a = np.asarray(a, dtype=float)
        # exp(-log(1 + exp(-a))) is algebraically the same value, but
        # logaddexp never overflows for large negative ``a``.
        return np.exp(-np.logaddexp(0, -a))

    def feed_forward(self, x=None):
        """Compute predicted probabilities.

        Args:
            x: Feature matrix of shape (n_samples, n_features). When omitted,
                the training matrix stored by ``fit`` is used.

        Returns:
            Array of shape (n_samples, 1) with values in [0, 1].
        """
        if x is None:
            x = self.x_train
        return self.sigmoid(np.dot(x, self.w) + self.b)

    def binary_crossentropy(self, y, y_pred):  # also named as log loss
        """Return the mean binary cross-entropy between labels and probabilities.

        Both inputs are flattened to column vectors, so 1-D and (n, 1) arrays
        are accepted and the mean is taken over the samples actually passed.
        """
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        y_pred = np.asarray(y_pred, dtype=float).reshape(-1, 1)
        # Clip symmetrically so neither log(y_pred) nor log(1 - y_pred) sees 0.
        y_pred = np.clip(y_pred, self.epsilon, 1 - self.epsilon)
        return -np.mean(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred))

    def back_propagation(self, y_pred):
        """Run one gradient-descent update and return the pre-update loss.

        Args:
            y_pred: Probabilities from ``feed_forward`` for the training data.
        """
        cost = self.binary_crossentropy(self.y_train, y_pred)
        error = y_pred - self.y_train
        dw = np.dot(self.x_train.T, error) / self.m
        db = np.sum(error) / self.m

        # updating weights and bias
        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

        return cost

    def save(self, file_path):
        """Pickle the weights and bias to ``file_path``."""
        with open(file_path, "wb") as f:
            pickle.dump({"w": self.w, "b": self.b}, f, protocol=pickle.HIGHEST_PROTOCOL)

    def load(self, file_path):
        """Restore weights and bias previously written by ``save``.

        SECURITY: ``pickle.load`` can execute arbitrary code; only load files
        that come from a trusted source.
        """
        with open(file_path, "rb") as f:
            params = pickle.load(f)

        self.w = params["w"]
        self.b = params["b"]

    def initialize_paramters(self):
        """Set ``m`` from the training data and zero-initialise ``w`` and ``b``.

        The misspelled method name is kept so existing callers keep working.
        """
        self.m, feature_count = self.x_train.shape[0], self.x_train.shape[1]
        # initialize weights and bias
        self.w = np.zeros((feature_count, 1))
        self.b = np.zeros((1, 1))

    def calculate_accuracy(self, y_pred, y):
        """Return the fraction of samples whose thresholded prediction equals ``y``.

        Probabilities above 0.5 count as class 1. Inputs are flattened first
        so (n, 1) predictions can be compared with 1-D labels, and the ratio is
        taken over the samples passed in rather than the training-set size.
        """
        y_pred = np.asarray(y_pred).reshape(-1)
        y = np.asarray(y).reshape(-1)
        return np.mean((y_pred > 0.5) == y)

    def fit(self, x_train, y_train, epoch, learning_rate=None, print_cost=True):
        """Train the model from zero-initialised parameters.

        Args:
            x_train: Feature matrix of shape (n_samples, n_features).
            y_train: Binary labels, 1-D or (n_samples, 1).
            epoch: Number of full-batch gradient-descent steps.
            learning_rate: Step size for this run. ``None`` (default) keeps the
                value given to the constructor.
            print_cost: When true, print loss and accuracy every 100 epochs.

        Raises:
            ValueError: If ``x_train`` is not 2-D or the sample counts differ.
        """
        if learning_rate is not None:
            self.learning_rate = learning_rate

        x_train = np.asarray(x_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float).reshape(-1, 1)
        if x_train.ndim != 2:
            raise ValueError("x_train must be a 2-D array of shape (n_samples, n_features)")
        if x_train.shape[0] != y_train.shape[0]:
            raise ValueError("x_train and y_train must contain the same number of samples")

        self.x_train = x_train
        self.y_train = y_train
        self.initialize_paramters()
        # Start a fresh history so a refit does not keep values from an earlier run.
        self.cost_track = {}

        try:
            for e in range(1, epoch + 1):
                y_pred = self.feed_forward()
                cost = self.back_propagation(y_pred)

                if e % 100 == 0:
                    self.cost_track[e] = float(cost)
                    if print_cost:
                        print(f"Epoch: {e}, train_loss: {np.squeeze(cost)}, train_accuracy: {self.calculate_accuracy(y_pred, self.y_train)}")
        finally:
            # Do not keep references to the training data on the model,
            # even when training is interrupted by an error.
            del self.x_train
            del self.y_train

    def evaluate(self, x_test, y_test):
        """Return loss and accuracy on a labelled data set.

        Returns:
            dict with keys ``"loss"`` and ``"accuracy"`` (both floats).

        Raises:
            RuntimeError: If the model has no parameters yet.
        """
        # @TODO plot and classification report
        if self.w is None or self.b is None:
            raise RuntimeError("Model has no parameters: call fit() or load() first.")

        y_prob = self.feed_forward(np.asarray(x_test, dtype=float))
        return {
            "loss": float(self.binary_crossentropy(y_test, y_prob)),
            "accuracy": float(self.calculate_accuracy(y_prob, y_test)),
        }

    def predict(self, x):
        """Predict class labels (0 or 1) for one sample or a batch.

        Args:
            x: A single sample of shape (n_features,) or a matrix of shape
                (n_samples, n_features).

        Returns:
            1-D integer array with one label per sample.

        Raises:
            RuntimeError: If the model has no parameters yet.
        """
        if self.w is None or self.b is None:
            raise RuntimeError("Model has no parameters: call fit() or load() first.")

        x = np.asarray(x, dtype=float)
        if x.ndim == 1:
            # A single sample: treat it as a batch of one row.
            x = x.reshape(1, -1)
        return (self.feed_forward(x) > 0.5).astype(int).reshape(-1)

# 2 - Load Heart Disease Dataset

In [None]:
import pandas as pd

# Read the Framingham CSV, then discard every row that contains a missing value.
df = pd.read_csv("framingham.csv")
df = df.dropna()

# Summarise the columns, their dtypes and non-null counts.
df.info()

In [None]:
# Preview the first rows of the heart-disease frame (rows with missing values already dropped).
df.head()

In [None]:
# Preview the last rows of the heart-disease frame.
df.tail()

In [None]:
# Target vector: the "TenYearCHD" label column as a NumPy array.
y = df["TenYearCHD"].values
# Feature matrix: every remaining column as a NumPy array.
x = df.drop(columns=["TenYearCHD"]).values

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Build the heart-disease model and train it on the training split.
lrm_hearth = LogisticRegression()
lrm_hearth.fit(
    x_train=x_train,
    y_train=y_train,
    epoch=1000,
    learning_rate=0.000000015,
)

In [None]:
# Disabled example: write the model parameters to a pickle file and read them back.
# NOTE(review): no `lrm` object is defined in this notebook; the trained model above is `lrm_hearth`.
# lrm.save("./test.pckl")
# lrm.load("./test.pckl")

# 3 - Diabetes Dataset

In [None]:
# Load the diabetes CSV into its own frame (no rows are dropped here).
diabetics_df = pd.read_csv("diabetes2.csv")

# Summarise the columns, their dtypes and non-null counts.
diabetics_df.info()

In [None]:
# Preview the first rows of the diabetes frame.
diabetics_df.head()

In [None]:
# Preview the last rows of the diabetes frame.
diabetics_df.tail()

In [None]:
# Target vector: the "Outcome" label column as a NumPy array.
y = diabetics_df["Outcome"].values
# Feature matrix: every remaining column as a NumPy array.
x = diabetics_df.drop(columns=["Outcome"]).values

In [None]:
# Hold out 33% of the diabetes rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=42,
)

In [None]:

# Build the diabetes model and train it on the training split.
lrm_diabet = LogisticRegression()
lrm_diabet.fit(
    x_train=x_train,
    y_train=y_train,
    epoch=1000,
    learning_rate=0.000000015,
)