In [1]:
import numpy as np


class LogisticRegression:
    def __init__(self, learning_rate=0.001, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # init parameters
        self.weights = np.zeros(n_features)
        self.bias = 0

        # gradient descent
        for _ in range(self.n_iters):
            # approximate y with linear combination of weights and x, plus bias
            linear_model = np.dot(X, self.weights) + self.bias
            # apply sigmoid function
            y_predicted = self._sigmoid(linear_model)

            # compute gradients
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)
            # update parameters
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        y_predicted_cls = [1 if i > 0.5 else -1 for i in y_predicted]
        return np.array(y_predicted_cls)
    
    
    # the convex cross-entropy cost function
    def cross_entropy(weights):
        # compute sigmoid of model
        a = self._sigmoid(fit(x,weights))

        # compute cost of label 0 points
        ind = np.argwhere(y == -1)[:,1]
        cost = -np.sum(np.log(1 - a[:,ind]))

        # add cost on label 1 points
        ind = np.argwhere(y==1)[:,1]
        cost -= np.sum(np.log(a[:,ind]))

        # compute cross-entropy
        return cost/y.size
    

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

In [3]:
import pandas as pd

In [4]:
df = pd.read_csv("cleveland-train.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,heartdisease::category|-1|1
0,67,1,4,120,229,0,2,129,1,2.6,2,2,7,1
1,37,1,3,130,250,0,0,187,0,3.5,3,0,3,-1
2,56,1,2,120,236,0,0,178,0,0.8,1,0,3,-1
3,57,0,4,120,354,0,0,163,1,0.6,1,0,3,-1
4,63,1,4,130,254,0,2,147,0,1.4,2,1,7,1


In [6]:
X = df.drop('heartdisease::category|-1|1', axis=1)
y = df['heartdisease::category|-1|1']

In [8]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)
preds = model.predict(X_test)



In [9]:
from sklearn.metrics import accuracy_score, confusion_matrix

print(accuracy_score(y_test, preds))
print(confusion_matrix(y_test, preds))

0.7096774193548387
[[22  0]
 [ 9  0]]
