<a href="https://www.kaggle.com/code/samithsachidanandan/logistic-regression-from-scratch-in-python?scriptVersionId=265969160" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

### Mathematical Implementation of Logistic Regression

In [1]:
import numpy as np 

In [2]:
def sigmoid(z):
    """Numerically stable logistic function 1 / (1 + exp(-z)).

    The naive formula evaluates ``exp(-z)``, which overflows for large
    negative ``z``. Here the exponential is only ever taken of a
    non-positive number, so it stays within (0, 1] and cannot overflow.

    Parameters
    ----------
    z : scalar or array_like
        Input value(s).

    Returns
    -------
    NumPy scalar or ndarray
        Element-wise sigmoid of ``z``; a scalar input yields a NumPy scalar,
        an array input yields an array of the same shape.
    """
    z = np.asarray(z)
    # exp of a non-positive argument: bounded by 1, never overflows.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z)  (algebraically identical).
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps 0-d results to a scalar and leaves arrays as they are.
    return out[()]

In [3]:
def calculate_gradient(theta, X, y):
    """Return X.T @ (sigmoid(X @ theta) - y) divided by the number of labels.

    Parameters
    ----------
    theta : ndarray
        Current parameter vector.
    X : ndarray
        Design matrix (rows are samples).
    y : ndarray
        Target values; ``y.size`` is used as the sample count.
    """
    n_samples = y.size
    # Difference between predicted probabilities and the targets.
    residuals = sigmoid(X @ theta) - y
    return (X.T @ residuals) / n_samples

In [4]:
def gradient_descent(X, y , alpha=0.1, num_iter=100, tol=1e-7):
    """Fit logistic-regression parameters with batch gradient descent.

    A column of ones is prepended to ``X`` so that the first entry of the
    returned vector acts as the intercept.

    Parameters
    ----------
    X : ndarray
        Feature matrix without an intercept column.
    y : ndarray
        Target values passed through to ``calculate_gradient``.
    alpha : float
        Step size applied to each gradient.
    num_iter : int
        Maximum number of update steps.
    tol : float
        Iteration stops once the gradient norm falls below this value.

    Returns
    -------
    ndarray
        Parameter vector of length ``X.shape[1] + 1`` (intercept first).
    """
    n_samples = X.shape[0]
    design = np.c_[np.ones((n_samples, 1)), X]

    # Start from the all-zero parameter vector.
    weights = np.zeros(design.shape[1])

    for _ in range(num_iter):
        step_grad = calculate_gradient(weights, design, y)
        weights = weights - alpha * step_grad

        # The step is applied before the convergence check, so the final
        # small update is kept.
        if np.linalg.norm(step_grad) < tol:
            break

    return weights

In [5]:
def predict_proba(X, theta):
    """Return sigmoid([1, X] @ theta) for every row of ``X``.

    Parameters
    ----------
    X : ndarray
        Feature matrix without an intercept column.
    theta : ndarray
        Parameter vector whose first entry multiplies the prepended ones column.
    """
    n_rows = X.shape[0]
    bias_column = np.ones((n_rows, 1))
    design = np.c_[bias_column, X]
    logits = design @ theta
    return sigmoid(logits)

In [6]:
def predict(X, theta, threshold=0.5):
    """Return integer class labels: 1 where the predicted probability is
    at least ``threshold``, otherwise 0.

    Parameters
    ----------
    X : ndarray
        Feature matrix forwarded to ``predict_proba``.
    theta : ndarray
        Parameter vector forwarded to ``predict_proba``.
    threshold : float
        Decision cut-off applied to the probabilities (inclusive).
    """
    probabilities = predict_proba(X, theta)
    is_positive = probabilities >= threshold
    return is_positive.astype(int)

### Preprocessing of the Data and Evaluation 

In [7]:
from sklearn.datasets import load_breast_cancer 
from sklearn.preprocessing import StandardScaler 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score 


# Load the breast-cancer dataset as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply the same
# transformation to both splits so no test-set statistics leak in.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
# Fit the parameters on the scaled training data.
theta_hat = gradient_descent(X_train_scaled, y_train, alpha=0.1)

# Training split: predictions and accuracy.
y_pred_train = predict(X_train_scaled, theta_hat)
train_acc = accuracy_score(y_train, y_pred_train)

# Test split: predictions and accuracy.
y_pred_test = predict(X_test_scaled, theta_hat)
test_acc = accuracy_score(y_test, y_pred_test)

# Training accuracy is printed first, then test accuracy.
print(train_acc)
print(test_acc)


0.9824175824175824
0.9824561403508771
