In [81]:
import numpy as np

In [108]:
def sigmoid(z):
    """Evaluate the logistic function 1 / (1 + exp(-z)) element-wise.

    The exponential is only ever taken of a non-positive number, so inputs of
    large magnitude cannot overflow ``np.exp``. (The naive form evaluates
    ``exp(+large)`` for very negative ``z`` and emits a RuntimeWarning.)

    Parameters
    ----------
    z : scalar or array-like
        Real-valued input(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Logistic values with the same shape as ``z``; a scalar input yields a
        scalar result.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))  # argument is <= 0, so the result lies in [0, 1]
    # z >= 0 uses 1 / (1 + e^-z); z < 0 uses the algebraically equal
    # e^z / (1 + e^z), which avoids exponentiating a large positive number.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return out[()]  # unwrap a 0-d result to a scalar; n-d arrays pass through

In [109]:
def calculate_gradient(theta, X, y):
    """Return X.T @ (sigmoid(X @ theta) - y) / m, with m = y.size.

    This is the gradient of the average log-loss of a logistic-regression
    model with respect to ``theta``.

    Parameters
    ----------
    theta : numpy.ndarray
        Current parameter vector; its length must match the columns of ``X``.
    X : numpy.ndarray
        Feature matrix, one row per instance.
    y : numpy.ndarray
        Target values, one per instance.

    Returns
    -------
    numpy.ndarray
        Gradient with one entry per column of ``X``.
    """
    n_instances = y.size
    residual = sigmoid(X @ theta) - y  # predicted probability minus target
    return (X.T @ residual) / n_instances

In [110]:
def gradient_descent(X, y, lr = 0.01, epochs = 1000):
    """Fit logistic-regression parameters with batch gradient descent.

    A column of ones is prepended to ``X`` so the first returned parameter
    acts as the intercept. Parameters start at zero and are updated
    ``epochs`` times using the full-batch gradient.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, one row per instance (without a bias column).
    y : numpy.ndarray
        Target values, one per instance.
    lr : float, optional
        Step size applied to each gradient.
    epochs : int, optional
        Number of update steps to run.

    Returns
    -------
    numpy.ndarray
        Parameter vector of length ``X.shape[1] + 1`` (intercept first).
    """
    bias_column = np.ones((X.shape[0], 1))
    design = np.c_[bias_column, X]

    weights = np.zeros(design.shape[1])
    for _ in range(epochs):
        step = lr * calculate_gradient(weights, design, y)
        weights -= step

    return weights

In [111]:
def predict_proba(X, theta):
    """Return the sigmoid of the linear score for every row of ``X``.

    A column of ones is prepended to ``X`` so that ``theta[0]`` is used as the
    intercept term.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, one row per instance (without a bias column).
    theta : numpy.ndarray
        Parameter vector of length ``X.shape[1] + 1``.

    Returns
    -------
    numpy.ndarray
        One probability-like value per row of ``X``.
    """
    n_rows = X.shape[0]
    with_bias = np.c_[np.ones((n_rows, 1)), X]
    scores = with_bias @ theta
    return sigmoid(scores)

In [112]:
def predict(X, theta, threshold = 0.5):
    """Turn predicted probabilities into hard 0/1 class labels.

    Parameters
    ----------
    X : numpy.ndarray
        Feature matrix, one row per instance (without a bias column).
    theta : numpy.ndarray
        Parameter vector passed through to ``predict_proba``.
    threshold : float, optional
        Probabilities greater than or equal to this value map to 1,
        everything else to 0.

    Returns
    -------
    numpy.ndarray
        Integer array of 0/1 labels, one per row of ``X``.
    """
    return (predict_proba(X, theta) >= threshold).astype(int)


# The function was first written under the misspelled name ``predtict``; keep
# that name bound to the same function so code that still calls it keeps working.
predtict = predict

In [113]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [114]:
# Load the breast-cancer data as (features, labels) and hold out 20% of the
# rows for testing; the fixed random_state makes the split repeatable.
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Fit the scaler on the training split only, then apply the same fitted
# transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_s = scaler.transform(X_train)
X_test_s = scaler.transform(X_test)

In [115]:
# Fit the parameters on the standardized training split. The optimizer
# settings are written out here; they equal gradient_descent's defaults.
theta_hat = gradient_descent(X_train_s, y_train, lr=0.01, epochs=1000)

In [116]:
# Hard 0/1 predictions for both splits, using the default decision threshold.
y_train_pred, y_test_pred = (
    predtict(features, theta_hat) for features in (X_train_s, X_test_s)
)

In [117]:
# Score the fitted model on the data it was trained on.
train_confusion_matrix = confusion_matrix(y_true=y_train, y_pred=y_train_pred)
train_accuracy = accuracy_score(y_true=y_train, y_pred=y_train_pred)

print(f"Training Accuracy = {train_accuracy}")
print(f"Training Confusion Matrix = \n{train_confusion_matrix}")

Training Accuracy = 0.978021978021978
Training Confusion Matrix = 
[[157   8]
 [  2 288]]


In [118]:
# Score the fitted model on the held-out split.
test_confusion_matrix = confusion_matrix(y_true=y_test, y_pred=y_test_pred)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_test_pred)

print(f"Testing Accuracy = {test_accuracy}")
print(f"Testing Confusion Matrix = \n{test_confusion_matrix}")

Testing Accuracy = 0.9649122807017544
Testing Confusion Matrix = 
[[45  2]
 [ 2 65]]
