# Logistic Regression

## Load Data

In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer

In [2]:
# Load features and labels in one call; the original built the dataset twice.
X, y = load_breast_cancer(return_X_y=True)

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

---

## My Logistic Regression

In [5]:
class MyLogisticRegression:
    """Binary logistic regression fitted with Newton-Raphson updates.

    Each iteration applies
    ``beta <- beta + (X^T W X)^{-1} X^T (y - p)`` with ``W = diag(p * (1 - p))``.
    No intercept column is added and no regularisation is applied; append a
    constant feature to ``X`` if a bias term is wanted.

    Parameters
    ----------
    max_iter : int, default=10
        Number of Newton steps performed by ``fit``.

    Attributes
    ----------
    beta_new : numpy.ndarray of shape (n_features,)
        Coefficient vector, set by ``fit``.
    """

    def __init__(self, max_iter=10):
        self.max_iter = max_iter

    @staticmethod
    def _sigmoid(z):
        """Return 1 / (1 + exp(-z)) element-wise without overflowing.

        The tanh identity avoids the ``inf / inf = nan`` that
        ``exp(z) / (1 + exp(z))`` produces for large positive ``z``.
        """
        return 0.5 * (1.0 + np.tanh(0.5 * z))

    def fit(self, X_train, y_train):
        """Estimate the coefficients from training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Feature matrix.
        y_train : array-like of shape (n_samples,)
            Labels encoded as 0/1.

        Raises
        ------
        numpy.linalg.LinAlgError
            If ``X^T W X`` is singular at some iteration.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        beta = np.zeros(X_train.shape[1])
        # Publish the starting point so predict() still works when max_iter == 0.
        self.beta_new = beta
        for _ in range(self.max_iter):
            p1 = self._sigmoid(X_train.dot(beta))
            weights = p1 * (1.0 - p1)
            # Scale the columns of X^T by the weights instead of building a dense
            # (n_samples x n_samples) diagonal matrix.
            hessian = (X_train.T * weights).dot(X_train)
            gradient = X_train.T.dot(y_train - p1)
            # Solve the linear system directly rather than forming an explicit inverse.
            beta = beta + np.linalg.solve(hessian, gradient)
            self.beta_new = beta

    def predict(self, X_test):
        """Return 0/1 predictions: 1 where the fitted probability exceeds 0.5.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            Feature matrix to classify.

        Returns
        -------
        numpy.ndarray of int, shape (n_samples,)
        """
        X_test = np.asarray(X_test, dtype=float)
        return (self._sigmoid(X_test.dot(self.beta_new)) > 0.5).astype('int')

In [6]:
# Hand-written Newton-based classifier, using its default of 10 iterations.
model = MyLogisticRegression()

In [7]:
# Estimate the coefficients on the training split only.
model.fit(X_train, y_train)

In [8]:
# Hard 0/1 predictions for the held-out test split.
y_pred = model.predict(X_test)

In [9]:
from sklearn.metrics import zero_one_loss

In [10]:
# scikit-learn metrics take (y_true, y_pred) in that order.
print('0-1 Loss of My LogisticRegression:', zero_one_loss(y_test, y_pred))

0-1 Loss of My LogisticRegression: 0.052631578947368474


---

## Compare to Scikit-Learn

In [11]:
from sklearn.linear_model import LogisticRegression

In [12]:
# Reference model from scikit-learn. max_iter is set very high, presumably so the
# solver can converge on the unscaled features (TODO confirm).
logit = LogisticRegression(max_iter=10000)

In [13]:
# Train on the same split as the hand-written model; the cell output below is the estimator's repr.
logit.fit(X_train, y_train)

LogisticRegression(max_iter=10000)

In [14]:
# NOTE: this reuses the name y_pred, replacing the hand-written model's predictions.
y_pred = logit.predict(X_test)

In [15]:
# scikit-learn metrics take (y_true, y_pred) in that order.
print('0-1 Loss of sklearn LogisticRegression:', zero_one_loss(y_test, y_pred))

0-1 Loss of sklearn LogisticRegression: 0.040935672514619936
