### Importing libraries and loading data

In [13]:
%matplotlib inline

import math
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, pairwise_distances
from sklearn.datasets import load_breast_cancer
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

(data, target) = load_breast_cancer(return_X_y=True)

### Utilities and helpers

In [14]:
def show_result(y_true, y_pred):
    print('Accuracy score: %.4f' % accuracy_score(y_true, y_pred))
    print()

def train_and_test(classifier):
    pipe = make_pipeline(StandardScaler(), classifier)
#     pipe = make_pipeline(classifier)
    pipe.fit(X_train, y_train)
    Y = pipe.predict(X_test)
    show_result(y_test, Y)    

### Splitting data

In [15]:
(X_train, X_test, y_train, y_test) = train_test_split(data, target, test_size=0.2)

### Logistic regression implementation

In [16]:
class MyLogisticRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, 
                 append_intercept = True, 
                 learning_rate = 0.01, 
                 iterations = 3000,
                 prediction_threshold = 0.5):
        self.append_intercept = append_intercept
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.prediction_threshold = prediction_threshold
        
    def __sigmoid(self, x):
        return 1./(1. + np.exp(-x))
    
    def __predict_proba(self, X, theta):
        logits = np.dot(X, theta)
        return self.__sigmoid(logits)
    
    def __gradients(self, X, y, proba):
        gradients = X.T.dot(y - proba)
        gradients *= self.learning_rate
        return gradients
    
    def __append_intercept(self, a):
        intercept = np.ones((a.shape[0], 1))
        return np.hstack((intercept, a))
        
    def fit(self, X, y):
        if self.append_intercept:
            X = self.__append_intercept(X)
            
        theta = np.zeros(X.shape[1])
        for iteration in range(self.iterations):
            proba = self.__predict_proba(X, theta)
            gradients = self.__gradients(X, y, proba)
            theta -= gradients
            
        self.theta = theta

    def predict(self, X):
        if self.append_intercept:
            X = self.__append_intercept(X)
            
        proba = self.__predict_proba(X, self.theta)
        indices = np.argwhere(proba > self.prediction_threshold)
        res = np.ones(X.shape[0])
        res[indices] = 0
        return res

### Using MyLogisticRegression classifier

In [17]:
train_and_test(MyLogisticRegression())

  del sys.path[0]


Accuracy score: 0.8772



### Using LogisticRegression from sklearn to test data

In [18]:
train_and_test(LogisticRegression())

Accuracy score: 0.9737

