### Importing libraries and loading data

In [12]:
%matplotlib inline

import math
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, pairwise_distances
from sklearn.datasets import load_breast_cancer
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt

# Feature matrix and label vector of the breast-cancer dataset bundled with scikit-learn.
data, target = load_breast_cancer(return_X_y=True)

### Utilities and helpers

In [13]:
def show_result(y_true, y_pred):
    """Print the accuracy of ``y_pred`` against ``y_true``, then a blank line.

    The score is computed with ``accuracy_score`` and shown with four decimals.
    """
    accuracy = accuracy_score(y_true, y_pred)
    print('Accuracy score: %.4f' % accuracy)
    print()

def train_and_test(classifier):
    """Fit ``classifier`` behind a ``StandardScaler`` and report its test accuracy.

    Reads the notebook-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``;
    the accuracy is printed through ``show_result`` and nothing is returned.
    """
    scaled_model = make_pipeline(StandardScaler(), classifier)
    scaled_model.fit(X_train, y_train)
    predictions = scaled_model.predict(X_test)
    show_result(y_test, predictions)

### Splitting data

In [14]:
# Hold out 20% of the samples for testing. The seed is fixed so that the split,
# and therefore every accuracy reported below, is reproducible on a fresh
# Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    data, target, test_size=0.2, random_state=42
)

### Logistic regression implementation

In [15]:
class MyLogisticRegression(BaseEstimator, ClassifierMixin):
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    append_intercept : bool, default=True
        When true, a column of ones is prepended to ``X`` in both ``fit`` and
        ``predict`` so that the first weight acts as the bias term.
    learning_rate : float, default=10
        Factor applied to the gradient at every step. The gradient is the sum
        over all samples (it is not divided by the sample count).
    iterations : int, default=300
        Number of gradient descent steps performed by ``fit``.
    prediction_threshold : float, default=0.5
        Samples whose predicted probability is strictly below this value are
        labelled 0; all others are labelled 1.

    Attributes
    ----------
    theta : ndarray
        Weight vector learned by ``fit`` (one entry per column of ``X``, plus
        a leading intercept weight when ``append_intercept`` is true).

    Notes
    -----
    ``predict`` only ever emits 0.0 or 1.0, so ``y`` is presumably expected to
    hold 0/1 labels -- confirm before using other label encodings.
    """

    def __init__(self, 
                 append_intercept = True, 
                 learning_rate = 10, 
                 iterations = 300,
                 prediction_threshold = 0.5):
        self.append_intercept = append_intercept
        self.learning_rate = learning_rate
        self.iterations = iterations
        self.prediction_threshold = prediction_threshold

    def __sigmoid(self, x):
        """Element-wise logistic function, evaluated without overflowing.

        ``1 / (1 + exp(-x))`` overflows in ``exp`` for large negative ``x``.
        For those entries the algebraically equal ``exp(x) / (1 + exp(x))`` is
        used instead, so ``exp`` never receives a positive argument.
        """
        x = np.asarray(x, dtype=float)
        result = np.empty_like(x)
        non_negative = x >= 0
        result[non_negative] = 1. / (1. + np.exp(-x[non_negative]))
        exp_x = np.exp(x[~non_negative])
        result[~non_negative] = exp_x / (1. + exp_x)
        return result

    def __predict_proba(self, X, theta):
        """Return sigmoid(X . theta), the per-row probability of label 1."""
        logits = X.dot(theta)
        return self.__sigmoid(logits)

    def __gradients(self, X, y, proba):
        """Return the update step: ``learning_rate * X^T (proba - y)``."""
        gradients = X.T.dot(proba - y)
        gradients *= self.learning_rate
        return gradients

    def __append_intercept(self, a):
        """Return ``a`` with a leading column of ones."""
        intercept = np.ones((a.shape[0], 1))
        return np.hstack((intercept, a))

    def fit(self, X, y):
        """Learn ``theta`` from ``X`` and ``y`` by gradient descent.

        Weights start at zero and are updated ``iterations`` times using the
        whole of ``X`` at each step.

        Returns
        -------
        self : MyLogisticRegression
            The fitted estimator, as the scikit-learn estimator API requires,
            so that calls such as ``clf.fit(X, y).predict(X)`` can be chained.
        """
        if self.append_intercept:
            X = self.__append_intercept(X)

        theta = np.zeros(X.shape[1])
        for _ in range(self.iterations):
            proba = self.__predict_proba(X, theta)
            theta -= self.__gradients(X, y, proba)

        self.theta = theta
        return self

    def predict(self, X):
        """Return a float array holding 0.0 or 1.0 for every row of ``X``.

        A row is labelled 0.0 when its probability is strictly below
        ``prediction_threshold`` and 1.0 otherwise. ``fit`` must have been
        called first, since ``self.theta`` is read here.
        """
        if self.append_intercept:
            X = self.__append_intercept(X)

        proba = self.__predict_proba(X, self.theta)
        return np.where(proba < self.prediction_threshold, 0., 1.)

### Using MyLogisticRegression classifier

In [16]:
def find_best_params(classifier):
    """Grid-search ``learning_rate`` and ``iterations`` for ``classifier``.

    Runs a 5-fold ``GridSearchCV`` on the notebook-level ``X_train`` and
    ``y_train``, prints the best combination, then prints one line per tried
    combination with its cross-validated score.

    Parameters
    ----------
    classifier : estimator
        Must accept ``learning_rate`` and ``iterations`` as parameters.

    Returns
    -------
    dict
        ``grid_search.best_params_``.
    """
    # NOTE(review): the search is fit on the raw X_train, whereas
    # train_and_test standardizes the features first -- confirm that the
    # parameters found here are meant to transfer to the scaled pipeline.
    parameters = {
        'learning_rate': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
        'iterations': [5, 500, 5000]
    }
    grid_search = GridSearchCV(classifier, parameters, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(X_train, y_train)
    print(grid_search.best_params_)

    cvres = grid_search.cv_results_
    for ms, params in zip(cvres["mean_test_score"], cvres["params"]):
        # The scorer yields negated MSE, so sqrt(-ms) is the RMSE.
        # %g keeps fractional learning rates readable; int() used to truncate
        # 0.001, 0.01 and 0.1 to 0.
        label = 'learning_rate=%g, iterations=%d :' % (
            params["learning_rate"], params["iterations"])
        print(label, np.sqrt(-ms))

    return grid_search.best_params_

# Evaluate the hand-written classifier with a small step size and many iterations.
train_and_test(
    MyLogisticRegression(
        learning_rate=0.01,
        iterations=5000,
    )
)

Accuracy score: 0.9737



### Using LogisticRegression from sklearn to test data

In [17]:
# Reference run: scikit-learn's LogisticRegression with default settings,
# evaluated through the same scaling pipeline and train/test split.
sklearn_baseline = LogisticRegression()
train_and_test(sklearn_baseline)

Accuracy score: 0.9825

