## Logistic Regression

### Logistic Regression - Manual Implementation

<img src = "https://ml-cheatsheet.readthedocs.io/en/latest/_images/logistic_cost_function_vectorized.png"></img>

In [1]:
from sklearn.datasets import load_breast_cancer
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import pandas as pd

In [2]:
df_breast_cancer = pd.DataFrame(load_breast_cancer()['data'], columns= load_breast_cancer()['feature_names'])
scaler = StandardScaler()
df_breast_cancer = pd.DataFrame(scaler.fit_transform(df_breast_cancer), columns = df_breast_cancer.columns)
df_breast_cancer['target'] = load_breast_cancer()['target']

In [3]:
def sigmoid(z):
    return 1.0 / (1 + np.exp(-z))

def predict(features, weights):
    z = features.dot(weights)
    return sigmoid(z)

def cost_calculate(features, target, weights):
    predictions = predict(features, weights)
    cost = (-target.dot(np.log(predictions)) - ((1-target).dot(np.log(1-predictions))))
    return cost

def classify(predictions):
    return predictions.apply(lambda x: 1 if x>=0.5 else 0)

In [4]:
def logistic_regression_GD(orig_dataset, features, target, n_iter = 1e4, lr = 1e-4, tolerance = 1e-6):
    
    dataset = orig_dataset.copy()
    
    feat = ['coef']
    dataset['coef'] = 1
    feat.extend(features)
    feat = features
    
    #print(features)
    H = dataset[features]
    y = dataset[target]
    
    weights = np.random.randn(len(features))
    
    cost_prev = np.inf
    for i in range(int(n_iter)):
        
        predictions = predict(H, weights)
        cost_ = cost_calculate(H, y, weights)
        
        if cost_ > cost_prev:
            print('Reducing Learning Rate')
            lr = lr/10
    
        #print(cost_)
        gradient = (H.T.dot(predictions - y))
        
        if abs(cost_ - cost_prev) < tolerance:
            break
            
        cost_prev = cost_ 
        weights -= lr * gradient

    predictions = predict(H, weights)
    y_pred = classify(predictions).values
    
    print('Accuracy', accuracy_score(y, y_pred))
    print('Confusion Matrix\n ', confusion_matrix(y, y_pred))

In [5]:
%%time
logistic_regression_GD(df_breast_cancer, df_breast_cancer.drop('target', axis=1).columns, 'target')

Accuracy 0.9894551845342706
Confusion Matrix
  [[207   5]
 [  1 356]]
Wall time: 1min 6s


### Logistic Regression - Sklearn Implementation

In [6]:
%%time

from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0).fit(df_breast_cancer.drop('target', axis=1), df_breast_cancer['target'])

y_pred_sklearn = clf.predict(df_breast_cancer.drop('target', axis=1))

print('Accuracy', accuracy_score(df_breast_cancer['target'], y_pred_sklearn))
print('Confusion Matrix\n ', confusion_matrix(df_breast_cancer['target'], y_pred_sklearn))

Accuracy 0.9876977152899824
Confusion Matrix
  [[207   5]
 [  2 355]]
Wall time: 224 ms


## Next Action Items:

* Implementation of Ridge & Lasso Regularization