## Logistic Regression
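In this notebook we implement binary logistic regression from scratch with NumPy, train it with batch gradient descent on a synthetic two-cluster dataset, and plot the training loss.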

In [67]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode, iplot
from sklearn.datasets import make_classification,make_blobs
import warnings
warnings.filterwarnings("ignore")

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List every file under the Kaggle input directory (prints nothing when the
# directory does not exist). The unused sub-directory slot is named `_subdirs`
# rather than `_` so this loop does not rebind `_`, which IPython uses for the
# most recent cell output.
for dirname, _subdirs, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [69]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    The model computes ``sigmoid(X @ w + b)`` with ``w`` of shape
    ``(n_features, 1)`` and a scalar bias ``b``.  ``fit`` returns the learned
    ``(w, b)`` and also stores them on the instance as ``self.w`` / ``self.b``,
    so ``predict`` and ``predict_label`` can be called either with explicit
    parameters or with none at all.

    Parameters
    ----------
    epochs : int
        Index of the last epoch. ``fit`` iterates over ``range(epochs + 1)``,
        so the loss is logged for epochs ``0..epochs`` and one gradient step
        is taken after each logged loss.
    learning_rate : float
        Step size of each gradient-descent update.
    log_every : int
        Print the loss every ``log_every`` epochs; ``0`` or ``None`` disables
        printing. The loss is recorded in ``self.log`` on every epoch
        regardless of this setting.
    """

    def __init__(self,epochs=100,learning_rate=.01,log_every=100):
        self.epochs = epochs
        self.learning_rate = learning_rate
        self.log_every = log_every
        # Learned parameters; populated by fit().
        self.w = None
        self.b = None
        # Per-epoch training history; reset at the start of every fit().
        self.log = {'train_loss':[],'epoch':[]}

    def _resolve_params(self,w,b):
        """Return ``(w, b)``, falling back to the fitted values when omitted."""
        if w is None:
            w = self.w
        if b is None:
            b = self.b
        if w is None or b is None:
            raise ValueError("No parameters available: call fit() first or pass w and b explicitly.")
        return w,b

    def predict(self,X,w=None,b=None):
        """Return the predicted probability of class 1 for each row of ``X``.

        ``w`` and ``b`` default to the parameters stored by the last ``fit``.
        Raises ``ValueError`` if neither explicit nor fitted parameters exist.
        """
        w,b = self._resolve_params(w,b)
        z = np.dot(X,w) + b
        # Numerically stable sigmoid: exp() only ever sees a non-positive
        # argument, so large |z| cannot overflow.
        exp_neg_abs = np.exp(-np.abs(z))
        y_hat = np.where(z >= 0, 1.0/(1.0 + exp_neg_abs), exp_neg_abs/(1.0 + exp_neg_abs))
        return y_hat

    def predict_label(self,X,w=None,b=None,threshold=.5):
        """Return a boolean array: ``True`` where the probability exceeds ``threshold``."""
        y_hat = self.predict(X,w,b)
        y_labels = y_hat > threshold
        return y_labels

    def cost(self,y_hat,y):
        """Return the mean binary cross-entropy between ``y_hat`` and ``y``.

        ``y`` is reshaped to the shape of ``y_hat`` so that a 1-D label vector
        cannot broadcast against a column of predictions. Raises ``ValueError``
        (from the reshape) if the two do not hold the same number of elements.
        """
        eps = 1e-15
        # Keep probabilities strictly inside (0, 1) so log() never yields
        # -inf and 0 * log(0) never yields NaN once the sigmoid saturates.
        y_hat = np.clip(np.asarray(y_hat, dtype=float), eps, 1 - eps)
        y = np.asarray(y).reshape(y_hat.shape)
        m = y_hat.shape[0]
        cost = -(1/m)*np.sum((y*np.log(y_hat)+(1-y)*np.log(1-y_hat)))
        return cost

    def gradient_descent(self,w,b,X,y,y_hat):
        """Take one gradient-descent step and return the updated ``(w, b)``."""
        m = X.shape[0]
        # Align the labels with the predictions before subtracting; a (m,)
        # vector minus a (m, 1) column would otherwise broadcast to (m, m).
        residual = np.asarray(y).reshape(np.shape(y_hat)) - y_hat
        dCdw = (-1/m)*np.dot(X.T,residual)
        dCdb = (-1/m)*np.sum(residual)
        w = w - self.learning_rate * dCdw
        b = b - self.learning_rate * dCdb
        return w,b

    def plot_loss(self):
        """Return a plotly ``Scatter`` trace of the logged training loss per epoch."""
        loss = self.log['train_loss']
        epoch = self.log['epoch']
        trace = go.Scatter(x=epoch,y=loss,marker=dict(color='#ffdc51'))
        return trace

    def fit(self,X,y):
        """Train on ``X`` (n_samples, n_features) and binary labels ``y``.

        ``y`` may be 1-D or a column vector; it is converted to a float column
        internally (the caller's array is not modified).

        Returns
        -------
        (w, b) : the learned weight column of shape ``(n_features, 1)`` and
        the scalar bias. Both are also stored as ``self.w`` and ``self.b``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from the
            number of labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).reshape(-1, 1)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features).")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample.")
        if y.shape[0] != X.shape[0]:
            raise ValueError("X and y must contain the same number of samples.")

        self.m = X.shape[0]
        self.n = X.shape[1]
        # Start a fresh history so that re-fitting does not append a second
        # run of epochs to the previous one.
        self.log = {'train_loss':[],'epoch':[]}
        w = np.zeros((self.n, 1))
        b = 1
        for epoch in range(self.epochs+1):
            y_hat = self.predict(X, w, b)
            cost = self.cost(y_hat, y)
            self.log['train_loss'].append(cost)
            self.log['epoch'].append(epoch)
            if self.log_every and epoch % self.log_every == 0:
                print(f"Loss at epoch: {epoch} is {cost}")

            w,b = self.gradient_descent(w,b, X, y, y_hat)

        self.w, self.b = w, b
        return w,b

In [70]:
# Seed the data generator so the synthetic dataset, and therefore the loss
# curve and accuracy below, are the same on every Restart & Run All.
random_seed = 22
X, y = make_blobs(n_samples=1000, centers=2, random_state=random_seed)
# Turn the label vector into a column so it lines up with the (n_samples, 1)
# predictions used inside LogisticRegression.
y = y[:, np.newaxis]
logistic_regression = LogisticRegression(epochs=1000)
w,b = logistic_regression.fit(X, y)

Loss at epoch: 0 is 0.8132616875182228
Loss at epoch: 100 is 0.017954387135383443
Loss at epoch: 200 is 0.00918975825857946
Loss at epoch: 300 is 0.0062070652373658955
Loss at epoch: 400 is 0.004698901458959189
Loss at epoch: 500 is 0.0037867968820089926
Loss at epoch: 600 is 0.0031749242485173
Loss at epoch: 700 is 0.002735602005425611
Loss at epoch: 800 is 0.0024046386722661683
Loss at epoch: 900 is 0.0021462107895987774
Loss at epoch: 1000 is 0.0019387409073048241


In [71]:
# Plot the training loss recorded at every epoch; title and axis labels let
# the figure stand on its own when the notebook is skimmed.
data = logistic_regression.plot_loss()
fig1 = go.Figure(data = [data])
fig1.update_layout(
    title="Training loss",
    xaxis_title="Epoch",
    yaxis_title="Binary cross-entropy",
)
iplot(fig1)

In [73]:
# Accuracy on the training data itself: the fraction of samples whose
# thresholded prediction matches the label.
y_pred = logistic_regression.predict_label(X, w, b)
print("Accuracy: ", (y_pred == y).sum() / len(X))

Accuracy:  1.0
