## Logistic Regression from scratch

derivative blog: https://medium.com/analytics-vidhya/derivative-of-log-loss-function-for-logistic-regression-9b832f025c2d

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [4]:
data = pd.read_csv('../data/Iris.csv')
data = data.drop('Id', axis=1)
data.shape

(150, 5)

In [5]:
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [None]:
data['Species'].value_counts()

In [7]:
data =data[data['Species'].isin(['Iris-setosa', 'Iris-versicolor']) ]

In [9]:
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [11]:
label_encode = LabelEncoder()
data['Species'] = label_encode.fit_transform(data['Species'])

In [12]:
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [22]:
data.iloc[:,:4]

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
95,5.7,3.0,4.2,1.2
96,5.7,2.9,4.2,1.3
97,6.2,2.9,4.3,1.3
98,5.1,2.5,3.0,1.1


In [23]:
x_train, x_test, y_train, y_test = train_test_split(data.iloc[:,:4], data['Species'])

In [36]:
class LogisticRegression():
    def __init__(self,iterations, learning_rate):
        self.iterations = iterations
        self.learning_rate = learning_rate
    
    def sigmoid(self, X):
        return 1/(1+np.exp(-(X.dot(self.W)+self.b)))
    
    def fit(self, X, y):
        self.X = X
        self.y = y
        self.m, self.n = X.shape
        self.W = np.zeros(self.n)
        self.b = 0
        
        for i in range(self.iterations):
            self.update_weights()
        return self
    
    def update_weights(self):
        # calculate gradients
        y_hat = self.predict(self.X)
        tmp = (y_hat-self.y.T)
        tmp = np.reshape( tmp, self.m ) 
        dW = np.dot( self.X.T, tmp ) / self.m 
        db = np.sum( tmp ) / self.m 
        
        self.W = self.W - self.learning_rate * dW  
        self.b = self.b - self.learning_rate * db
        
        return self
    
    def predict(self, X):
        Z = self.sigmoid(X)
        Y = np.where(Z>0.5,1,0)
        return Y
    

In [37]:
lr_model = LogisticRegression( learning_rate = 0.01, iterations = 100)
lr_model.fit(x_train, y_train)

<__main__.LogisticRegression at 0x153fd4490>

In [38]:
pred = lr_model.predict(x_test)

In [39]:
pred

array([1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1,
       0, 1, 0])