In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
import sys

# Logistic Regression

In [4]:
# Load scikit-learn's iris dataset; its `.data` and `.target` attributes are used in the cells below.
iris = datasets.load_iris()

In [5]:
def cross_val_split_set(X,portion,y=None):
    """Randomly split the rows of ``X`` (and optionally ``y``) into train and test parts.

    Parameters
    ----------
    X : array-like
        Samples, one per entry along the first axis.
    portion : float
        Fraction of rows placed in the test part. ``int(n_rows * portion)``
        rows are drawn without replacement using NumPy's global RNG, so call
        ``np.random.seed`` beforehand for a reproducible split.
    y : array-like, optional
        Labels aligned with the rows of ``X``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, X_test)`` when ``y`` is None, otherwise
        ``(X_train, X_test, y_train, y_test)``. Training rows keep their
        original relative order; test rows are in the order they were drawn.

    Raises
    ------
    ValueError
        If ``portion`` is not within ``[0, 1]``.
    """
    X = np.asarray(X)
    n_rows = X.shape[0]
    if not 0 <= portion <= 1:
        raise ValueError("portion must be between 0 and 1, got %r" % (portion,))

    size = int(n_rows * portion)
    indexlist = np.arange(n_rows)
    testinds = np.random.choice(indexlist, size, replace=False)

    # A boolean mask gives the complement in O(n) and stays a valid (integer)
    # index even when every row was drawn for the test part.
    is_train = np.ones(n_rows, dtype=bool)
    is_train[testinds] = False
    traininds = indexlist[is_train]

    # Decide on the original argument, before any array conversion.
    if y is None:
        return X[traininds], X[testinds]

    y = np.asarray(y)
    return X[traininds], X[testinds], y[traininds], y[testinds]

In [6]:
# Keep only the rows whose target is 0 or 1 (all 0-rows first, then all 1-rows)
# so the problem is binary.
X = iris.data
y = iris.target
indices = np.concatenate((np.flatnonzero(y == 0), np.flatnonzero(y == 1)))
X, y = X[indices], y[indices]

# Hold out 10% of the remaining rows as the test split.
X_train, X_test, y_train, y_test = cross_val_split_set(X, 0.1, y)

In [7]:
def sigmoid(x,derivative=False):
    '''
    Logistic sigmoid ``1 / (1 + e**-x)``, evaluated element-wise.

    x          : scalar or array-like of real numbers.
    derivative : when truthy, return the derivative ``s * (1 - s)`` instead,
                 where ``s`` is the sigmoid of ``x``.

    Returns a NumPy float (scalar input) or a float array shaped like ``x``.
    The value is computed in a form that does not overflow, so very large
    negative or positive inputs yield 0.0 / 1.0 instead of an error.
    '''
    z = np.asarray(x, dtype=float)
    # logaddexp(0, -z) == log(1 + e**-z) without ever forming e**-z directly,
    # hence sigmoid(z) == exp(-logaddexp(0, -z)) is safe for any magnitude.
    s = np.exp(-np.logaddexp(0.0, -z))
    if derivative:
        return s * (1.0 - s)
    return s

In [43]:
def Batch_Gradient_Descent(X,y,parameters,gradient_func,predict_func,learning_rate=0.001,epochs=200,batch_size=32,verbose=True):
    """Minimise a loss with mini-batch gradient descent.

    Every epoch the rows are visited once in a freshly shuffled order (NumPy's
    global RNG), in consecutive chunks of ``batch_size`` rows; the last chunk
    of an epoch may be smaller. After each chunk the parameters are moved
    against the gradient reported for that chunk.

    Parameters
    ----------
    X : array-like
        Samples, one per entry along the first axis.
    y : array-like
        Targets aligned with the rows of ``X``.
    parameters : numpy.ndarray
        Starting parameter values. The argument is not modified in place.
    gradient_func : callable
        Called as ``gradient_func(parameters, batch_X, batch_y)``; must return
        the gradient with respect to ``parameters``.
    predict_func : callable
        Not called. Accepted only so existing call sites keep working: the
        per-epoch prediction it used to produce was never read.
    learning_rate : float
        Step size applied to every gradient.
    epochs : int
        Number of full passes over the data.
    batch_size : int
        Rows per update step; must be at least 1.
    verbose : bool
        When true (default), print ``EPOCHS: <i>`` after each epoch.

    Returns
    -------
    numpy.ndarray
        The parameters after the final update.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1 (it would never advance).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))

    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = X.shape[0]

    for i in range(epochs):

        # New random visiting order for this epoch.
        indices = np.arange(n_samples)
        np.random.shuffle(indices)

        for start in range(0, n_samples, batch_size):
            batch = indices[start:start + batch_size]
            parameters = parameters - learning_rate*gradient_func(parameters, X[batch], y[batch])

        if verbose:
            print("EPOCHS: " + str(i))

    return parameters
    

In [44]:
# Scratch check: draw a random ordering of the row positions of y and show its shape.
indices = np.random.permutation(y.shape[0])
indices.shape

(100,)

In [45]:
sys.path.append('../tools')
import tools
import iterative_methods

class LogisticRegression():
    """Binary classifier: a linear score passed through ``tools.sigmoid``.

    The weights are fitted by ``Batch_Gradient_Descent`` (defined in an
    earlier cell). No intercept is added here; callers that want one must
    put a column of ones into ``X`` themselves.

    ``tools`` is the project-local module imported at the top of this cell;
    its ``sigmoid`` is presumably the logistic function -- confirm in tools.py.
    """

    def __init__(self,X,y):
        """Store the training data and draw one random starting weight per column of ``X``."""
        self.X = X
        self.y = y
        # This used to be `np.random.uniform(10, size=...)`, which binds 10 to
        # `low` and leaves `high` at its default 1.0 -- a low > high call whose
        # result NumPy documents as undefined. The explicit bounds keep the
        # same 1..10 range.
        # NOTE(review): weights this large may saturate the sigmoid at the
        # start of training; a smaller range might be intended -- confirm.
        self.weights = np.random.uniform(1.0, 10.0, size=X.shape[1])

    def gradient_func(self,parameters,X,y):
        """Return ``X.T . (h - y)``, with ``h`` the model output for ``parameters`` on ``X``."""
        h = self.predict(X, parameters)
        return X.T.dot(h - y)

    def predict(self,X,parameters=None):
        """Evaluate the model on ``X``.

        With ``parameters`` omitted (or the legacy scalar ``0``), the stored
        ``self.weights`` are used and hard labels are returned: 1.0 where the
        sigmoid output exceeds 0.5, otherwise 0.0.

        With an explicit parameter vector, the raw sigmoid outputs for that
        vector are returned. An all-zero vector is a legitimate value and is
        used as given; it is no longer mistaken for "not supplied".
        """
        use_stored = parameters is None or (np.isscalar(parameters) and parameters == 0)
        if use_stored:
            probabilities = tools.sigmoid(X.dot(self.weights)).astype(float)
            return (probabilities > 0.5).astype(float)
        return tools.sigmoid(X.dot(parameters)).astype(float)

    def train(self):
        """Fit ``self.weights`` on the stored data using the optimiser's default settings."""
        self.weights = Batch_Gradient_Descent(self.X,self.y,self.weights,self.gradient_func,self.predict)
        
        
        

In [46]:
# Prepend a column of ones to the training features.
# Re-running this cell prepends one more column each time.
ones = np.ones((X_train.shape[0], 1), dtype=float)
X_train = np.hstack((ones, X_train))

In [47]:
# Build the model on the training split (X_train already carries the leading column of ones).
lr = LogisticRegression(X_train,y_train)

In [48]:
# Fit the weights; train() calls Batch_Gradient_Descent with its default settings.
lr.train()

(90,)
0
32
64
EPOCHS: 0
(90,)
0
32
64
EPOCHS: 1
(90,)
0
32
64
EPOCHS: 2
(90,)
0
32
64
EPOCHS: 3
(90,)
0
32
64
EPOCHS: 4
(90,)
0
32
64
EPOCHS: 5
(90,)
0
32
64
EPOCHS: 6
(90,)
0
32
64
EPOCHS: 7
(90,)
0
32
64
EPOCHS: 8
(90,)
0
32
64
EPOCHS: 9
(90,)
0
32
64
EPOCHS: 10
(90,)
0
32
64
EPOCHS: 11
(90,)
0
32
64
EPOCHS: 12
(90,)
0
32
64
EPOCHS: 13
(90,)
0
32
64
EPOCHS: 14
(90,)
0
32
64
EPOCHS: 15
(90,)
0
32
64
EPOCHS: 16
(90,)
0
32
64
EPOCHS: 17
(90,)
0
32
64
EPOCHS: 18
(90,)
0
32
64
EPOCHS: 19
(90,)
0
32
64
EPOCHS: 20
(90,)
0
32
64
EPOCHS: 21
(90,)
0
32
64
EPOCHS: 22
(90,)
0
32
64
EPOCHS: 23
(90,)
0
32
64
EPOCHS: 24
(90,)
0
32
64
EPOCHS: 25
(90,)
0
32
64
EPOCHS: 26
(90,)
0
32
64
EPOCHS: 27
(90,)
0
32
64
EPOCHS: 28
(90,)
0
32
64
EPOCHS: 29
(90,)
0
32
64
EPOCHS: 30
(90,)
0
32
64
EPOCHS: 31
(90,)
0
32
64
EPOCHS: 32
(90,)
0
32
64
EPOCHS: 33
(90,)
0
32
64
EPOCHS: 34
(90,)
0
32
64
EPOCHS: 35
(90,)
0
32
64
EPOCHS: 36
(90,)
0
32
64
EPOCHS: 37
(90,)
0
32
64
EPOCHS: 38
(90,)
0
32
64
EPOCHS: 39
(90,)
0
32

In [11]:
# Prepend the same column of ones to the held-out features so they match X_train's layout.
# Re-running this cell prepends one more column each time.
ones = np.ones((X_test.shape[0], 1))
X_test = np.hstack((ones, X_test))

In [12]:
# No parameters are passed, so predict() uses the model's stored weights and returns 0/1 labels.
predictions = lr.predict(X_test)

In [13]:
# Display the true labels of the held-out rows.
y_test

array([1, 1, 0, 0, 1, 1, 0, 1, 1, 1])

In [14]:
# Display the predicted labels for the same held-out rows.
predictions

array([1., 1., 0., 0., 1., 1., 0., 1., 1., 1.])

In [15]:
def calc_accuracy(obj,xtest,ytest):
    """Return the fraction of samples for which ``obj.predict(xtest)`` equals ``ytest``.

    Parameters
    ----------
    obj : object
        Anything with a ``predict(xtest)`` method returning one label per sample.
    xtest : array-like
        Passed to ``obj.predict`` unchanged.
    ytest : array-like
        True labels, one per sample.

    Returns
    -------
    float
        Share of matching labels, between 0.0 and 1.0.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of labels.
    ZeroDivisionError
        If ``ytest`` is empty.
    """
    # Flatten both sides so a column-vector prediction cannot broadcast against
    # 1-D labels into an (n, n) comparison and inflate the count.
    labels = np.asarray(ytest).ravel()
    predicted = np.asarray(obj.predict(xtest)).ravel()
    if predicted.shape != labels.shape:
        raise ValueError(
            "got %d predictions for %d labels" % (predicted.size, labels.size)
        )
    if labels.size == 0:
        raise ZeroDivisionError("cannot compute accuracy on an empty test set")

    # Equality rather than subtraction, so non-numeric labels work as well.
    return float(np.count_nonzero(predicted == labels)) / labels.size

In [16]:
# Element-wise difference between labels and predictions (0 where they agree).
# No later cell shown here reads this variable; calc_accuracy computes its own.
acc = y_test - predictions

In [17]:
# Fraction of held-out rows whose predicted label equals the true label.
calc_accuracy(lr,X_test,y_test)

1.0