In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
import sys

# Logistic Regression

In [2]:
# Load scikit-learn's bundled iris dataset; its `.data` and `.target`
# attributes are read in the next cell.
iris = datasets.load_iris()

In [3]:
def cross_val_split_set(X,portion,y=None):
    """Randomly split ``X`` (and optionally ``y``) into train and test parts.

    Parameters
    ----------
    X : array-like
        Samples; the split is made along the first axis.
    portion : float
        Fraction of the rows placed in the test part. The test size is
        ``int(X.shape[0] * portion)``, i.e. truncated towards zero.
    y : array-like, optional
        Values aligned with the rows of ``X``. When omitted only ``X`` is
        split.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_train, X_test)`` when ``y`` is None, otherwise
        ``(X_train, X_test, y_train, y_test)``. Train rows keep their
        original relative order; test rows are in the sampled order.

    Notes
    -----
    Test rows are drawn without replacement from NumPy's global random
    state, so call ``np.random.seed`` beforehand for a reproducible split.
    """
    X = np.asarray(X)
    n_samples = X.shape[0]
    size = int(n_samples*portion)
    testinds = np.random.choice(np.arange(n_samples), size, replace=False)

    # A boolean mask gives the complement in O(n) and always yields integer
    # indices, even when no training rows remain (an empty Python list would
    # become a float array and be rejected as an index).
    is_test = np.zeros(n_samples, dtype=bool)
    is_test[testinds] = True
    traininds = np.flatnonzero(~is_test)

    # Check for a missing ``y`` before converting it to an array.
    if y is None:
        return X[traininds],X[testinds]

    y = np.asarray(y)
    return X[traininds],X[testinds],y[traininds],y[testinds]

In [4]:
# Feature matrix and class labels taken from the loaded iris dataset.
X = iris.data
y = iris.target
# Keep only the samples labelled 0 or 1 so the task is binary
# (class-0 row indices first, then class-1 row indices).
indices = np.append(np.where(y==0)[0],np.where(y==1)[0])
X = X[indices]
y = y[indices]
# Hold out 10% of the rows as the test set. The split uses NumPy's global
# random state and no seed is set in the cells shown, so the held-out rows
# differ from run to run.
X_train, X_test, y_train, y_test = cross_val_split_set(X,0.1,y)

In [5]:
def sigmoid(x,derivative=False):
    """Logistic function ``1 / (1 + e**-x)``, or its first derivative.

    With ``derivative=True`` the value ``s(x) * (1 - s(x))`` is returned,
    where ``s`` is the logistic function; otherwise ``s(x)`` itself.
    Multiplying by 1.0 promotes integer input to floating point before
    the exponent is taken.
    """
    activation = 1/(1+np.e**(-(x*1.0)))
    if derivative==True:
        return activation*(1-activation)
    return activation

In [6]:
def Batch_Gradient_Descent(X,y,parameters,gradient_func,predict_func,learning_rate=0.001,epochs=200,batch_size=32):
    """Optimise ``parameters`` with mini-batch gradient descent.

    Every epoch the samples are visited once, in a freshly shuffled order,
    in chunks of ``batch_size``; after each chunk the parameters are moved
    against the gradient reported by ``gradient_func``.

    Parameters
    ----------
    X : array-like
        Samples, indexed along the first axis.
    y : array-like
        Targets aligned with the rows of ``X``.
    parameters : numpy.ndarray
        Starting point of the optimisation.
    gradient_func : callable
        Called as ``gradient_func(parameters, batch_X, batch_y)``; must
        return the gradient with respect to ``parameters``.
    predict_func : callable
        Accepted so existing callers keep working; it is not called,
        because the gradient function receives everything it needs.
    learning_rate : float
        Step size applied to every gradient.
    epochs : int
        Number of full passes over the data.
    batch_size : int
        Number of samples per update; the last batch of an epoch may be
        smaller.

    Returns
    -------
    numpy.ndarray
        The parameters after the final update.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1 (the loop could never advance).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    X = np.asarray(X)
    y = np.asarray(y)
    n_samples = y.shape[0]

    for i in range(epochs):

        # Draw a new visiting order each epoch and actually use it to build
        # the batches; slicing X and y directly would replay the same
        # batches in the same order every time.
        indices = np.arange(n_samples)
        np.random.shuffle(indices)

        for start in range(0, n_samples, batch_size):
            batch = indices[start:start+batch_size]
            parameters = parameters - learning_rate*gradient_func(parameters,X[batch],y[batch])

        print("EPOCHS: " + str(i))

    return parameters
    

In [7]:
sys.path.append('../tools')
import tools
import iterative_methods

class LogisticRegression():
    """Binary logistic-regression classifier fitted by batch gradient descent.

    The class relies on two helper modules imported in the same cell:
    ``tools`` (for ``sigmoid``) and ``iterative_methods`` (for
    ``Batch_Gradient_Descent``). No intercept is added automatically; the
    model has exactly one weight per column of ``X``.
    """

    def __init__(self,X,y):
        """Store the training data and draw random initial weights.

        Parameters
        ----------
        X : numpy.ndarray
            Training samples; one weight is created per column
            (``X.shape[1]``).
        y : numpy.ndarray
            Training targets aligned with the rows of ``X``.
        """
        self.X = X
        self.y = y
        # The previous call, np.random.uniform(10, size=...), passed 10 as
        # `low` with the default high=1.0. NumPy documents high < low as
        # undefined behaviour, so the same effective range is requested
        # here with explicit, correctly ordered bounds.
        # NOTE(review): confirm that weights between 1 and 10 are intended.
        self.weights = np.random.uniform(1.0,10.0,size=X.shape[1])

    def gradient_func(self,parameters,X,y):
        """Return ``X.T.dot(h - y)``, where ``h`` is the predicted probability.

        ``h`` is obtained from ``predict(X, parameters)``, so it is always
        evaluated at the supplied ``parameters``.
        """
        h = self.predict(X,parameters)
        return (X.T.dot(h-y))

    def predict(self,X,parameters=None):
        """Predict class labels or probabilities for ``X``.

        Parameters
        ----------
        X : numpy.ndarray
            Samples with the same column layout as the training data.
        parameters : numpy.ndarray, optional
            Weight vector to evaluate. When omitted (``None``, or the legacy
            scalar ``0``) the stored ``self.weights`` are used.

        Returns
        -------
        numpy.ndarray
            Without ``parameters``: labels, 1.0 where the probability is
            above 0.5 and 0.0 otherwise. With ``parameters``: the float
            probabilities ``sigmoid(X.dot(parameters))``.
        """
        # Only a missing argument selects the stored weights. An all-zero
        # weight *vector* is a legitimate point for the optimiser and must
        # be evaluated as given, not replaced by self.weights.
        use_stored = parameters is None or (np.ndim(parameters) == 0 and parameters == 0)
        if use_stored:
            probabilities = tools.sigmoid(X.dot(self.weights)).astype(float)
            return (probabilities > 0.5).astype(float)
        return tools.sigmoid(X.dot(parameters)).astype(float)

    def train(self):
        """Fit ``self.weights`` on the stored training data.

        Delegates to ``iterative_methods.Batch_Gradient_Descent`` with that
        routine's default hyper-parameters and stores the returned weights.
        """
        self.weights = iterative_methods.Batch_Gradient_Descent(self.X,self.y,self.weights,self.gradient_func,self.predict)
        
        
        

In [8]:
# Prepend a column of ones to the training features so the first weight can
# act as the intercept term.
# NOTE: X_train is overwritten, so re-running this cell adds another column.
ones = np.ones(X_train.shape[0]).reshape(X_train.shape[0],1).astype(float)
X_train = np.column_stack((ones,X_train))

In [9]:
# Build the model on the training split (features already carry the ones column).
lr = LogisticRegression(X_train,y_train)

In [10]:
# Fit the weights; the captured output shows one "EPOCHS: i" line per epoch.
lr.train()

EPOCHS: 0
EPOCHS: 1
EPOCHS: 2
EPOCHS: 3
EPOCHS: 4
EPOCHS: 5
EPOCHS: 6
EPOCHS: 7
EPOCHS: 8
EPOCHS: 9
EPOCHS: 10
EPOCHS: 11
EPOCHS: 12
EPOCHS: 13
EPOCHS: 14
EPOCHS: 15
EPOCHS: 16
EPOCHS: 17
EPOCHS: 18
EPOCHS: 19
EPOCHS: 20
EPOCHS: 21
EPOCHS: 22
EPOCHS: 23
EPOCHS: 24
EPOCHS: 25
EPOCHS: 26
EPOCHS: 27
EPOCHS: 28
EPOCHS: 29
EPOCHS: 30
EPOCHS: 31
EPOCHS: 32
EPOCHS: 33
EPOCHS: 34
EPOCHS: 35
EPOCHS: 36
EPOCHS: 37
EPOCHS: 38
EPOCHS: 39
EPOCHS: 40
EPOCHS: 41
EPOCHS: 42
EPOCHS: 43
EPOCHS: 44
EPOCHS: 45
EPOCHS: 46
EPOCHS: 47
EPOCHS: 48
EPOCHS: 49
EPOCHS: 50
EPOCHS: 51
EPOCHS: 52
EPOCHS: 53
EPOCHS: 54
EPOCHS: 55
EPOCHS: 56
EPOCHS: 57
EPOCHS: 58
EPOCHS: 59
EPOCHS: 60
EPOCHS: 61
EPOCHS: 62
EPOCHS: 63
EPOCHS: 64
EPOCHS: 65
EPOCHS: 66
EPOCHS: 67
EPOCHS: 68
EPOCHS: 69
EPOCHS: 70
EPOCHS: 71
EPOCHS: 72
EPOCHS: 73
EPOCHS: 74
EPOCHS: 75
EPOCHS: 76
EPOCHS: 77
EPOCHS: 78
EPOCHS: 79
EPOCHS: 80
EPOCHS: 81
EPOCHS: 82
EPOCHS: 83
EPOCHS: 84
EPOCHS: 85
EPOCHS: 86
EPOCHS: 87
EPOCHS: 88
EPOCHS: 89
EPOCHS: 90
EPOCHS: 9

In [11]:
# Give the test features the same leading column of ones as the training set.
# NOTE: X_test is overwritten, so re-running this cell adds another column.
ones = np.ones(X_test.shape[0]).reshape(X_test.shape[0],1)
X_test = np.column_stack((ones,X_test))

In [12]:
# No parameters are passed, so predict() uses the trained weights and
# returns thresholded 0/1 labels.
predictions = lr.predict(X_test)

In [13]:
# Show the held-out labels for comparison with the predictions.
y_test

array([1, 1, 0, 0, 1, 1, 0, 1, 1, 1])

In [14]:
# Show the predicted labels for the held-out rows.
predictions

array([1., 1., 0., 0., 1., 1., 0., 1., 1., 1.])

In [15]:
def calc_accuracy(obj,xtest,ytest):
    """Return the fraction of entries of ``ytest`` that ``obj`` predicts exactly.

    ``obj`` must provide ``predict(xtest)``. A prediction counts as correct
    when its difference from the matching entry of ``ytest`` is zero; the
    count is divided by ``ytest.shape[0]``.
    """
    residual = ytest - obj.predict(xtest)
    n_correct = np.count_nonzero(residual == 0)
    n_total = ytest.shape[0]
    return n_correct/n_total

In [16]:
# Element-wise difference between labels and predictions (zero where they
# agree). No later cell shown here reads `acc`.
acc = y_test - predictions

In [17]:
# Fraction of held-out samples the trained model classifies correctly.
calc_accuracy(lr,X_test,y_test)

1.0