In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
import sys
sys.path.append('../tools')
import tools
import iterative_methods

# Logistic Regression

In [2]:
# Load the iris data set through sklearn.datasets.
iris = datasets.load_iris()

In [3]:
# Feature matrix and class labels taken from the loaded data set.
X, y = iris.data, iris.target

In [4]:
def cross_val_split_set(X,portion,y=None):
    """
    Randomly split a data set (and optionally its labels) into train and test parts.

    Parameters:
        X: array-like data; converted with np.array and split along its first axis
        portion: fraction of the rows assigned to the test part; the test size is
            int(X.shape[0]*portion), i.e. rounded down
        y: optional array-like labels, indexed the same way as X

    Returns:
        (X_train, X_test) when y is None, otherwise
        (X_train, X_test, y_train, y_test).
        Test rows appear in the random order in which they were drawn; train rows
        keep their original relative order.
    """
    X = np.array(X)
    n_rows = X.shape[0]
    size = int(n_rows*portion)
    indexlist = np.arange(n_rows)
    testinds = np.random.choice(indexlist, size, replace=False)
    # Boolean mask instead of a per-row "x not in testinds" scan: linear time, and
    # the index array stays integer-typed even when no train rows remain (an empty
    # np.array([]) is float-typed and cannot be used as an index).
    is_train = np.ones(n_rows, dtype=bool)
    is_train[testinds] = False
    traininds = np.flatnonzero(is_train)
    if y is None:
        return X[traininds],X[testinds]
    y = np.array(y)
    return X[traininds],X[testinds],y[traininds],y[testinds]

In [5]:
# Build a shuffled array of row indices.
# NOTE(review): `indices` is reassigned by the next cell before anything reads it,
# so in the visible notebook this shuffle does not affect X or y.
indices = np.arange(y.shape[0])
np.random.shuffle(indices)

In [6]:
# Keep only the rows labelled 0 or 1 (class 0 rows first, then class 1 rows),
# then hold out 10% of them as the test set.
indices = np.concatenate((np.where(y == 0)[0], np.where(y == 1)[0]))
X, y = X[indices], y[indices]
X_train, X_test, y_train, y_test = cross_val_split_set(X, portion=0.1, y=y)

In [7]:
# Build a shuffled array of row indices for the filtered labels and show its shape.
# NOTE(review): no later visible cell reads `indices`, so this shuffle is not
# applied to the data.
indices = np.arange(y.shape[0])
np.random.shuffle(indices)
indices.shape

(100,)

In [8]:
class LogisticRegression():

    """
    Logistic Regression class for binary classification
    Attributes:
        X: numpy array, data matrix; must be two-dimensional (reshape vectors to a single column)
        y: numpy array, class labels; must be numeric
        weights: numpy array, one weight per column of X, used for prediction
    """

    def __init__(self,X,y):
        """
        Store the training data and draw random initial weights.

        Parameters:
            X: numpy array, two-dimensional data matrix (X.shape[1] sets the number of weights)
            y: numpy array, numeric class labels
        """

        self.X = X
        self.y = y
        # The previous call, np.random.uniform(10, size=...), bound 10 to `low` while
        # `high` kept its default of 1.0; NumPy documents low > high as undefined.
        # Explicit, correctly ordered bounds keep the same interval.
        # NOTE(review): confirm that [1, 10) is the intended initial range.
        self.weights = np.random.uniform(1.0,10.0,size=X.shape[1])

    def gradient_func(self,parameters,X,y):
        """
        Return X.T.dot(h - y), where h = self.predict(X, parameters).

        Parameters:
            parameters: numpy array, weights at which to evaluate
            X: numpy array, data matrix
            y: numpy array, labels matching the rows of X
        """

        h = self.predict(X,parameters)
        return (X.T.dot(h-y))

    def predict(self,X,parameters = None):
        """
        Predict for the rows of X.

        Parameters:
            X: numpy array, data matrix
            parameters: optional numpy array of weights

        Returns:
            With parameters omitted (None): labels from self.weights, 1.0 where
            tools.sigmoid(X.dot(weights)) > 0.5 and 0.0 where it is <= 0.5.
            With parameters given: the un-thresholded tools.sigmoid(X.dot(parameters))
            values as floats.
        """

        # Identity check: an element-wise "== None" comparison wrapped in np.all
        # would also treat an explicitly passed empty array as "not given".
        if parameters is None:
            predictions = tools.sigmoid(X.dot(self.weights)).astype(float)
            predictions[predictions > 0.5] = 1
            predictions[predictions <= 0.5] = 0
            return predictions
        return tools.sigmoid(X.dot(parameters)).astype(float)

    def train(self,batch_size=1,epochs=100,learning_rate=0.001):
        """
        Fit self.weights on the stored data and print the train accuracy.

        The optimisation is delegated to iterative_methods.Mini_Batch_Gradient_Descent,
        which receives the stored data, the current weights, gradient_func and predict,
        together with loss="cross_entropy". Its return value replaces self.weights.

        Parameters:
            batch_size: forwarded to Mini_Batch_Gradient_Descent (first positional parameter)
            epochs: forwarded to Mini_Batch_Gradient_Descent
            learning_rate: forwarded to Mini_Batch_Gradient_Descent
        """

        self.weights = iterative_methods.Mini_Batch_Gradient_Descent(self.X,self.y,
            self.weights,self.gradient_func,self.predict,epochs=epochs,batch_size=batch_size,loss="cross_entropy",learning_rate=learning_rate)

        print("Train Accuracy: %s" % str(tools.calc_accuracy(self.predict(self.X),
                                                             self.y)))


In [9]:
# Prepend a column of ones to the training features.
# NOTE: this cell reassigns X_train, so running it again prepends another column.
ones = np.ones((X_train.shape[0], 1), dtype=float)
X_train = np.column_stack((ones, X_train))

In [10]:
# Create the model on the training data (with the prepended column of ones);
# the constructor draws random initial weights, one per column.
lr = LogisticRegression(X_train,y_train)

In [11]:
# 500 is the batch size (train's first positional parameter), not the epoch count;
# epochs and learning_rate keep their defaults (100 and 0.001).
lr.train(batch_size=500)

Cross Entropy 0.0640456081045873: 100%|██████████| 100/100 [00:00<00:00, 144.01it/s]  

Train Accuracy: 1.0





In [12]:
# Prepend a column of ones to the test features.
# NOTE: this cell reassigns X_test, so running it again prepends another column.
ones = np.ones((X_test.shape[0], 1))
X_test = np.column_stack((ones, X_test))

In [13]:
def calc_accuracy(obj,xtest,ytest):
    """
    Fraction of predictions that match the labels exactly.

    Parameters:
        obj: any object with a predict(xtest) method returning numeric predictions
        xtest: data passed unchanged to obj.predict
        ytest: numpy array of numeric labels

    Returns:
        Number of entries where ytest - prediction == 0, divided by ytest.shape[0].
    """
    residual = ytest - obj.predict(xtest)
    n_correct = np.count_nonzero(residual == 0)
    return n_correct/ytest.shape[0]

In [14]:
# Report accuracy on the held-out rows.
# NOTE(review): this uses tools.calc_accuracy(predictions, labels); the notebook-level
# calc_accuracy(obj, xtest, ytest) defined in the previous cell is not called here.
print("Test Accuracy: %s" % str(tools.calc_accuracy(lr.predict(X_test),y_test)))

Test Accuracy: 1.0
