In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random as rd

In [4]:
data_table = pd.read_csv('iris.csv', names =['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'flower'])

In [109]:
data_table.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,flower
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [5]:
def normalization(matrix, vector):
    #Mean calculation
    mVector = np.mean(vector, axis=0)
    mMatrix = np.mean(matrix, axis=0)
    
    #Standard deviation
    stdVector = np.std(vector, axis = 0)
    stdMatrix = np.std(matrix, axis = 0)
     
    #Normalization of data
    vector = (vector - mVector)/stdVector
    matrix = (matrix - mMatrix)/stdMatrix
    
    return matrix, vector

In [144]:
def gradientDescent(xMatrix, yVector, nEpochs, alpha):
    initialW = np.ones(len(xMatrix[0]), dtype = float);
    cost = []
    yHat = xMatrix.dot(initialW)   
    for i in range(nEpochs):
        summation = 0;
        for y, y_c, x in zip(yVector, yHat, xMatrix):
            error = y - (1/(1 + np.exp(-(initialW * x))))
            summation += error * x
            
        cost.append(-((1/len(xMatrix)) * summation))
        
        nextW =  initialW + (alpha/len(xMatrix))*summation
        yHat = xMatrix.dot(nextW)
        initialW = nextW
    
    return nextW

def stochasticGradientDescent(xMatrix, yVector, nEpochs, alpha):
    cost = []
    data = np.append(xMatrix, np.array([[x] for x in yVector]), axis=1)
    
    initialW = np.ones(len(xMatrix[0]), dtype = float);
    
    yHat = xMatrix.dot(initialW)
    for i in range(nEpochs):
        summation = 0;
        for y, yH, x in zip(yVector, yHat, xMatrix):
            error = y - (1/(1 + np.exp(-yH))) * x
            nextW = initialW + alpha * error 
            summation += error
            
        #cost.append(np.linalg.norm((1/len(xMatrix)) * summation, ord=1))
        
            
        data = np.random.permutation(data)

        yVector = data[:, -1]
        xMatrix = data[:, :-1]
        
        yHat = xMatrix.dot(nextW)
        initialW = nextW
        
    return nextW

In [145]:
def crossValidationSplit(dataset, n_folds): 
    dataset_split = list() 
    dataset_copy = list(dataset.values.tolist()) 
    fold_size = int(len(dataset) / n_folds) 
    
    for i in range(n_folds): 
        fold = list() 
        while len(fold) < fold_size: 
            index = rd.randrange(len(dataset_copy)) 
            fold.append(dataset_copy.pop(index)) 
        dataset_split.append(fold) 
    return dataset_split

def kFoldCrossValidation(dataset, kParts, algorithm, *args):
    folds = crossValidationSplit(dataset, kParts) 
    scores = list() 
    for fold in folds: 
        train_set = list(folds) 
        train_set.remove(fold) 
        train_set = sum(train_set, []) 
        test_set = list() 
        
        for row in fold: 
            row_copy = list(row) 
            test_set.append(row_copy) 
            row_copy[-1] = None 
            
        predicted = algorithm(train_set, test_set, *args) 
        actual = [row[-1] for row in fold] 
        accuracy = (predicted - actual)/kParts
        scores.append(accuracy) 
    return scores

In [146]:
def predict(xTest, coefficients): 
    yhat = xTest.dot(coefficients)
    return 1 / (1 + np.exp(-yhat))

def logisticRegression(train, test, alpha, epochs): 
    train = pd.DataFrame(train)
    test = pd.DataFrame(test)
    
    xTrain = np.array(train.iloc[:,0:4])
    yTrain = np.array(train[4])
    xTest = np.array(test.iloc[:,0:4]) 
    
    xTrain, yTrain = normalization(xTrain, yTrain)
    coef = stochasticGradientDescent(xTrain, yTrain, alpha, epochs) 
    print(coef)
    yhat = predict(xTest, coef) 
 
    for i in range(len(yhat)):
        yhat[i] = round(yhat[i]) 
    print(yhat)
    return(yhat)

In [147]:
epochs = 1000 
alpha =  0.001
kFoldCrossValidation(data_table, 10, logisticRegression, epochs, alpha)

[0.74638772 1.09633473 0.74437189 0.74185676]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[0.70287612 1.10270414 0.72839278 0.73171662]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[0.67217378 0.99699806 0.71621356 0.70405402]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[0.6673797  0.9879349  0.6881667  0.68368028]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[0.70917232 1.06294838 0.71611322 0.71003998]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[0.68879067 0.98490777 0.70606599 0.70143519]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[0.67376235 1.03458476 0.70511934 0.70727278]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[0.70159009 1.06317954 0.72715493 0.73291553]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[0.679828   1.06612278 0.70621307 0.70407167]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
[0.65424372 0.97028658 0.69668208 0.69647834]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


[array([0. , 0. , 0.1, 0. , 0.1, 0.1, 0.1, 0.1, 0. , 0. ]),
 array([0.1, 0. , 0.1, 0.1, 0.1, 0. , 0.1, 0. , 0. , 0. ]),
 array([0.1, 0.1, 0. , 0. , 0. , 0. , 0. , 0.1, 0. , 0.1]),
 array([0.1, 0.1, 0. , 0. , 0. , 0.1, 0.1, 0.1, 0. , 0.1]),
 array([0.1, 0. , 0. , 0.1, 0.1, 0. , 0.1, 0.1, 0.1, 0. ]),
 array([0. , 0. , 0. , 0.1, 0. , 0.1, 0. , 0.1, 0. , 0.1]),
 array([0. , 0.1, 0.1, 0.1, 0. , 0. , 0. , 0.1, 0. , 0. ]),
 array([0.1, 0. , 0. , 0. , 0.1, 0. , 0.1, 0. , 0.1, 0.1]),
 array([0. , 0. , 0.1, 0.1, 0. , 0.1, 0.1, 0. , 0. , 0. ]),
 array([0. , 0. , 0.1, 0.1, 0. , 0.1, 0.1, 0.1, 0.1, 0.1])]