In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random as rd

In [2]:
data_table = pd.read_csv('iris.csv', names =['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'flower'])

In [3]:
data_table.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,flower
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [91]:
xMatrix = np.array(data_table[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']])
yVector = np.array(data_table['flower'])

In [92]:
def normalization(matrix, vector):
    #Mean calculation
    mVector = np.mean(vector, axis=0)
    mMatrix = np.mean(matrix, axis=0)
    
    #Standard deviation
    stdVector = np.std(vector, axis = 0)
    stdMatrix = np.std(matrix, axis = 0)
     
    #Normalization of data
    vector = (vector - mVector)/stdVector
    matrix = (matrix - mMatrix)/stdMatrix
    
    return matrix, vector

In [128]:
def gradientDescent(xMatrix, yVector, epochs, alpha):
    xMatrix = np.insert(xMatrix, 0, 1, axis =1)
    wCoefMatrix = np.ones((xMatrix.shape[1],), dtype=float)
    yPredicted = xMatrix.dot(wCoefMatrix)
    mse = []
    for epoch in range(epochs):
        somatorio = 0
        for i in range(xMatrix.shape[0]):
            somatorio += (yVector[i] - sigmoide(yPredicted[i])) * xMatrix[i]

        mse.append((-1/xMatrix.shape[0]) * somatorio)
        wCoefMatrix = wCoefMatrix + (alpha/wCoefMatrix.shape[0])*somatorio
        yPredicted = xMatrix.dot(wCoefMatrix)
    
    return wCoefMatrix

def stochasticGradientDescent(xMatrix, yVector, epochs, alpha):
    xMatrix = np.insert(xMatrix, 0, 1, axis =1)
    wCoefMatrix = np.ones((xMatrix.shape[1],), dtype=float)
    yPredicted = xMatrix.dot(wCoefMatrix)
    data = np.append(xMatrix, np.split(yVector, xMatrix.shape[0], axis=0), axis=1)
    mse = []

    for epoch in range(epochs):
        erro = 0
        somatorio = 0
        for i in range(xMatrix.shape[0]):
            wCoefMatrix  = wCoefMatrix + alpha * (yVector[i] - sigmoide(yPredicted[i])) * xMatrix[i]
            somatorio += (yVector[i] - sigmoide(yPredicted[i])) * xMatrix[i]
        
        mse.append((-1/xMatrix.shape[0]) * somatorio)  
        
        data = np.random.permutation(data)
        xMatrix = data[: ,0:xMatrix.shape[1]]
        yVector = data[:,xMatrix.shape[1]]
        
        yPredicted = xMatrix.dot(wCoefMatrix)  
        
    return wCoefMatrix

In [175]:
def crossValidationSplit(xMatrix, yVector, n_folds): 
    xMatrixSplit = []
    yVectorSplit = []
    xMatrixCopy = xMatrix

    yVectorCopy = yVector
    foldSize = int(len(xMatrix) / n_folds) 
    
    for i in range(n_folds): 
        foldXM = [] 
        foldYV = []
        while len(foldXM) < foldSize: 
            index = rd.randrange(len(xMatrixCopy)) 
            foldXM.append(xMatrixCopy[index]) 
            xMatrixCopy = np.delete(xMatrixCopy, index, axis=0)
            foldYV.append(yVectorCopy[index]) 
            yVectorCopy = np.delete(yVectorCopy, index)
        xMatrixSplit.append(foldXM)
        yVectorSplit.append(foldYV)
    return xMatrixSplit, yVectorSplit

def kFoldCrossValidation(xMatrix, yVector, kParts, algorithm, *args):
    xMFolds, yVFolds = crossValidationSplit(xMatrix, yVector, kParts) 
    scores = list() 
    for fold in folds: 
        train_set = list(folds) 
        train_set.remove(fold) 
        train_set = sum(train_set, []) 
        test_set = list() 
        
        for row in fold: 
            row_copy = list(row) 
            test_set.append(row_copy) 
            row_copy[-1] = None 
            
        predicted = algorithm(train_set, test_set, *args) 
        actual = [row[-1] for row in fold] 
        accuracy = (predicted - actual)/kParts
        scores.append(accuracy) 
    return scores

In [176]:
def sigmoide(z):
    return 1 / (1 + np.exp(-z))

In [177]:
def predict(xMatrix, w):
    predictions = []
    xMatrix = np.insert(xMatrix, 0, 1, axis =1)
    for i in range(xMatrix.shape[0]):
        predictions.append(round(sigmoide(xMatrix[i].dot(w))))
    return predictions

def logisticRegression(xMatrix, yVector, alpha, epochs): 
    xMatrix, yVector = normalization(xMatrix, yVector)
    coef = stochasticGradientDescent(xMatrix, yVector, epochs, alpha)
    print(coef)
    return(predict(xMatrix, coef))

In [178]:
epochs = 1000 
alpha =  0.001
logisticRegression(xMatrix, yVector, alpha, epochs)

[-48.63479885  37.11961229 -34.03024809  49.7469121   49.31732027]


[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0,
 1.0]

In [179]:
x,y =crossValidationSplit(xMatrix, yVector, 5)

In [180]:
x

[[array([-0.11120129,  1.70160383, -1.08374115, -0.68329141]),
  array([-0.5810659 ,  0.85713543, -1.01435952, -0.8607697 ]),
  array([2.23812177, 0.01266703, 1.41399774, 1.26896977]),
  array([ 0.82852793, -0.83180138,  1.55276101,  1.44644806]),
  array([-0.5810659 ,  0.43490123, -0.80621461, -0.50581312]),
  array([-0.11120129, -0.19845007,  1.1364712 ,  1.26896977]),
  array([-1.36417359,  1.06825253, -1.29188606, -1.03824799]),
  array([ 0.35866332, -0.62068428,  1.1364712 ,  0.91401319]),
  array([1.92487869, 0.01266703, 1.27523447, 1.26896977]),
  array([-1.8340382 , -0.19845007, -1.22250442, -1.21572628]),
  array([ 0.35866332,  1.49048673, -0.80621461, -0.8607697 ]),
  array([-1.36417359,  0.22378413, -1.01435952, -1.03824799]),
  array([-1.52079513, -1.67626978, -1.08374115, -0.8607697 ]),
  array([-0.89430898,  0.01266703, -0.94497788, -1.21572628]),
  array([2.39474331, 0.22378413, 1.27523447, 1.09149148]),
  array([-1.67741667, -0.19845007, -1.08374115, -1.03824799]),
  ar

In [181]:
y

[[-1.0,
  -1.0,
  1.0,
  1.0,
  -1.0,
  1.0,
  -1.0,
  1.0,
  1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  1.0,
  -1.0,
  1.0,
  1.0,
  1.0,
  1.0],
 [1.0,
  1.0,
  1.0,
  1.0,
  -1.0,
  1.0,
  -1.0,
  1.0,
  -1.0,
  -1.0,
  1.0,
  -1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  -1.0,
  -1.0],
 [1.0,
  1.0,
  -1.0,
  -1.0,
  1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  1.0,
  -1.0,
  1.0,
  1.0,
  -1.0,
  -1.0,
  1.0,
  1.0,
  1.0,
  1.0],
 [-1.0,
  1.0,
  1.0,
  -1.0,
  -1.0,
  -1.0,
  1.0,
  1.0,
  1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  -1.0],
 [-1.0,
  1.0,
  -1.0,
  1.0,
  1.0,
  1.0,
  1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  -1.0,
  1.0,
  -1.0,
  -1.0,
  1.0,
  -1.0,
  -1.0,
  -1.0]]