In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random as rd

In [2]:
# Load the iris measurements, labelling the CSV fields explicitly. Because
# `names` is passed without `header`, pandas treats the first line as data.
data_table = pd.read_csv(
    'iris.csv',
    names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'flower'],
)

In [3]:
# Preview the first rows to sanity-check the column layout.
data_table.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,flower
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [91]:
# Split the frame into the four measurement columns (features) and the
# `flower` column (labels), both as plain NumPy arrays.
xMatrix = np.array(
    data_table[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
)
yVector = np.array(
    data_table['flower']
)

In [92]:
def normalization(matrix, vector):
    """Standardize a feature matrix and a vector to zero mean and unit variance.

    Each input is shifted by its mean and divided by its population standard
    deviation (``np.std`` default, ``ddof=0``), both computed along axis 0.
    Where the standard deviation is zero (constant column / constant vector)
    the data is only centered, so the result is 0 instead of ``nan`` from 0/0.

    Parameters
    ----------
    matrix : array-like
        Values to standardize column-wise (statistics along axis 0).
    vector : array-like
        Values to standardize (statistics along axis 0).

    Returns
    -------
    tuple of numpy.ndarray
        ``(matrix, vector)`` standardized as float arrays. The inputs are not
        modified.
    """
    def _standardize(values):
        # Z-score along axis 0, leaving zero-variance entries merely centered.
        values = np.asarray(values, dtype=float)
        mean = np.mean(values, axis=0)
        std = np.std(values, axis=0)
        # Substitute 1 for a zero std so the division is a no-op there.
        safeStd = np.where(std == 0, 1.0, std)
        return (values - mean) / safeStd

    return _standardize(matrix), _standardize(vector)

In [413]:
def gradientDescent(xMatrix, yVector, epochs, alpha):
    """Fit logistic-regression coefficients with full-batch gradient updates.

    A column of ones is prepended to ``xMatrix`` for the intercept, the
    coefficients start at 1, and every epoch applies

        w <- w + (alpha / n_samples) * X^T (y - sigmoide(X w))

    i.e. a step along the sample-averaged gradient. The step is averaged over
    the number of samples (not the number of coefficients), matching the
    ``1 / n_samples`` averaging the original diagnostic used.

    Parameters
    ----------
    xMatrix : array-like
        Feature matrix without the intercept column; one row per sample.
    yVector : array-like
        Target value for each row of ``xMatrix``.
    epochs : int
        Number of full passes over the data.
    alpha : float
        Learning rate.

    Returns
    -------
    numpy.ndarray
        Coefficient vector of length ``n_features + 1`` (intercept first).
    """
    xMatrix = np.insert(xMatrix, 0, 1, axis=1)
    yVector = np.asarray(yVector)
    nSamples = xMatrix.shape[0]
    wCoefMatrix = np.ones((xMatrix.shape[1],), dtype=float)

    # Nothing to learn from; also avoids dividing by zero below.
    if nSamples == 0:
        return wCoefMatrix

    for _ in range(epochs):
        residuals = yVector - sigmoide(xMatrix.dot(wCoefMatrix))
        # Sum over samples of residual_i * x_i, done as one matrix product.
        gradient = xMatrix.T.dot(residuals)
        wCoefMatrix = wCoefMatrix + (alpha / nSamples) * gradient

    return wCoefMatrix

def stochasticGradientDescent(xMatrix, yVector, epochs, alpha):
    """Fit logistic-regression coefficients with per-sample (stochastic) updates.

    A column of ones is prepended to ``xMatrix`` for the intercept and the
    coefficients start at 1. Every epoch visits the samples in a fresh random
    order (``np.random.permutation``) and, for each sample, applies

        w <- w + alpha * (y_i - sigmoide(x_i . w)) * x_i

    The prediction is recomputed from the *current* coefficients for every
    sample; using predictions cached at the start of the epoch would make the
    pass equivalent to a single batch step.

    Parameters
    ----------
    xMatrix : array-like
        Feature matrix without the intercept column; one row per sample.
    yVector : array-like
        Target value for each row of ``xMatrix``.
    epochs : int
        Number of passes over the data.
    alpha : float
        Learning rate.

    Returns
    -------
    numpy.ndarray
        Coefficient vector of length ``n_features + 1`` (intercept first).
    """
    xMatrix = np.insert(xMatrix, 0, 1, axis=1)
    yVector = np.asarray(yVector)
    nSamples = xMatrix.shape[0]
    wCoefMatrix = np.ones((xMatrix.shape[1],), dtype=float)

    for _ in range(epochs):
        # Shuffling indices keeps each row paired with its label without
        # having to glue X and y into one array.
        for i in np.random.permutation(nSamples):
            error = yVector[i] - sigmoide(xMatrix[i].dot(wCoefMatrix))
            wCoefMatrix = wCoefMatrix + alpha * error * xMatrix[i]

    return wCoefMatrix

In [436]:
def crossValidationSplit(xMatrix, yVector, n_folds):
    """Randomly partition samples into ``n_folds`` equally sized folds.

    Samples are drawn without replacement using ``rd.randrange``; each fold
    receives ``len(xMatrix) // n_folds`` samples, so up to ``n_folds - 1``
    leftover samples are not assigned to any fold. Rows and labels are drawn
    together, so position ``j`` of a feature fold matches position ``j`` of
    the corresponding label fold.

    Parameters
    ----------
    xMatrix : sequence
        Samples, indexable by integer position.
    yVector : sequence
        Label for each sample, same length as ``xMatrix``.
    n_folds : int
        Number of folds; must satisfy ``1 <= n_folds <= len(xMatrix)``.

    Returns
    -------
    tuple of list
        ``(xMatrixSplit, yVectorSplit)``: two lists of ``n_folds`` lists
        holding the rows and the labels of each fold.

    Raises
    ------
    ValueError
        If the inputs differ in length or ``n_folds`` is out of range.
    """
    nSamples = len(xMatrix)
    if len(yVector) != nSamples:
        raise ValueError("xMatrix and yVector must have the same length")
    if n_folds < 1 or n_folds > nSamples:
        raise ValueError("n_folds must be between 1 and the number of samples")

    foldSize = nSamples // n_folds
    # Pool of not-yet-assigned sample positions. Popping from this small list
    # replaces copying the whole data arrays with np.delete on every draw.
    remaining = list(range(nSamples))

    xMatrixSplit = []
    yVectorSplit = []
    for _ in range(n_folds):
        foldXM = []
        foldYV = []
        for _ in range(foldSize):
            position = rd.randrange(len(remaining))
            sampleIndex = remaining.pop(position)
            foldXM.append(xMatrix[sampleIndex])
            foldYV.append(yVector[sampleIndex])
        xMatrixSplit.append(foldXM)
        yVectorSplit.append(foldYV)
    return xMatrixSplit, yVectorSplit

def kFoldCrossValidation(xMatrix, yVector, kParts, algorithm, *args):
    """Evaluate ``algorithm`` with k-fold cross-validation.

    The data is split with ``crossValidationSplit``. Each fold is used once as
    the test set while the remaining folds, concatenated, form the training
    set; the held-out fold is excluded from training. For every test sample
    the prediction and the actual label are printed.

    Parameters
    ----------
    xMatrix : array-like
        Feature matrix, one row per sample.
    yVector : array-like
        Label for each row of ``xMatrix``.
    kParts : int
        Number of folds; must be at least 2 so a training set remains.
    algorithm : callable
        Called as ``algorithm(xTrain, yTrain, xTest, *args)``; must return one
        prediction per row of ``xTest``.
    *args
        Extra positional arguments forwarded to ``algorithm``.

    Returns
    -------
    list of numpy.ndarray
        One array per fold containing ``(predicted - actual) / kParts`` for
        each test sample.

    Raises
    ------
    ValueError
        If ``kParts`` is smaller than 2.
    """
    if kParts < 2:
        raise ValueError("kParts must be at least 2 to leave a training set")

    xMFolds, yVFolds = crossValidationSplit(xMatrix, yVector, kParts)
    scores = []
    for testIndex in range(len(xMFolds)):
        xTest = np.array(xMFolds[testIndex])
        yTest = np.array(yVFolds[testIndex])

        # Build the training set from every fold except the held-out one.
        # (np.delete returns a new array, so calling it without using the
        # result would leave the test fold inside the training data.)
        xTrain = np.array([
            row
            for foldIndex, fold in enumerate(xMFolds) if foldIndex != testIndex
            for row in fold
        ])
        yTrain = np.array([
            label
            for foldIndex, fold in enumerate(yVFolds) if foldIndex != testIndex
            for label in fold
        ])

        # Negative test labels are mapped to class 0 before comparison.
        yTest = np.where(yTest < 0, 0, yTest)
        predicted = algorithm(xTrain, yTrain, xTest, *args)
        for i in range(len(predicted)):
            print("Predicted: ",predicted[i],"Actual: ",yTest[i])
        # NOTE(review): this is the signed per-sample error scaled by 1/kParts,
        # not a classification accuracy; the shape is kept so existing callers
        # keep working. Confirm whether a fraction-correct score is wanted.
        foldScore = np.array([x - y for x, y in zip(predicted, yTest)])/kParts
        scores.append(foldScore)
    return scores

In [437]:
def sigmoide(z):
    """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

    Only ``exp(-|z|)`` is ever computed, which lies in (0, 1], so large
    negative inputs no longer trigger an overflow ``RuntimeWarning``:

        z >= 0:  1 / (1 + exp(-z))
        z <  0:  exp(z) / (1 + exp(z))

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A scalar for scalar input, otherwise a float array shaped like ``z``.
    """
    z = np.asarray(z, dtype=float)
    expNegAbs = np.exp(-np.abs(z))
    result = np.where(z >= 0, 1.0 / (1.0 + expNegAbs), expNegAbs / (1.0 + expNegAbs))
    # Unwrap 0-d arrays so scalar callers (e.g. round()) still get a scalar.
    return result[()] if result.ndim == 0 else result

In [438]:
def predict(xMatrix, w):
    """Classify each row of ``xMatrix`` with logistic-regression weights ``w``.

    A leading column of ones is inserted for the intercept, then each row's
    sigmoid activation is rounded with the built-in ``round``.

    Parameters
    ----------
    xMatrix : array-like
        Feature matrix without the intercept column; one row per sample.
    w : numpy.ndarray
        Coefficient vector with the intercept first.

    Returns
    -------
    list
        One rounded activation per row of ``xMatrix``.
    """
    withIntercept = np.insert(xMatrix, 0, 1, axis =1)
    return [round(sigmoide(row.dot(w))) for row in withIntercept]

def logisticRegression(xTrain, yTrain, xTest, alpha, epochs):
    """Train a logistic-regression classifier and predict labels for ``xTest``.

    Features are standardized with the mean and standard deviation of the
    *training* set, and the same transform is applied to ``xTest`` so both
    live on the scale the coefficients were fitted on. Labels are passed to
    the optimizer unchanged: standardizing them would move the targets away
    from the range a sigmoid can output.

    Parameters
    ----------
    xTrain : array-like
        Training feature matrix, one row per sample.
    yTrain : array-like
        Training labels. Assumed to be 0/1 class labels — TODO confirm
        against the dataset.
    xTest : array-like
        Feature rows to classify.
    alpha : float
        Learning rate forwarded to ``stochasticGradientDescent``.
    epochs : int
        Number of passes forwarded to ``stochasticGradientDescent``.

    Returns
    -------
    list
        One prediction per row of ``xTest``, as returned by ``predict``.
    """
    xTrain = np.asarray(xTrain, dtype=float)
    xTest = np.asarray(xTest, dtype=float)

    featureMean = np.mean(xTrain, axis=0)
    featureStd = np.std(xTrain, axis=0)
    # A constant training feature has std 0; divide by 1 to avoid nan/inf.
    featureStd = np.where(featureStd == 0, 1.0, featureStd)

    xTrainScaled = (xTrain - featureMean) / featureStd
    xTestScaled = (xTest - featureMean) / featureStd

    coef = stochasticGradientDescent(xTrainScaled, yTrain, epochs, alpha)
    return predict(xTestScaled, coef)

In [439]:
# Settings for the cross-validated logistic-regression run below.
kParts = 5       # number of cross-validation folds
alpha = 0.001    # learning rate
epochs = 1000    # passes over the training data

In [440]:
# Seed both random sources used by the pipeline (`random` draws the folds,
# `numpy.random` shuffles samples during training) so that re-running this
# cell reproduces the same result.
SEED = 42
rd.seed(SEED)
np.random.seed(SEED)

kFoldCrossValidation(xMatrix, yVector, kParts, logisticRegression, alpha, epochs)

Predicted:  0.0 Actual:  0.0
Predicted:  1.0 Actual:  1.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  1.0 Actual:  1.0
Predicted:  1.0 Actual:  1.0
Predicted:  0.0 Actual:  0.0
Predicted:  1.0 Actual:  1.0
Predicted:  0.0 Actual:  0.0
Predicted:  1.0 Actual:  1.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  1.0 Actual:  1.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  1.0 Actual:  1.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.0 Actual:  0.0
Predicted:  1.0 Actual:  1.0
Predicted:  1.0 Actual:  1.0
Predicted:  0.0 Actual:  0.0
Predicted:  1.0 Actual:  1.0
Predicted:  0.0 Actual:  0.0
Predicted:  1.0 Actual:  1.0
Predicted:  0.0 Actual:  0.0
Predicted:  0.

[array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.])]