In [147]:
import numpy as np
import copy
import math
import random 
def turnIntoMatrix(filepath):
    file = open(filepath, "r")
    lines = file.readlines()
    file.close()
    
    ##Preparing the list of lists 
    matrix = []
    
    for line in lines:
        ##sample will store all the values found 
        ##in a line which represents a sample when the string values were turned into numbers 
        sample = []
        line = line.replace("\n", "")
        split = line.split(",")
        for value in split:
            sample.append(float(value))
        matrix.append(sample)
    return matrix 


In [40]:
##Test turnIntoMatrix
##parsing the .treated file to create our sample matrix 
matrix = turnIntoMatrix("dermatology.data.treated")
matriz = np.array(matrix)
print(matriz)

[[ 2.  2.  0. ...  0. 55.  2.]
 [ 3.  3.  3. ...  0.  8.  1.]
 [ 2.  1.  2. ...  3. 26.  3.]
 ...
 [ 3.  2.  2. ...  3. 28.  3.]
 [ 2.  1.  3. ...  3. 50.  3.]
 [ 3.  2.  2. ...  0. 35.  1.]]


In [92]:
def meanByClass(matriz, classe):
    means = np.zeros(33)
    count = 0
    for line in matriz:
        if line[34] == classe:
            means += line[:33]
            count += 1
    return (means/count, count)

print(meanByClass(matriz, 2))
print(meanByClass(matriz, 3))
print(meanByClass(matriz, 4))
print(meanByClass(matriz, 5))
print(meanByClass(matriz, 6))

(array([2.28333333, 2.06666667, 0.95      , 1.61666667, 0.03333333,
       0.        , 0.01666667, 0.        , 0.06666667, 0.11666667,
       0.05      , 0.        , 0.46666667, 1.06666667, 0.        ,
       2.21666667, 1.8       , 0.2       , 1.        , 0.        ,
       0.16666667, 0.01666667, 0.16666667, 0.        , 0.        ,
       0.        , 0.        , 2.2       , 0.        , 0.01666667,
       0.01666667, 1.63333333, 0.03333333]), 60)
(array([2.08450704, 1.63380282, 2.09859155, 2.26760563, 1.33802817,
       2.26760563, 0.        , 1.91549296, 0.02816901, 0.02816901,
       0.        , 2.07042254, 0.16901408, 0.        , 0.05633803,
       2.26760563, 2.12676056, 0.28169014, 1.1971831 , 0.        ,
       0.        , 0.        , 0.        , 0.04225352, 2.        ,
       0.23943662, 2.30985915, 1.11267606, 2.29577465, 0.01408451,
       0.        , 2.28169014, 2.71830986]), 71)
(array([1.89583333, 1.52083333, 1.20833333, 0.47916667, 1.16666667,
       0.        , 0.       

In [129]:
def covarianceMatrixByClass(matriz, classe):
    means, factor_Ni = meanByClass(matriz, classe)
    covarianceMatriz = np.zeros(1089).reshape(33,33)
    
    for line in matriz:
        aux = np.zeros(33)
        aux = line[:33] - means
        a = aux.reshape(1,33)
        b = aux.reshape(1,33).T
        covarianceMatriz += np.matmul(b, a)
        
    return ((1/factor_Ni*covarianceMatriz), factor_Ni)

def SW(matriz):
    i = 1
    n = 0
    Sw = np.zeros(1089).reshape(33,33)
    while i <= 6:
        covariance, factor_NI = covarianceMatrixByClass(matriz, i)
        i += 1 
        n += factor_NI
        Sw += ((factor_NI)/n)*covariance
    return Sw
   

In [139]:
def lda(x, means, sw, observations, allobs):
    SWinverse = np.linalg.inv(sw)
    a = np.matmul(np.matmul(x.reshape(1,33),SWinverse),means.reshape(1,33).T)
    b = np.matmul(np.matmul(means.reshape(1,33), SWinverse), means.reshape(1,33).T)
    l = math.log(observations/allobs)
    return a - b/2 + l

In [143]:
means, observations = meanByClass(matriz, 1)
sw = SW(matriz)
sample = matriz[20][:33]
lda(sample, means, sw, observations, len(matriz))


array([[0.03107489]])

In [145]:
def getRandomDataSet(matrix, percentage):
    numberOfSamples = int(round(percentage*len(matrix)))
    return random.sample(matrix, numberOfSamples)
                       

In [198]:
def predictions(sample):
    means1, observations1 = meanByClass(trainingSet, 1)
    means2, observations2 = meanByClass(trainingSet, 2)
    means3, observations3 = meanByClass(trainingSet, 3)
    means4, observations4 = meanByClass(trainingSet, 4)
    means5, observations5 = meanByClass(trainingSet, 5)
    means6, observations6 = meanByClass(trainingSet, 6)
    sw = SW(trainingSet)
    allobs = len(trainingSet)
    lda1 = float(lda(sample, means1, sw, observations1, allobs))
    lda2 = float(lda(sample, means2, sw, observations2, allobs)) 
    lda3 = float(lda(sample, means3, sw, observations3, allobs))
    lda4 = float(lda(sample, means4, sw, observations4, allobs))
    lda5 = float(lda(sample, means5, sw, observations5, allobs))
    lda6 = float(lda(sample, means6, sw, observations6, allobs)) 
    maximo = max(lda1,lda2,lda3,lda4, lda5, lda6)
    if maximo == lda1:
        return 1 
    if maximo == lda2:
        return 2
    if maximo == lda3:
        return 3
    if maximo == lda4:
        return 4
    if maximo == lda5:
        return 5
    if maximo == lda6:
        return 6
    return 0 

def teste(trainingSet, testSet):
    matrizDeConfusao = [[0,0,0,0,0,0],
                       [0,0,0,0,0,0],
                       [0,0,0,0,0,0],
                       [0,0,0,0,0,0],
                       [0,0,0,0,0,0],
                       [0,0,0,0,0,0]]
    
    for line in testSet:
        classe = predictions(line[:33])
        if classe == line[34]:
            prevision = classe - 1
            matrizDeConfusao[prevision][prevision] += 1
        else:
            prevision = classe - 1
            trueClass = int(line[34]) - 1
            matrizDeConfusao[prevision][trueClass] += 1
    return matrizDeConfusao


In [195]:
for i in range(100):
    trainingSet = getRandomDataSet(matrix, 0.8)
    testSet = getRandomDataSet(matrix, 0.2)  
    matrizDeConfusao = teste(np.array(trainingSet), np.array(testSet))
    print(matrizDeConfusao)

[[22, 18, 6, 9, 13, 4], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[24, 10, 11, 10, 12, 5], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[26, 6, 19, 13, 6, 2], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[22, 11, 12, 14, 10, 3], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[25, 8, 15, 13, 6, 5], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[22, 14, 10, 11, 11, 4], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[23, 9, 12, 11, 14, 3], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[31, 6, 15, 8, 7, 5], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[

[[23, 7, 15, 12, 10, 5], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[27, 7, 15, 13, 9, 1], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[20, 15, 8, 11, 14, 4], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[30, 12, 12, 7, 7, 4], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[23, 12, 15, 8, 9, 5], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[31, 12, 13, 9, 4, 3], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[22, 16, 11, 5, 12, 6], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[21, 15, 21, 7, 3, 5], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
[[21,

In [201]:
trainingSet[17][34]
predictions(np.array(trainingSet[17][:33]))

1