In [163]:
import numpy as np
import pandas
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestCentroid
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from collections import Counter

In [164]:
X,Y = load_digits(return_X_y=True)
classes = load_digits().target_names
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.30, random_state=0)

### KNN

In [173]:
class myKNeighborsClassifier:
    def __init__(self, k=5):
        self.k = k
        self.Xtrain = None
        self.Ytrain = None
    def fit(self,Xtrain,Ytrain):
        self.Xtrain = Xtrain
        self.Ytrain = Ytrain
    def predict(self,X):
        lenX = len(X)
        lenYtest = len(Counter(Ytest).keys())
        res = np.zeros(lenX)
        distances = np.array([]) #declaro lista de distancias
        #1. Calcular la distancia de los datos X a los datos Xtrain
        for X_idx, X_values in enumerate(X):
            distances = np.array([])
            for Xtrain_idx, Xtrain_values in enumerate(Xtrain):
                dist = abs(Xtrain_values - X_values) # calculo distancias
                distances = np.append(distances,np.round(np.mean(dist), 3))
            #2. Ordenar las distancias de menor a mayor
            Dsorted = np.argsort(distances) #ordena y guarda los índices respecto a ese orden
            #3. Calcular la clase de acuerdo a la mayoría de los k vecinos más cercanos
            timesRep = np.zeros(lenYtest)
            for D_idx, D_values in enumerate(Dsorted):
                #cubetas (sumo 1 al índice correspondiente al valor respecto a Ytrain)
                timesRep[Ytrain[D_values]] += 1         
                if(timesRep[Ytrain[D_values]] == self.k):
                    res[X_idx] = Ytrain[D_values]
                    break
        #4. Regresar valores
        return res

In [174]:
# My KNeighbors Classifier
model = myKNeighborsClassifier()
model.fit(Xtrain,Ytrain)
Ypred = model.predict(Xtest)

print("*pred:\n",Ytest,"\n", "*ypred:\n",Ypred)
print('\n\n-> Accuracy:', accuracy_score(Ytest, Ypred))
m = confusion_matrix(Ytest,Ypred)
df = pandas.DataFrame(m,index=classes,columns=classes)
df

*pred:
 [2 8 2 6 6 7 1 9 8 5 2 8 6 6 6 6 1 0 5 8 8 7 8 4 7 5 4 9 2 9 4 7 6 8 9 4 3
 1 0 1 8 6 7 7 1 0 7 6 2 1 9 6 7 9 0 0 5 1 6 3 0 2 3 4 1 9 2 6 9 1 8 3 5 1
 2 8 2 2 9 7 2 3 6 0 5 3 7 5 1 2 9 9 3 1 7 7 4 8 5 8 5 5 2 5 9 0 7 1 4 7 3
 4 8 9 7 9 8 2 6 5 2 5 8 4 8 7 0 6 1 5 9 9 9 5 9 9 5 7 5 6 2 8 6 9 6 1 5 1
 5 9 9 1 5 3 6 1 8 9 8 7 6 7 6 5 6 0 8 8 9 8 6 1 0 4 1 6 3 8 6 7 4 5 6 3 0
 3 3 3 0 7 7 5 7 8 0 7 8 9 6 4 5 0 1 4 6 4 3 3 0 9 5 9 2 1 4 2 1 6 8 9 2 4
 9 3 7 6 2 3 3 1 6 9 3 6 3 2 2 0 7 6 1 1 9 7 2 7 8 5 5 7 5 2 3 7 2 7 5 5 7
 0 9 1 6 5 9 7 4 3 8 0 3 6 4 6 3 2 6 8 8 8 4 6 7 5 2 4 5 3 2 4 6 9 4 5 4 3
 4 6 2 9 0 1 7 2 0 9 6 0 4 2 0 7 9 8 5 4 8 2 8 4 3 7 2 6 9 1 5 1 0 8 2 1 9
 5 6 8 2 7 2 1 5 1 6 4 5 0 9 4 1 1 7 0 8 9 0 5 4 3 8 8 6 5 3 4 4 4 8 8 7 0
 9 6 3 5 2 3 0 8 3 3 1 3 3 0 0 4 6 0 7 7 6 2 0 4 4 2 3 7 8 9 8 6 8 5 6 2 2
 3 1 7 7 8 0 3 3 2 1 5 5 9 1 3 7 0 0 7 0 4 5 9 3 3 4 3 1 8 9 8 3 6 2 1 6 2
 1 7 5 5 1 9 2 8 9 7 2 1 4 9 3 2 6 2 5 9 6 5 8 2 0 7 8 0 5 8 4 1 8 6 4 3 4
 2 0 4 5 8 3 9 1 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,45,0,0,0,0,0,0,0,0,0
1,0,52,0,0,0,0,0,0,0,0
2,0,1,51,0,0,0,0,1,0,0
3,0,0,1,51,0,0,0,0,2,0
4,0,0,0,0,47,0,0,1,0,0
5,0,0,0,0,0,54,1,0,0,2
6,0,0,0,0,0,0,60,0,0,0
7,0,0,0,0,0,0,0,53,0,0
8,0,3,0,2,0,0,0,1,55,0
9,0,0,0,0,0,1,0,0,0,56


In [167]:
model = KNeighborsClassifier()
model.fit(Xtrain,Ytrain)
Ypred = model.predict(Xtest)

print('-> Accuracy: ',accuracy_score(Ytest,Ypred))
m = confusion_matrix(Ytest,Ypred)
df = pandas.DataFrame(m,index=classes,columns=classes)
df

-> Accuracy:  0.9814814814814815


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,45,0,0,0,0,0,0,0,0,0
1,0,51,0,0,0,1,0,0,0,0
2,0,0,52,0,0,0,0,1,0,0
3,0,0,1,53,0,0,0,0,0,0
4,0,0,0,0,47,0,0,1,0,0
5,0,0,0,0,0,55,1,0,0,1
6,0,0,0,0,0,0,60,0,0,0
7,0,0,0,0,0,0,0,53,0,0
8,0,1,0,1,0,0,1,0,58,0
9,0,0,0,0,0,1,0,0,0,56


## Nearest Centroid

In [261]:
class myNearestCentroid:
    def __init__(self):
        self.centroids = None
        
    def fit(self,Xtrain,Ytrain):
        Xtrain = Xtrain
        Ytrain = Ytrain
        # 1. calcular el número de clases
        self.n_class = len(Counter(Ytrain).keys())
        # 2. calcular los centroides de cada clase y guardarlo en self.centroids
        meanI = np.array([])
        nElem = np.zeros(self.n_class)
        self.centroids = np.zeros(self.n_class)
        for j in range(0, len(Ytrain)):
            x = Xtrain[j]
            meanI = np.insert(meanI, j, np.mean(x))
        
        for i in range(0, len(meanI)):
            actualClass = Ytrain[i]
            self.centroids[actualClass] += meanI[i]
            nElem[actualClass] += 1
        self.centroids/=nElem
        
    def predict(self,X):
        res = np.zeros(len(X))
        # 1. calcular la distancia de cada muestra en X a cada uno de los centroides
        for X_idx, X_values in enumerate(X):
            distances = np.zeros(self.n_class)
            for c_idx, c_values in enumerate(self.centroids):
#                 diff = abs(c_values - X_values) # calculo distancias
#                 dist = np.mean(diff)
                x = np.mean(X_values)
                dist = abs(c_values - x)
                distances[c_idx] = dist
        # 2. asignar la clase de acuerdo al centroide más cercano
            distances = np.argsort(distances)
            res[X_idx] = distances[0]
        return res

In [259]:
# My Nearest Centroid
model = myNearestCentroid()
model.fit(Xtrain,Ytrain)
Ypred = model.predict(Xtest)
print("*pred:\n",Ytest,"\n", "*ypred:\n",Ypred)
print('Accuracy',accuracy_score(Ytest,Ypred))
m = confusion_matrix(Ytest,Ypred)
df = pandas.DataFrame(m,index=classes,columns=classes)
df

*pred:
 [2 8 2 6 6 7 1 9 8 5 2 8 6 6 6 6 1 0 5 8 8 7 8 4 7 5 4 9 2 9 4 7 6 8 9 4 3
 1 0 1 8 6 7 7 1 0 7 6 2 1 9 6 7 9 0 0 5 1 6 3 0 2 3 4 1 9 2 6 9 1 8 3 5 1
 2 8 2 2 9 7 2 3 6 0 5 3 7 5 1 2 9 9 3 1 7 7 4 8 5 8 5 5 2 5 9 0 7 1 4 7 3
 4 8 9 7 9 8 2 6 5 2 5 8 4 8 7 0 6 1 5 9 9 9 5 9 9 5 7 5 6 2 8 6 9 6 1 5 1
 5 9 9 1 5 3 6 1 8 9 8 7 6 7 6 5 6 0 8 8 9 8 6 1 0 4 1 6 3 8 6 7 4 5 6 3 0
 3 3 3 0 7 7 5 7 8 0 7 8 9 6 4 5 0 1 4 6 4 3 3 0 9 5 9 2 1 4 2 1 6 8 9 2 4
 9 3 7 6 2 3 3 1 6 9 3 6 3 2 2 0 7 6 1 1 9 7 2 7 8 5 5 7 5 2 3 7 2 7 5 5 7
 0 9 1 6 5 9 7 4 3 8 0 3 6 4 6 3 2 6 8 8 8 4 6 7 5 2 4 5 3 2 4 6 9 4 5 4 3
 4 6 2 9 0 1 7 2 0 9 6 0 4 2 0 7 9 8 5 4 8 2 8 4 3 7 2 6 9 1 5 1 0 8 2 1 9
 5 6 8 2 7 2 1 5 1 6 4 5 0 9 4 1 1 7 0 8 9 0 5 4 3 8 8 6 5 3 4 4 4 8 8 7 0
 9 6 3 5 2 3 0 8 3 3 1 3 3 0 0 4 6 0 7 7 6 2 0 4 4 2 3 7 8 9 8 6 8 5 6 2 2
 3 1 7 7 8 0 3 3 2 1 5 5 9 1 3 7 0 0 7 0 4 5 9 3 3 4 3 1 8 9 8 3 6 2 1 6 2
 1 7 5 5 1 9 2 8 9 7 2 1 4 9 3 2 6 2 5 9 6 5 8 2 0 7 8 0 5 8 4 1 8 6 4 3 4
 2 0 4 5 8 3 9 1 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,4,0,2,0,2,0,0,20,17,0
1,5,1,1,0,1,2,0,23,19,0
2,3,2,0,1,0,1,0,21,25,0
3,3,1,1,0,0,0,0,32,17,0
4,4,1,2,1,1,1,0,21,17,0
5,7,0,1,1,2,1,0,29,16,0
6,4,1,1,0,0,2,0,31,21,0
7,7,1,1,0,3,1,0,29,11,0
8,6,1,0,0,2,1,0,18,33,0
9,8,0,0,2,2,0,0,20,25,0


In [260]:
model = NearestCentroid()
model.fit(Xtrain,Ytrain)
Ypred = model.predict(Xtest)

print('Accuracy',accuracy_score(Ytest,Ypred))
m = confusion_matrix(Ytest,Ypred)
df = pandas.DataFrame(m,index=classes,columns=classes)
df

Accuracy 0.8925925925925926


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,45,0,0,0,0,0,0,0,0,0
1,0,42,1,0,0,1,1,0,1,6
2,1,2,44,3,0,0,0,2,0,1
3,0,0,1,44,0,0,0,2,2,5
4,0,1,0,0,44,0,0,3,0,0
5,0,0,0,0,0,47,1,0,0,9
6,0,1,0,0,0,0,59,0,0,0
7,0,0,0,0,0,0,0,52,1,0
8,0,3,0,1,0,1,0,1,51,4
9,0,0,0,0,0,1,0,2,0,54
