In [1]:
#Entrega 4 Miguel Chaveinte Final

In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from scipy.stats import mode
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import normalize

In [3]:
from sklearn.preprocessing import normalize

# DATOS Y CARGA

In [4]:
mnist=fetch_openml('mnist_784', version=1)
mnist.data.shape,mnist.target.shape

((70000, 784), (70000,))

In [5]:
mnist.data.to_csv('mnist_data.csv',index=False)

In [6]:
mnist.target.to_csv('mnist_target.csv',index=False)

In [26]:
X=np.array(pd.read_csv('./mnist_data.csv'))
y=np.array(pd.read_csv('./mnist_target.csv'))

In [27]:
X.shape,y.shape

((70000, 784), (70000, 1))

# NORMALIZACIÓN EXTENDIDA

In [28]:
X_norm=normalize(np.append(X,np.ones((X.shape[0],1),dtype=float).reshape(-1,1),axis=1))
X_norm.shape,type(X_norm)

((70000, 785), numpy.ndarray)

### REESCALADO DE LOS DATOS. 600 PARA APRENDIZAJE Y 100 PARA TEST

In [29]:
XR=X_norm[:700]
YR=y[:700]
XR.shape,YR.shape


((700, 785), (700, 1))

# CLASES A UTILIZAR PARA SOM

### Clase Neurona

In [30]:
class neurona(object):
    def __init__(self, f=0, c=0, dim=0):
        self.c = c
        self.f = f
        self.dim = dim
        
        # Inicializa los pesos aleatoriamente entre [-0.5, 0.5]
        self.w = normalize(0.5 - np.random.rand(dim).reshape(1,-1))
        
        # self.label_winner = []
            
    def predict(self, inputs):
        # Calcula la salida de una neurona ante una o más entradas. "inputs" puede ser un vector o una matriz 2D
        return inputs @ self.w.T
    
    def fit(self, input, alfa=1):
        # ajusta los pesos de una neurona (w) para aproximarlos a una entrada (input)
        self.w = normalize(self.w + (alfa*input))
            
    def neuron_labeling(self, inputs, target):
        # etiquetado por neuronas. Se le pasa la lista de entradas y la etiqueta (target) de cada una de esas muestras.
        # Devuelve la etiqueta de la ganadora
        Y = inputs @ self.w.T
        self.label = target[np.argmax(Y)]
        return self.label

            

### Clase SOM

In [31]:
class som():
    
    def __init__(self, filas=1, columnas=1, dim=1):
        self.lista = []
        self.filas = filas
        self.columnas = columnas
        self.dim = dim
        self.labels = []
        # Considera que un mapa rectangular es una lista de objetos "neurona", que viene localizado por sus atributos "fila" y "columna"
        for fila in range(self.filas):
            for columna in range(self.columnas):
                self.lista.append(neurona(f=fila, c=columna, dim=dim))
                
    def fit(self, inputs, max_epochs=1, init_radious=0, init_alfa=1):
        # método similar a otros algoritmo de ML. Recibe las entradas, el radio inicial, el factor de apendizaje inicial,
        # el máximo de épocas y devuelve los pesos ajustados
        self.radious = init_radious
        self.alfa = init_alfa
        t = 0
        P = inputs.shape[0]
        for epoch in range(max_epochs):
            for x in inputs:
                self.alfa = init_alfa/(1.0 + float(t/P))
                i_gana, y_gana = -1, float('-inf')
                for i in range(self.filas*self.columnas):
                    y_predict = self.lista[i].predict(x.reshape(1,-1))
                    if y_predict > y_gana:
                        y_gana = y_predict
                        i_gana = i
                f_gana = int(i_gana / self.columnas)
                c_gana = i_gana % self.columnas
                
                # Conjunto de vecinas para un radious
                for f in range(f_gana - self.radious, f_gana + self.radious+1):
                    if f < 0:
                        row = self.filas + f
                    else:
                        if f > self.filas-1:
                            row = f % self.filas
                        else:
                            row = f

                    for c in range(c_gana - self.radious, c_gana + self.radious+1):
                        if c < 0:
                            column = self.columnas + c 
                        else:
                            if c > self.columnas-1:
                                column = c % self.columnas
                            else:
                                column = c
                        self.lista[(row*self.columnas) + column].fit(x.reshape(1,-1), alfa=self.alfa)
                t += 1
                if (t%1000) == 0:
                    print(t, self.radious, "  ", end='')
            if self.radious > 0:
                self.radious -= 1
                            
    def neuron_labeling(self, inputs, target):
        # recorre la lista de neuronas y va llamanado a su metodo de etiquetado para cada neurona
        
        for i in range(self.filas*self.columnas):
            # print(X.shape, self.target.shape)
            self.labels.append(self.lista[i].neuron_labeling(inputs, target))
            # print(self.lista[i].labeling(X, target=y_deseada, etiquetado='neurona'))
 
    def predict(self, inputs):
        # recorre la lista de neuronas y calcula la salida de un conjunto de muestras
        # util para usar la salida del som como entrada a otrso sistemas
        output_list = []
        for x in inputs:
            for i in range(self.filas*self.columnas):
                output_list.append(self.lista[i].predict(x.reshape(1,-1)))
        return np.array(output_list).reshape(inputs.shape[0], -1)    
    
    def label_predict(self, inputs):
        # clasificación de muestras con el etiquetado de cada neurona hecho previamente
        label_list = []
        for x in inputs:
            label_list.append(self.labels[np.argmax(self.predict(x.reshape(1,-1)))])
        return np.array(label_list).reshape(inputs.shape[0], -1)    

# DIMENSIÓN DEL SOM. 

In [32]:
filas=15
columnas=9

In [33]:
X_train,X_test,y_train,y_test=train_test_split(XR,YR,test_size=100,stratify=YR)

In [34]:
X_train.shape,X_test.shape,y_train.shape,y_test.shape

((600, 785), (100, 785), (600, 1), (100, 1))

In [35]:
mapa_autorganizado=som(filas=15,columnas=9,dim=X_train.shape[1])

In [36]:
mapa_autorganizado.fit(inputs=X_train,max_epochs=30,init_radious=4,init_alfa=10)

1000 3   2000 1   3000 0   4000 0   5000 0   6000 0   7000 0   8000 0   9000 0   10000 0   11000 0   12000 0   13000 0   14000 0   15000 0   16000 0   17000 0   18000 0   

In [37]:
mapa_autorganizado.neuron_labeling(X_train,y_train)

In [39]:
etiquetas_neuronas_predict=mapa_autorganizado.label_predict(X_test)

In [40]:
accuracy_score(y_test,etiquetas_neuronas_predict)

0.83

# MLP (135X60X10)

In [41]:
from sklearn.preprocessing import MinMaxScaler

In [43]:
X_train_mlp=mapa_autorganizado.predict(X_train)

In [44]:
X_train_mlp.shape

(600, 135)

In [45]:
X_train_mlp=X_train_mlp.reshape(600,135)

In [46]:
X_train_mlp.shape

(600, 135)

In [48]:
X_test_mlp=mapa_autorganizado.predict(X_test)

In [49]:
X_test_mlp.shape

(100, 135)

In [50]:
X_test_mlp=X_test_mlp.reshape(100,135)

In [51]:
X_test_mlp.shape

(100, 135)

### ESCALADO

In [47]:
escalado=MinMaxScaler()

In [52]:
escalado.fit(np.vstack((X_train_mlp,X_test_mlp)))

MinMaxScaler()

In [53]:
X_train_mlp=escalado.transform(X_train_mlp)

In [54]:
X_test_mlp=escalado.transform(X_test_mlp)

In [70]:
from sklearn.neural_network import MLPClassifier

In [75]:
mlp=MLPClassifier(hidden_layer_sizes=(60,),verbose=True,max_iter=3000) #verbose=True

In [76]:
mlp.fit(X_train_mlp,y_train)

  y = column_or_1d(y, warn=True)


Iteration 1, loss = 2.42379771
Iteration 2, loss = 2.29450130
Iteration 3, loss = 2.21478745
Iteration 4, loss = 2.17160388
Iteration 5, loss = 2.14099737
Iteration 6, loss = 2.10523645
Iteration 7, loss = 2.06553810
Iteration 8, loss = 2.02537418
Iteration 9, loss = 1.98662336
Iteration 10, loss = 1.94841878
Iteration 11, loss = 1.91059026
Iteration 12, loss = 1.87206474
Iteration 13, loss = 1.83219263
Iteration 14, loss = 1.79328852
Iteration 15, loss = 1.75574516
Iteration 16, loss = 1.71862040
Iteration 17, loss = 1.68258912
Iteration 18, loss = 1.64722065
Iteration 19, loss = 1.61054660
Iteration 20, loss = 1.57680770
Iteration 21, loss = 1.54218633
Iteration 22, loss = 1.50847363
Iteration 23, loss = 1.47729794
Iteration 24, loss = 1.44403752
Iteration 25, loss = 1.41211590
Iteration 26, loss = 1.38125933
Iteration 27, loss = 1.35006750
Iteration 28, loss = 1.32173502
Iteration 29, loss = 1.29254615
Iteration 30, loss = 1.26439541
Iteration 31, loss = 1.23797193
Iteration 32, los

MLPClassifier(hidden_layer_sizes=(60,), max_iter=3000, verbose=True)

In [77]:
mlp.predict(X_test_mlp)

array([8, 7, 9, 0, 7, 5, 0, 3, 6, 4, 1, 4, 7, 4, 5, 2, 5, 9, 9, 7, 3, 5,
       6, 8, 3, 3, 3, 0, 4, 2, 6, 5, 5, 1, 4, 2, 6, 7, 5, 4, 0, 5, 9, 1,
       1, 4, 9, 8, 5, 0, 1, 6, 5, 0, 1, 7, 7, 1, 6, 4, 0, 8, 7, 7, 5, 7,
       8, 0, 4, 9, 5, 3, 1, 6, 2, 2, 9, 1, 0, 4, 8, 2, 1, 1, 2, 2, 6, 8,
       5, 3, 2, 7, 3, 3, 9, 1, 1, 0, 0, 3], dtype=int64)

In [78]:
accuracy_score(y_test,mlp.predict(X_test_mlp))

0.89