In [515]:
from sklearn import datasets
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.io import loadmat
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import  StratifiedShuffleSplit,GridSearchCV
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.base import BaseEstimator

In [540]:
mnist_path = 'mnist-original.mat'

mnist = loadmat(mnist_path)
X = mnist['data'].T
y = mnist['label'][0]
#display(X.shape , y.shape)

# Scale all X values
scaler = StandardScaler()
X_scaled  = scaler.fit_transform(X)

sss = StratifiedShuffleSplit(n_splits=10, test_size=0.0005,train_size=0.0005, random_state=0)
train_index, test_index = next(sss.split(X=X_scaled, y=y))   

X_train, X_test = X_scaled[train_index], X_scaled[test_index]
y_train, y_test = y[train_index], y[test_index]


#display(y_train[:5])
df_y_train = pd.get_dummies(y_train)
y_train = df_y_train.values
#display(y_train[:5])

df_y_test = pd.get_dummies(y_test)
y_test = df_y_test.values


display('X_train shape ', X_train.shape,'X_test shape' , X_test.shape)
display('Y_train shape ', y_train.shape,'Y_test shape' , y_test.shape)



'X_train shape '

(35, 784)

'X_test shape'

(35, 784)

'Y_train shape '

(35, 10)

'Y_test shape'

(35, 10)

In [565]:
class MLPerceptron(BaseEstimator):
    
    #layers - includes input, #hidden, output
    def __init__(self,layers_len,eta=0,random_state=50):
        self.random_state = random_state
        self.eta = eta
        self.n_layers = len(layers_len)
        self.layers_len = layers_len
       
        self.init_weights()
     
    
    def init_weights(self):
        rng = np.random.RandomState(self.random_state)       
                
        layers_len_next = self.layers_len.copy()
        self.weights = []
        layers_len_next.pop(0)
        
        print(self.layers_len)
        
        for layer_len, layer_len_next in zip(self.layers_len, layers_len_next):
            #print(layer_len,layer_len_next)
                        
            w = rng.normal(loc=0.0,scale=0.1,size=[layer_len_next,layer_len + 1])
            #print('w',w)
            
            self.weights.append(w)
        
        '''
        for debug the sample page 376
        '''
#         self.weights.append(np.array([[0.5, 0, 0.3],
#                                    [-0.2, 0.4, 0.7],
#                                    [0,-0.5, -0.1]]))
#         self.weights.append(np.array([[-0.5, 0.4, 0,0],
#                                    [0.3, 0.6, 0.4,0]]))
#         self.weights.append(np.array([[0.5, 0.7, 0]]))
                            
        
        #print('weights', self.weights)
        return self.weights
    
    def train(self,X,y,n_iter=50):
    
        n_samples = y.shape[0]
        #print('n_iter',n_iter)
        
        for it in range(n_iter):
            #print('train it',it)
            self.back_propagation(X, y)
           
            
    def back_propagation(self, X,y):
        
        n_samples = X.shape[0]
        #print('x shape',n_samples)
    
        layers_input, layers_net = self.foward_propagation(X)
        #print('[back_propagation] layers_net \n',layers_net,end='\n\n')
        #print('[back_propagation] layers_input \n',layers_input,end='\n\n')
            
        errors = [None] * self.n_layers
        sigmas = [None] * self.n_layers
        
        
        errors[-1] = (layers_input[-1][:-1] - y) * self.error_part(layers_input[-1][:-1])
                
        for layer_index in np.arange(self.n_layers - 2  ,0 , -1):
            #print('[back_propagation] LAYER ',layer_index)
            layer_weights = self.weights[layer_index]
            layer_weights = np.delete(layer_weights, -1, axis=1) #remove weight for bias
            #print('[back_propagation] layer_weights after remove bias \n',layer_weights,end='\n\n')  
            
            sigmas[layer_index] = self.sigma(errors[layer_index+1],layer_weights)
            #print('[back_propagation] sigmas \n', sigmas[layer_index],end='\n\n') 
                       
            layer_input = layers_input[layer_index]
            layer_input = np.delete(layer_input, -1, axis=0)
            #print('[back_propagation] layer_input after remove bias',layer_input,end='\n\n')
            
            errors[layer_index] =  sigmas[layer_index] * self.error_part(layer_input)
            #print('[back_propagation] errors \n',errors,end='\n\n') 
            
            
        #update weights
        #print('[back_propagation] UPDATE WEIGHTS' ,end='\n\n') 
        tmp_weights = [None] * self.n_layers
        for layer_index in np.arange(self.n_layers - 1):
            layer_weights = self.weights[layer_index]
            layer_input = layers_input[layer_index]
                                         
            #print('[back_propagation] layer_weights',layer_weights ,end='\n\n') 
            #print('[back_propagation] layer_input',layer_input ,end='\n\n') 
        
            #print('[back_propagation] errors[layer_index+1]',errors[layer_index+1],errors[layer_index+1].transpose().shape ,end='\n\n') 
            
            tmp_weights =  errors[layer_index+1].transpose() @ layer_input.reshape(-1,1).transpose()
            #print('[back_propagation] tmp_weights',tmp_weights ,end='\n\n') 
            
            tmp_weights =  tmp_weights * self.eta
            #print('[back_propagation] tmp_weights * eta',tmp_weights ,end='\n\n') 
            
            self.weights[layer_index]  = self.weights[layer_index] - tmp_weights
            #print('[back_propagation]  comp weights',self.weights[layer_index] ,end='\n\n') 
        
        
        #print('[back_propagation]  self.weights',self.weights ,end='\n\n') 
       
      
        
    def foward_propagation(self, X):
        
        #print('[foward_propagation] n_layers',self.n_layers)
        
        layers_input = [None] * self.n_layers
        layers_net = [None] * self.n_layers
        
        
        layer_input = X
        #print('[foward_propagation] layer_input \n',layer_input)
        
        layers_net[0] = np.array(layer_input.ravel())
        for layer_index in range(self.n_layers - 1):
            n_examples = layer_input.shape[0]
            print('n_examples',n_examples,'layer_input',layer_input.shape)
            layer_input = np.c_[ layer_input, np.ones(784) ]  
            print('layer_input after add bias \n',type(layer_input),layer_input.shape)
            
            #layer_input = layer_input.reshape(-1,1)
            #layer_weights = self.weights[layer_index].reshape(-1,1)
            #print('layer_weights shape',layer_weights.transpose().shape,'layer_input shape' , layer_input.shape)
            layers_input[layer_index] = layer_input
            layer_weights = self.weights[layer_index]
            #print('[foward_propagation] layer_index',layer_index, ' layer_input \n', layer_input,'shape', layer_input.shape)
            #print('[foward_propagation] layer_index',layer_index, ' layer_weights \n', layer_weights)
            print(layer_input.shape)                       
            layers_net[layer_index + 1] = self.net(layer_input,layer_weights)
            #print('[foward_propagation] next layer_index',layer_index + 1, ' layers_net ', layers_net[layer_index + 1])
                   
            layer_output = self.sigmoid(layers_net[layer_index + 1])
            
            #print('[foward_propagation] next layer_index',layer_index + 1, ' layer_output ',layer_output)
            
            layer_input = layer_output

        layers_input[self.n_layers - 1] = np.append(layer_output, 1) 
        #print('[foward_propagation] layers_input \n',layers_input)
        return layers_input, layers_net
        

    
    def predict(self,X):
        layers_input, layers_net = self.foward_propagation(X)
        Y_hat = layers_input[-1][:-1]
        return Y_hat
    
    def net(self,X,W):
        print('net', X.shape,W.shape)
        return np.dot(X,W.transpose())
    
    def sigmoid(self,value):
        sig = 1 / (1 + np.exp(-value))
        return sig     
 
    def error_part(self, value):
        error_part = value * (1 - value) 
        return error_part
    
    def sigma(self, E,W):
        return np.dot(E,W)
   
 
      

In [566]:
# X = pd.DataFrame([1,0])
# print(X.values.shape)
# y = pd.DataFrame([1])
# print(y.values.shape)


In [567]:
mlp = MLPerceptron(layers_len = [784, 784, 10],eta=0.5,random_state=50) #[784, 784, 10] #

# mlp.train(X.values, y.values,n_iter=100)   
# mlp.predict(X.values)
mlp.train(X_train, y_train,n_iter=100)   



[784, 784, 10]
n_examples 35 layer_input (35, 784)


ValueError: all the input array dimensions except for the concatenation axis must match exactly