<div style="font-size: 14pt;">Prof. Krzysztof Rybinski</div><br/><br/>
<div style="font-size: 22pt;"><b>Artificial Intelligence course</b></div><br/><br/>
<div style="font-size: 18pt;">LAB 5.3</div><br/>
<div style="font-size: 18pt;">- Predicting handwritten digits in MNIST dataset with MLP</div><br/><br/>
<div style="font-size: 18pt;">- Homework 3 described at the end of this Jupyter Notebook</div><br/>

In [2]:
import keras

In [None]:
# check MNIST data information
# https://en.wikipedia.org/wiki/MNIST_database

In [3]:
from keras.datasets import mnist

In [4]:
dataset = mnist.load_data()

(Xtrain, ytrain), (Xtest, ytest) = dataset

n_train = len(Xtrain)
n_test = len(Xtest)

n_features = 28*28

Xtrain = Xtrain.reshape( n_train, n_features )
Xtest  = Xtest.reshape( n_test, n_features )

In [None]:
Xtrain[0].shape

In [None]:
28*28

In [None]:
from matplotlib import pyplot as plt
import numpy as np

In [None]:
plt.imshow( np.reshape( Xtrain[10002], (28,28) ) , cmap=plt.cm.gray)
plt.show()

In [None]:
Xtrain.shape

In [None]:
Xtest.shape

In [9]:
from sklearn.neural_network import MLPClassifier

In [None]:
clf = MLPClassifier(hidden_layer_sizes=(100,), verbose=True)

In [None]:
# check info on MLPClassifer in sklearn
# https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html

In [None]:
clf.fit(Xtrain, ytrain)

In [None]:
help(clf)

In [None]:
print(clf.coefs_)

In [None]:
len(clf.coefs_)

In [None]:
clf.coefs_[0].shape
#explain the dimension of the W matrix for the first (hidden) layer

In [None]:
clf.coefs_[1].shape
#explain the dimension of the W matrix for the second (output) layer

In [None]:
clf.intercepts_[0].shape
#explain the dimension of the b (bias) vector for the first (hidden) layer

In [None]:
clf.intercepts_[1].shape
#explain the dimension of the b (bias) vector for the second (output) layer

In [None]:
clf.score(Xtrain, ytrain)

In [None]:
clf.score(Xtest, ytest)

In [None]:
plt.imshow( np.reshape( Xtest[120], (28,28) ) , cmap=plt.cm.gray)
plt.show()

In [None]:
clf.predict(Xtest[120].reshape(1, -1))

In [None]:
class MyMLP:
    def __init__(self, n_input, n_hidden, n_output, random_seed = 0, activate='sigmoid', verbose=False):
        np.random.seed( random_seed )
        # biases for hidden layer
        self.b_hidden = np.random.randn( 1,n_hidden )
        # weights for hidden layer
        self.w_hidden = np.random.randn( n_input, n_hidden ) / np.sqrt(n_input+n_hidden)    # Glorot Initialization
        # biases for output layer
        self.b_output = np.random.randn( 1, n_output )
        # weights for hidden layer
        self.w_output = np.random.randn( n_hidden, n_output ) / np.sqrt(n_hidden+n_output)
        
        self.verbose = verbose
        if activate=='sigmoid':
            self.activate = self.sigmoid
            self.activate_der = self.sigmoid_der
        else:
            self.activate = self.tanh
            self.activate_der = self.tanh_der

        
    def sigmoid(self,x):
        return 1.0/(1.0+np.exp(-x))

    def sigmoid_der(self,x):
        g = 1.0/(1.0+np.exp(-x))
        return g, g*(1.0-g)
    
    def tanh(self,x):
        return np.tanh(x)
    
    def tanh_der(self,x):
        g = np.tanh(x)
        return g, 1.0-g**2

    def softmax(self,x):
        x -= np.max(x,axis=1,keepdims=True)
        x  = np.exp(x)
        x /= np.sum(x,axis=1,keepdims=True)
        return x
        
    def predict(self, X):
        # X.shape = (m,n_input)
        Z1 = X.dot(self.w_hidden) + self.b_hidden
        # Z1.shape = (m,n_hidden)
        A1 = self.activate( Z1 )
        # A1.shape = (m,n_hidden)
        Z2 = A1.dot(self.w_output) + self.b_output
        # Z2.shape = (m,n_output)
        A2 = self.softmax( Z2 )
        # A2.shape = (m,n_output)
        return A2
    
    def predict_class(self, X):
        yhat = self.predict(X)
        # pred.shape = (m,n_output)
        # np.argmax( pred , axis=1 ).shape = (m,)
        return np.argmax( yhat , axis=1 )
    
    def score(self,X,y):
        return np.mean( self.predict_class(X) == y )
    
    # cross-entropy
    def loss(self, X, y):
        yhat = self.predict(X)
        return - np.mean( np.log( yhat[ range(len(yhat)), y ] ) )
    
    def fit(self, Xtrain, ytrain, epochs = 100, learning_rate = 0.1):
        m,_ = Xtrain.shape
        
        for iter in range(epochs):  
            # Forward propagation
            # X.shape = (m,n_input)
            Z1 = Xtrain.dot(self.w_hidden) + self.b_hidden
            # Z1.shape = (m,n_hidden)
            A1,dZ1 = self.activate_der( Z1 )
            # A1.shape = (m,n_hidden)
            Z2 = A1.dot(self.w_output) + self.b_output
            # Z2.shape = (m,n_output)
            A2 = self.softmax( Z2 )
            # A2.shape = (m,n_output)

            # Backward propagation
#             delta2 = (A2-ytrain_one_hot)/m
            delta2 = A2
            delta2[ range(len(delta2)), ytrain ] -= 1
            delta2 /= len(delta2)
        
            delta1 = delta2.dot( self.w_output.T ) * dZ1

            dw_output = A1.T.dot(delta2)
            dw_hidden = Xtrain.T.dot(delta1)

            db_output = np.sum( delta2, axis=0, keepdims=True )
            db_hidden = np.sum( delta1, axis=0, keepdims=True )
            
            
            # Gradient descent
            self.w_hidden -= (learning_rate * dw_hidden)
            self.b_hidden -= (learning_rate * db_hidden)
            self.w_output -= (learning_rate * dw_output)
            self.b_output -= (learning_rate * db_output)
            
            if self.verbose:
                print("Loss after iteration %i: %f (score=%.2f%%)" %(iter, self.loss(Xtrain, ytrain), 100.0*self.score(Xtrain,ytrain)))
        return 0

In [None]:
mlp = MyMLP(784, 100, 10, random_seed=0, activate='tanh', verbose=True)

In [None]:
mlp.fit( Xtrain, ytrain)

In [None]:
mlp.score( Xtest, ytest )

# Homework 3
Load MNIST fashion dataset, it has the same dimensions as MNIST dataset <br/>
https://keras.io/api/datasets/fashion_mnist/  <br/> 
Go to the sklearn MLPClassifier website, learn about parameters and try different ones to achieve the 
best accuracy on the test set <br/>
Comment on bias (overfitting) and variance (underfitting) <br/>
https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html

In [5]:
from keras.datasets import fashion_mnist

In [6]:
dataset = fashion_mnist.load_data()

(Xtrain, ytrain), (Xtest, ytest) = dataset

n_train = len(Xtrain)
n_test = len(Xtest)

n_features = 28*28

Xtrain = Xtrain.reshape( n_train, n_features )
Xtest  = Xtest.reshape( n_test, n_features )

In [7]:
Xtrain[0].shape

(784,)

In [None]:
plt.imshow( np.reshape( Xtrain[5000], (28,28) ) , cmap=plt.cm.gray)
plt.show()

In [None]:
ytrain[20]

In [13]:
clf = MLPClassifier(hidden_layer_sizes=(100,50,25,10), max_iter=50,activation = 'tanh',
                    solver='adam',random_state=1, verbose=True)

In [14]:
clf.fit(Xtrain, ytrain)

Iteration 1, loss = 1.18933755
Iteration 2, loss = 0.83997346
Iteration 3, loss = 0.80187300
Iteration 4, loss = 0.76066321
Iteration 5, loss = 0.76789468
Iteration 6, loss = 0.73175515
Iteration 7, loss = 0.74864025
Iteration 8, loss = 0.74684676
Iteration 9, loss = 0.76970043
Iteration 10, loss = 0.73019225
Iteration 11, loss = 0.69392482
Iteration 12, loss = 0.71517872
Iteration 13, loss = 0.71591268
Iteration 14, loss = 0.71654923
Iteration 15, loss = 0.67798672
Iteration 16, loss = 0.68589243
Iteration 17, loss = 0.67594368
Iteration 18, loss = 0.67245901
Iteration 19, loss = 0.65813810
Iteration 20, loss = 0.66105805
Iteration 21, loss = 0.68405609
Iteration 22, loss = 0.67209643
Iteration 23, loss = 0.64908035
Iteration 24, loss = 0.66023584
Iteration 25, loss = 0.70868791
Iteration 26, loss = 0.66146557
Iteration 27, loss = 0.65198242
Iteration 28, loss = 0.68577464
Iteration 29, loss = 0.64554093
Iteration 30, loss = 0.66151554
Iteration 31, loss = 0.66002791
Iteration 32, los

MLPClassifier(activation='tanh', hidden_layer_sizes=(100, 50, 25, 10),
              max_iter=50, random_state=1, verbose=True)

In [12]:
clf.score(Xtrain, ytrain)

0.7175666666666667

In [None]:
clf.score(Xtest, ytest)