In [1]:
import os
from sklearn.datasets import fetch_olivetti_faces
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split,GridSearchCV
# Working directory from the original author's machine. Only switch to it when
# it actually exists: an unconditional os.chdir raises FileNotFoundError on any
# other machine and stops the notebook in its first cell.
LAB_DIR = r'D:\Digital_Image_Processing\Lab_9'
if os.path.isdir(LAB_DIR):
    os.chdir(LAB_DIR)


In [2]:
def mlp_fun(X,y,hidden_layer,activation,learning_rate,test_size = 0.3,random_state = 100):
    '''Grid-search an SGD-trained MLP classifier and predict on a held-out split.

    The data is split once into train and test parts. Every combination of the
    supplied hyper-parameter candidates is scored by GridSearchCV (default
    cross-validation settings) on the training part only, and the resulting
    search object is then used to predict the test part.

    ######################## parameters ########################

        X: feature matrix, one row per sample.
        y: class label for each row of X.
        hidden_layer: candidate values for MLPClassifier `hidden_layer_sizes`;
            each tuple has one entry per hidden layer, giving that layer's
            number of neurons.
        activation: candidate activation function names.
        learning_rate: candidate values for `learning_rate_init`.
        test_size: forwarded to train_test_split (default 0.3).
        random_state: seed used for both the split and the classifier
            (default 100).

    ######################## returns ###########################

        (search, y_pred, y_test)
            search: the fitted GridSearchCV object.
            y_pred: labels predicted for the test samples.
            y_test: true labels of the test samples.

    '''

    # Hold out part of the data; the grid search is fitted without it.
    split = train_test_split(X, y, test_size=test_size, random_state=random_state)
    X_train, X_test, y_train, y_test = split

    base_model = MLPClassifier(solver='sgd', verbose=False, random_state=random_state)
    candidates = dict(
        hidden_layer_sizes=hidden_layer,
        activation=activation,
        learning_rate_init=learning_rate,
    )

    # n_jobs=-1 lets the search use all available cores.
    search = GridSearchCV(base_model, param_grid=candidates, n_jobs=-1)
    search.fit(X_train, y_train)

    return search, search.predict(X_test), y_test

In [3]:
# Load the Olivetti faces dataset through scikit-learn.
faces = fetch_olivetti_faces()
# `data` is used as the feature matrix and `target` as the class labels.
X, y = faces.data, faces.target

In [4]:
# Summarise the dataset: one row of X per image, one label in y per image.
print('There are {} face images in this dataset.'.format(X.shape[0]))
print('There are {} pixels per image.'.format(X.shape[1]))
# Count distinct labels instead of assuming they are 0-based and contiguous.
print('There are {} classes.'.format(len(set(y))))

# Hyper-parameter candidates handed to mlp_fun for the grid search.
# Each tuple is one `hidden_layer_sizes` value for MLPClassifier: one entry per
# hidden layer, giving that layer's neuron count. For example, (400, 200) means
# two hidden layers with 400 and 200 neurons.
hidden_layer = [(200, 6), (300, 100), (400, 200), (500, 500)]
activation = ['relu', 'logistic', 'tanh']
learning_rate = [5e-1, 5e-2, 5e-3]

There are 400 digits in this dataset.
There are 4096 pixels per image.
There are 40 classes.


In [5]:

# Run the grid search, keeping mlp_fun's default test_size and random_state.
clf, y_pred, y_test = mlp_fun(
    X,
    y,
    hidden_layer=hidden_layer,
    activation=activation,
    learning_rate=learning_rate,
)
    




In [6]:
# Report accuracy on the held-out test split, then the hyper-parameters of the
# best estimator found by the grid search.
best_model = clf.best_estimator_
print('Accuracy Score: {}'.format(accuracy_score(y_test,y_pred)))
print('Best learning rate, hidden layer size, activation function: {},{},{}'.format(
    best_model.learning_rate_init,
    best_model.hidden_layer_sizes,
    best_model.activation,
))

Accuracy Score: 0.925
Best learning rate, hidden layer size, activation function: 0.005,(400, 200),tanh


In [7]:

# Rows correspond to true classes, columns to predicted classes.
conf_mat = confusion_matrix(y_test, y_pred)
print(conf_mat)

[[3 0 0 ... 0 0 0]
 [0 3 0 ... 0 0 0]
 [0 0 5 ... 0 0 0]
 ...
 [0 0 0 ... 2 0 0]
 [0 0 0 ... 0 5 0]
 [0 0 0 ... 0 0 2]]


### How does the value of the learning rate affect the classification accuracy? Why?

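The learning rate sets the step size of each gradient-descent update. If it is too low, the 'descent' is slow, and the network may not reach a good minimum of the cost function within the allowed number of iterations, which lowers accuracy. If it is too high, each step may overshoot the minimum, so training oscillates or diverges instead of converging, which also lowers accuracy. In this grid search the smallest learning rate tried (0.005) gave the best result.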


### Does the increased hidden layer size improve the classification accuracy? Why?

Increasing the size of the hidden layers gives the network more capacity to fit the data. However, notice that there is no direct relationship between hidden layer size and accuracy. The classifier with the best results was the one with two hidden layers of 400 and 200 nodes, not the largest one with two hidden layers of 500 nodes each. If the network is so large that it overfits the training data, it will not perform well on the test dataset.