In [1]:
import matplotlib.pyplot as plt
 
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
from sklearn.decomposition import PCA
from sklearn.neural_network import MLPClassifier
 
 
# Load the LFW faces, keeping only people with at least 100 images
lfw_dataset = fetch_lfw_people(min_faces_per_person=100)

# images.shape unpacks to (sample count, height, width); h and w are kept so
# the plotting code can reshape rows of X back into 2-D pictures
_, h, w = lfw_dataset.images.shape
X = lfw_dataset.data
y = lfw_dataset.target
target_names = lfw_dataset.target_names

# Split into a training and testing set. The PCA, training and evaluation
# cells all read X_train / X_test / y_train / y_test, so this must run.
# random_state pins the shuffle so a Restart & Run All reproduces the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

Downloading LFW metadata: https://ndownloader.figshare.com/files/5976012


URLError: <urlopen error [WinError 10054] An existing connection was forcibly closed by the remote host>

In [2]:
# Learn a whitened 100-component PCA basis from the training faces only
n_components = 100
pca = PCA(n_components=n_components, whiten=True)
pca.fit(X_train)

# Project both splits with the basis fitted on the training data
X_train_pca, X_test_pca = pca.transform(X_train), pca.transform(X_test)

In [3]:
# Train a one-hidden-layer MLP on the PCA-reduced training faces
print("Fitting the classifier to the training set")
clf = MLPClassifier(
    hidden_layer_sizes=(1024,),
    batch_size=256,
    verbose=True,
    early_stopping=True,
).fit(X_train_pca, y_train)

Fitting the classifier to the training set
Iteration 1, loss = 1.50909133
Validation score: 0.575000
Iteration 2, loss = 1.10997812
Validation score: 0.587500
Iteration 3, loss = 0.88640745
Validation score: 0.650000
Iteration 4, loss = 0.70057427
Validation score: 0.725000
Iteration 5, loss = 0.54886776
Validation score: 0.812500
Iteration 6, loss = 0.43613688
Validation score: 0.837500
Iteration 7, loss = 0.35600952
Validation score: 0.862500
Iteration 8, loss = 0.29247010
Validation score: 0.862500
Iteration 9, loss = 0.24153092
Validation score: 0.875000
Iteration 10, loss = 0.19988520
Validation score: 0.862500
Iteration 11, loss = 0.16786811
Validation score: 0.862500
Iteration 12, loss = 0.14149779
Validation score: 0.850000
Validation score did not improve more than tol=0.000100 for two consecutive epochs. Stopping.


In [4]:
# Predict the held-out faces and print per-person precision / recall / F1
y_pred = clf.predict(X_test_pca)
report = classification_report(y_test, y_pred, target_names=target_names)
print(report)

                   precision    recall  f1-score   support

     Colin Powell       0.97      0.85      0.90        84
  Donald Rumsfeld       0.82      0.82      0.82        28
    George W Bush       0.81      0.97      0.88       145
Gerhard Schroeder       0.76      0.67      0.71        33
       Tony Blair       0.92      0.67      0.78        52

      avg / total       0.86      0.85      0.85       342



In [5]:
# Visualization
def plot_gallery(images, titles, h, w, rows=3, cols=4):
    """Draw up to ``rows * cols`` images as a grid of titled grayscale panels.

    Parameters
    ----------
    images : sequence of array-like
        ``images[i]`` is reshaped to ``(h, w)`` before being shown.
    titles : sequence of str
        ``titles[i]`` is used as the title of the panel showing ``images[i]``.
    h, w : int
        Height and width each image is reshaped to.
    rows, cols : int, optional
        Grid dimensions; the grid has ``rows * cols`` cells.

    Notes
    -----
    If fewer images or titles than grid cells are supplied, only the
    available ones are drawn and the remaining cells are left empty,
    instead of raising ``IndexError``.
    """
    plt.figure()
    # Clamp to the data actually supplied so a small sample cannot index
    # past the end of images or titles.
    n_panels = min(rows * cols, len(images), len(titles))
    for i in range(n_panels):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i])
        # Pixel coordinates carry no information for a face thumbnail
        plt.xticks(())
        plt.yticks(())
 
def titles(y_pred, y_test, target_names):
    """Yield one two-line caption per prediction: predicted vs. true name.

    ``y_pred`` and ``y_test`` hold indices into ``target_names``. Only the
    last space-separated word of each name is used in the caption. One
    caption is produced for each entry along the first axis of ``y_pred``.
    """
    def last_word(label_idx):
        # Last space-separated token of the name at this label index
        return target_names[label_idx].split(' ')[-1]

    n_predictions = y_pred.shape[0]
    for k in range(n_predictions):
        predicted, actual = last_word(y_pred[k]), last_word(y_test[k])
        yield 'predicted: {0}\ntrue: {1}'.format(predicted, actual)
 
# Materialise a caption for every test sample, then show the gallery
prediction_titles = [*titles(y_pred, y_test, target_names)]
plot_gallery(X_test, prediction_titles, h, w)