## Image Classification

In [6]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

In [None]:
# Load the Labeled Faces in the Wild (LFW) people dataset via scikit-learn.
# min_faces_per_person=100 keeps only people with at least 100 images;
# resize=0.4 is the ratio used to rescale every face image.
lfw = datasets.fetch_lfw_people(min_faces_per_person=100, resize=0.4)

In [None]:
# List the fields available on the returned dataset object
lfw.keys()

In [None]:
# Inspect the shape of the image array.
# It should be a tuple of the form (a, b, c), where:
# => a = number of images
# => b, c = height and width of a single image, in pixels
lfw.images.shape

In [None]:
# Show the first 64 faces of the dataset on an 8x8 grid
fig = plt.figure(figsize=(8, 8))
for cell in range(1, 65):
    # Subplot positions are 1-based, image indices are 0-based
    panel = fig.add_subplot(8, 8, cell)
    panel.imshow(lfw.images[cell - 1], cmap=plt.cm.bone)
plt.show()

In [None]:
# Fit PCA without dimensionality reduction: PCA() is given no n_components,
# so the fitted model can be used to inspect the explained-variance ratios
# of all components.
x, y = lfw.data, lfw.target
pca = PCA()
pca.fit(x)

In [None]:
# Determine the best k: the smallest number of leading components whose
# cumulative explained-variance ratio reaches the target fraction.
variance_threshold = 0.99
k = 0
total = 0
# Iterating over the ratios (instead of indexing inside an unbounded while
# loop) stops cleanly at the last component if the threshold is never reached.
for ratio in pca.explained_variance_ratio_:
    if total >= variance_threshold:
        break
    total += ratio
    k += 1
k

In [None]:
# Refit PCA keeping only the first k components and project the data onto them.
# whiten=True asks scikit-learn to rescale the projected components to unit variance.
pca_lfw = PCA(n_components = k, whiten = True)
x_transformed = pca_lfw.fit_transform(x)
# Expected shape: (number of images, k)
x_transformed.shape

In [None]:
# Reproduce the images from their k-component representation so they can be
# plotted; inverse_transform maps the projected data back to pixel space.
x_approx = pca_lfw.inverse_transform(x_transformed)
# Restore the (images, height, width) layout from the dataset itself rather
# than hard-coding (1140, 50, 37), so this still works if the
# fetch_lfw_people parameters change.
x_approx = x_approx.reshape(lfw.images.shape)

In [None]:
# Show the first 64 reconstructed faces on an 8x8 grid
fig = plt.figure(figsize=(8, 8))
for cell in range(1, 65):
    # Subplot positions are 1-based, image indices are 0-based
    panel = fig.add_subplot(8, 8, cell)
    panel.imshow(x_approx[cell - 1], cmap=plt.cm.bone)
plt.show()

In [None]:
# Eigenfaces: the rows of components_ are the principal axes kept by pca_lfw,
# so the shape is (number of components, number of pixels per image)
pca_lfw.components_.shape

In [None]:
# Reshape the k retained components into image-shaped eigenfaces.
# Use pca_lfw (fitted with n_components=k): the full `pca` model holds every
# component, so reshaping its components_ to k images fails whenever k is
# smaller than the full component count.
# Image height and width come from the dataset instead of being hard-coded.
eigenv = pca_lfw.components_.reshape((k,) + lfw.images.shape[1:])

In [None]:
# Show the first 64 eigenfaces on an 8x8 grid, with axis ticks hidden
fig = plt.figure(figsize=(8, 8))
for cell in range(1, 65):
    # Subplot positions are 1-based, eigenface indices are 0-based
    panel = fig.add_subplot(8, 8, cell, xticks=[], yticks=[])
    panel.imshow(eigenv[cell - 1], cmap=plt.cm.bone)
plt.show()