# Face Recognition
## Part 3 Classification

In [None]:
import sys
sys.path.append('../utils/')

In [None]:
from ImageUtils import *

import numpy as np
import pandas as pd # Needs the package Pandas to be installed. Check Anaconda Environments and Packages.
from sklearn.decomposition import PCA # Needs SciKit Learn package to be installed. Check Anaconda Environments and Packages.4
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, classification_report
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
from scipy.cluster.hierarchy import linkage, dendrogram
from sklearn.model_selection import KFold
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from mpl_toolkits import mplot3d

# Data preparation
## Load dataset

In [None]:
# Load the three Faces94 subsets and the landscape images in grayscale.
faces94_male = readFaces94MaleFaces(gray=True)
faces94_female = readFaces94FemaleFaces(gray=True)
faces94_malestaff = readFaces94MaleStaffFaces(gray=True)
landscapes = np.array(readLandsCapeImage(gray=True))

# Stack every image along the first axis; the labels below follow this order.
dataset = np.vstack((faces94_male, faces94_female, faces94_malestaff, landscapes))

# Class ids: 0 = landscape, 1 = male, 2 = female.
# Male staff share class 1 with the male set instead of getting their own id.
labels = np.repeat(
    [1.0, 2.0, 1.0, 0.0],
    [
        faces94_male.shape[0],
        faces94_female.shape[0],
        faces94_malestaff.shape[0],
        landscapes.shape[0],
    ],
)

dataset_N, height, width = dataset.shape

## Data centering and covariance matrix calculation

In [None]:
# Scale pixel values by 1/255 (assumes 8-bit intensities in 0..255 — TODO confirm).
dataset_norm = dataset/255

In [None]:
# Mean image of the scaled dataset: average over images, then back to 2-D.
flat_norm_images = dataset_norm.reshape(dataset_N, height*width)
mean = flat_norm_images.mean(axis=0).reshape(height, width)

In [None]:
# Covariance of the flattened, scaled images. np.cov's default (rowvar=True)
# treats each row, i.e. each image, as one variable.
norm_rows = dataset_norm.reshape(dataset_N, height*width)
dataset_norm_cov = np.cov(norm_rows)
dataset_norm_cov.shape

## Dimensionality reduction

## Singular Value Decomposition

In [None]:
# Only the singular values `s` of the covariance matrix are kept; both
# singular-vector matrices are discarded.
_,s,_ = np.linalg.svd(dataset_norm_cov)

In [None]:
# Fraction (0..1) of the total variability the retained components must reach.
representation_percentage = 0.85 # Selected variability

In [None]:
# Choose how many leading components are needed for their cumulative share of
# the singular-value total to reach `representation_percentage`.
sum_eig = np.sum(s)
percentage_variance = np.divide(s, sum_eig)

# Entry k is the share covered by components 0..k (non-decreasing, since
# singular values are non-negative).
cumulative_variance = np.cumsum(percentage_variance)

# First position whose running share reaches the target, turned into a count.
# The count is capped at the number of components, so a target that is only
# met by the full set (or missed through rounding) no longer leaves num_var at 0.
first_hit = int(np.searchsorted(cumulative_variance, representation_percentage))
num_var = min(first_hit + 1, percentage_variance.shape[0])

# Share of variability actually covered by the selected components.
sum_var = cumulative_variance[num_var - 1] if num_var else 0

num_var

In [None]:
# Cumulative share of variability after each component.
cum_per = np.cumsum(percentage_variance)

# Walk along the curve until adding one more component raises the cumulative
# share by less than 0.01 % (relative to the previous value).
for i in range(1, len(s)):
    previous_share = cum_per[i - 1]
    change = (cum_per[i] - previous_share) / previous_share * 100
    if not change < .01:
        continue
    num_var1 = i - 1
    print("First",num_var1, "components with ",cum_per[num_var1]*100,"percent of variability captured and from which the contribution is less than 0.01%")
    break

# Plot the cumulative curve in percent.
plt.figure(figsize=(12, 6))
plt.plot(cum_per * 100)
plt.title('Cumulative Summation of the Explained Variance')
plt.xlabel('Number of Components')
plt.ylabel('Variance (%)')
plt.show()

## PCA Principal component analysis (Eigenfaces - Face space)

In [None]:
# Fit PCA with `num_var` components on the flattened images.
# NOTE(review): the fit uses the unscaled `dataset`, not `dataset_norm`.
flat_images = dataset.reshape(dataset_N, height*width)
pca = PCA(n_components=num_var, svd_solver='full')
pca.fit(flat_images)
pca.components_.shape

In [None]:
# Show the first 16 principal components as images on a 4x4 grid.
rows, cols = 4, 4
plt.figure(figsize=(30, 20))
for i in range(rows * cols):
    eigenface = pca.components_[i].reshape(height, width)
    plt.subplot(rows, cols, i + 1)
    plt.imshow(eigenface, plt.cm.gray)

### Labels classes

In [None]:
# All male images in one array: the male set followed by the male staff set.
dataset_male = np.vstack((faces94_male, faces94_malestaff))

In [None]:
# Display the dimensions of the combined male array.
dataset_male.shape

### within-class

In [None]:
# Per-class mean image: flatten each class, scale by 1/255, average over the
# images and reshape back to (height, width).
male_pixels = dataset_male.reshape(dataset_male.shape[0], height*width) / 255
mean_male = male_pixels.mean(axis=0).reshape(height, width)

female_pixels = faces94_female.reshape(faces94_female.shape[0], height*width) / 255
mean_female = female_pixels.mean(axis=0).reshape(height, width)

landscape_pixels = landscapes.reshape(landscapes.shape[0], height*width) / 255
mean_landscape = landscape_pixels.mean(axis=0).reshape(height, width)

In [None]:
# One row of three panels showing the mean image of each class.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 6))

ax1.set_title("Mean Male")
ax2.set_title("Mean Female")
ax3.set_title("Mean Landscapes")

# The means were computed on pixels/255, so scale them back for display.
ax1.imshow(mean_male * 255, plt.cm.gray)
ax2.imshow(mean_female * 255, plt.cm.gray)
ax3.imshow(mean_landscape * 255, plt.cm.gray)

In [None]:
# Covariance of the male images after subtracting the male mean image.
# np.cov's default (rowvar=True) treats each row (one image) as a variable.
male_centered = (dataset_male / 255 - mean_male).reshape(dataset_male.shape[0], height*width)
male_cov = np.cov(male_centered)
male_cov.shape

In [None]:
# Covariance of the female images after subtracting the female mean image.
# np.cov's default (rowvar=True) treats each row (one image) as a variable.
female_centered = (faces94_female / 255 - mean_female).reshape(faces94_female.shape[0], height*width)
female_cov = np.cov(female_centered)
female_cov.shape

In [None]:
# Covariance of the landscape images after subtracting the landscape mean image.
# np.cov's default (rowvar=True) treats each row (one image) as a variable.
landscape_centered = (landscapes / 255 - mean_landscape).reshape(landscapes.shape[0], height*width)
landscape_cov = np.cov(landscape_centered)
landscape_cov.shape

In [None]:
# All-ones matrices with one row per image of each class and one column per pixel.
pixels_per_image = height*width
landscape_base_matrix = np.ones((landscapes.shape[0], pixels_per_image))
male_base_matrix = np.ones((dataset_male.shape[0], pixels_per_image))
female_base_matrix = np.ones((faces94_female.shape[0], pixels_per_image))

### Projection images on face space

In [None]:
# Project every flattened image onto the fitted principal components.
images_as_rows = dataset.reshape(dataset_N, height*width)
dataset_projected = pca.transform(images_as_rows)
dataset_projected.shape

### Variance ratio PCA

In [None]:
# Display the share of variance explained by each retained component.
pca.explained_variance_ratio_

## Unsupervised image clustering - K means model

In [None]:
# Cluster the PCA projections into three groups; the seed is fixed so the
# cluster assignment is repeatable.
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(dataset_projected)
wcentroids = kmeans.cluster_centers_
wcentroids.shape

In [None]:
# Show each K-means centroid as an image.
cols = 3
rows = 1

# Map the centroids from PCA space back to pixel space. inverse_transform adds
# back the mean that PCA removed when it was fitted on `dataset`; the notebook's
# `mean` is computed from `dataset_norm` (pixels / 255), so adding it instead
# puts the offset on a different scale than the projected data.
centroid_images = pca.inverse_transform(kmeans.cluster_centers_)

plt.figure(figsize=(10,6))
for i in np.arange(rows * cols):
    plt.subplot(rows, cols, i + 1)
    plt.title("Class "+str(i+1))
    plt.imshow(centroid_images[i].reshape(height, width), plt.cm.gray)

In [None]:
# Cluster id assigned to every image.
y_label = kmeans.labels_
# Distance from every image to each of the centroids ...
wtotaldist = kmeans.transform(dataset_projected)
# ... and the smallest of those distances per image.
wdistances = wtotaldist.min(axis=1)
print(len(wdistances))

### Class 1

In [None]:
# Select K-means cluster 0 ("class 1") and report how many images it holds.
kclass = 0
class_distances = wdistances[y_label == kclass]
print("Number images: " + str(class_distances.shape[0]))

In [None]:
# Pass the centroid distances of the selected cluster to `histbox`.
# NOTE(review): `histbox` is not defined in this notebook — presumably a
# plotting helper from ImageUtils; verify.
histbox(wdistances[y_label==kclass])

In [None]:
# Table of (distance to nearest centroid, cluster id), sorted by ascending distance.
CVresult = {'w distances': wdistances, 'label': y_label}
df = pd.DataFrame(CVresult).sort_values('w distances', ascending=True, na_position='first')
# Keep only the rows of the selected cluster; the index still identifies the image.
df2 = df[df['label'] == kclass]
df2.head(6)

In [None]:
# First 12 rows of df2 (smallest distances when df2 is sorted ascending).
rows, cols = 3, 4
plt.figure(figsize=(24, 16))
for i in range(rows * cols):
    image_id = df2.index[i]
    distance = df2['w distances'][image_id]
    plt.subplot(rows, cols, i + 1)
    plt.title("Class1 low distance " + str(distance), fontsize=13)
    plt.imshow(dataset[image_id], plt.cm.gray)

In [None]:
# Last 4 rows of df2, walking backwards (largest distances when sorted ascending).
rows, cols = 1, 4
plt.figure(figsize=(15, 10))
for i in range(rows * cols):
    image_id = df2.index[-(i + 1)]
    distance = df2['w distances'][image_id]
    plt.subplot(rows, cols, i + 1)
    plt.title("Class1 high distance " + str(distance), fontsize=10)
    plt.imshow(dataset[image_id], plt.cm.gray)

### Class 2

In [None]:
# Select K-means cluster 1 ("class 2") and report how many images it holds.
kclass = 1
class_distances = wdistances[y_label == kclass]
print("Number images: " + str(class_distances.shape[0]))

In [None]:
# Pass the centroid distances of the selected cluster to `histbox`.
# NOTE(review): `histbox` is not defined in this notebook — presumably a
# plotting helper from ImageUtils; verify.
histbox(wdistances[y_label==kclass])

In [None]:
# Table of (distance to nearest centroid, cluster id), sorted by ascending distance.
CVresult = {'w distances': wdistances, 'label': y_label}
df = pd.DataFrame(CVresult).sort_values('w distances', ascending=True, na_position='first')
# Keep only the rows of the selected cluster; the index still identifies the image.
df2 = df[df['label'] == kclass]
df2.head(6)

In [None]:
# First 12 rows of df2 (smallest distances when df2 is sorted ascending).
rows, cols = 3, 4
plt.figure(figsize=(24, 16))
for i in range(rows * cols):
    image_id = df2.index[i]
    distance = df2['w distances'][image_id]
    plt.subplot(rows, cols, i + 1)
    plt.title("Class2 low distance " + str(distance), fontsize=13)
    plt.imshow(dataset[image_id], plt.cm.gray)

In [None]:
# Last 4 rows of df2, walking backwards (largest distances when sorted ascending).
rows, cols = 1, 4
plt.figure(figsize=(15, 10))
for i in range(rows * cols):
    image_id = df2.index[-(i + 1)]
    distance = df2['w distances'][image_id]
    plt.subplot(rows, cols, i + 1)
    plt.title("Class2 high distance " + str(distance), fontsize=10)
    plt.imshow(dataset[image_id], plt.cm.gray)

### Class 3

In [None]:
# Select K-means cluster 2 ("class 3") and report how many images it holds.
kclass = 2
class_distances = wdistances[y_label == kclass]
print("Number images: " + str(class_distances.shape[0]))

In [None]:
# Pass the centroid distances of the selected cluster to `histbox`.
# NOTE(review): `histbox` is not defined in this notebook — presumably a
# plotting helper from ImageUtils; verify.
histbox(wdistances[y_label==kclass])

In [None]:
# Table of (distance to nearest centroid, cluster id), sorted by ascending distance.
CVresult = {'w distances': wdistances, 'label': y_label}
df = pd.DataFrame(CVresult).sort_values('w distances', ascending=True, na_position='first')
# Keep only the rows of the selected cluster; the index still identifies the image.
df2 = df[df['label'] == kclass]
df2.head(6)

In [None]:
# First 12 rows of df2 (smallest distances when df2 is sorted ascending).
rows, cols = 3, 4
plt.figure(figsize=(24, 16))
for i in range(rows * cols):
    image_id = df2.index[i]
    distance = df2['w distances'][image_id]
    plt.subplot(rows, cols, i + 1)
    plt.title("Class3 low distance " + str(distance), fontsize=13)
    plt.imshow(dataset[image_id], plt.cm.gray)

In [None]:
# Last 4 rows of df2, walking backwards (largest distances when sorted ascending).
rows, cols = 1, 4
plt.figure(figsize=(15, 10))
for i in range(rows * cols):
    image_id = df2.index[-(i + 1)]
    distance = df2['w distances'][image_id]
    plt.subplot(rows, cols, i + 1)
    plt.title("Class3 high distance " + str(distance), fontsize=10)
    plt.imshow(dataset[image_id], plt.cm.gray)

In [None]:
# Reference labels renumbered for comparison with the K-means cluster ids:
# 0 = landscape, 1 = female, 2 = male (male staff included).
# NOTE(review): this presumes the clusters came out in that order — verify
# against the cluster images.
labelsk = np.repeat(
    [2.0, 1.0, 2.0, 0.0],
    [
        faces94_male.shape[0],
        faces94_female.shape[0],
        faces94_malestaff.shape[0],
        landscapes.shape[0],
    ],
)

In [None]:
# Flattened confusion-matrix counts of reference labels vs K-means cluster ids.
cm = confusion_matrix(labelsk, y_label).ravel()

# Heatmap of the same comparison, built from a cross-tabulation.
# NOTE(review): `sns` is not imported in the visible import cell — presumably
# seaborn made available elsewhere; verify.
plt.figure()
plt.title("Heatmap")
prediction_data = {'y_Actual': labelsk,'y_Predicted': y_label}
df = pd.DataFrame(prediction_data, columns=['y_Actual','y_Predicted'])
confusionmatrix1 = pd.crosstab(
    index=df['y_Actual'],
    columns=df['y_Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
)
ax = sns.heatmap(confusionmatrix1, annot=True, cmap='Blues', fmt='.0f')
tick_names = ['landscape', 'female', 'male']
ax.xaxis.set_ticklabels(tick_names)
ax.yaxis.set_ticklabels(tick_names)
ax.invert_yaxis()

In [None]:
# Fraction of images whose K-means cluster id equals the reference label.
accuracy_score(y_true=labelsk, y_pred=y_label)

In [None]:
# Per-class precision (average=None returns one value per class).
precision_score(y_true=labelsk, y_pred=y_label, average=None)

In [None]:
# Text report per class; the names follow the labelsk numbering
# (0 landscape, 1 female, 2 male).
print(classification_report(y_true=labelsk, y_pred=y_label, target_names=["landscape", "woman", "man"]))

In [None]:
# 3-D scatter in which each axis is the distance to one of the three centroids;
# the clusters are drawn one after another so each gets its own legend entry.
Y = kmeans.transform(dataset_projected)
plt.figure(figsize=(10, 8))
ax = plt.axes(projection='3d')
for cluster_id in (0, 1, 2):
    members = np.where(y_label == cluster_id)
    ax.scatter(Y[members, 0], Y[members, 1], Y[members, 2], cmap='viridis', linewidth=1)
plt.gca().legend(('class 1','class 2','class 3'))

## Supervised image classification - LDA model
### Linear discriminant analysis

In [None]:
# Fit a two-component LDA on the PCA projections of the whole dataset, using
# the 0/1/2 class labels.
lda = LinearDiscriminantAnalysis(n_components=2)
lda.fit(dataset_projected, labels)

In [None]:
# Coordinates of every image in the two-dimensional LDA space.
dataset_lda = lda.transform(dataset_projected)

In [None]:
# Plot colour and display name per class id, indexed 0, 1, 2.
colors = ['navy', 'turquoise', 'darkorange']
classes = ['landscapes', 'male', 'female']

In [None]:
# Scatter of the two LDA coordinates, one colour per class id (0, 1, 2).
plt.figure(figsize=(10, 8))
for i, color, class_name in zip(range(3), colors, classes):
    in_class = labels == i
    plt.scatter(
        dataset_lda[in_class, 0],
        dataset_lda[in_class, 1],
        alpha=.8,
        color=color,
        label=class_name,
    )
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.xlabel('LDA Eigenvector 1')
plt.ylabel('LDA Eigenvector 2')
plt.title('LDA of EigenFaces distribution')

In [None]:
# Display the share of variance explained by each LDA component.
lda.explained_variance_ratio_

### Dataset projected with LDA

In [None]:
# Display the class predicted by the fitted LDA for every image it was trained on.
lda.predict(dataset_projected)

### Class 1 - landscape

In [None]:
# Table of (first LDA coordinate, predicted class), sorted by ascending coordinate.
CVresult = {'lda': dataset_lda[:, 0], 'label': lda.predict(dataset_projected)}
df = pd.DataFrame(CVresult).sort_values('lda', ascending=True, na_position='first')
# Rows predicted as class 0 (landscape).
df2 = df[df['label'] == 0]
df2.head()

In [None]:
# First 12 rows of df2 with their LDA coordinate as title.
rows, cols = 3, 4
plt.figure(figsize=(24, 16))
for i in range(rows * cols):
    image_id = df2.index[i]
    coordinate = df2['lda'][image_id]
    plt.subplot(rows, cols, i + 1)
    plt.title("Class1 LDA " + str(coordinate), fontsize=13)
    plt.imshow(dataset[image_id], plt.cm.gray)

### Class 2 - male

In [None]:
# Table of (second LDA coordinate, predicted class), sorted by ascending coordinate.
CVresult = {'lda': dataset_lda[:, 1], 'label': lda.predict(dataset_projected)}
df = pd.DataFrame(CVresult).sort_values('lda', ascending=True, na_position='first')
# Rows predicted as class 1 (male).
df2 = df[df['label'] == 1]
df2.head()

In [None]:
# First 12 rows of df2 with their LDA coordinate as title.
rows, cols = 3, 4
plt.figure(figsize=(24, 16))
for i in range(rows * cols):
    image_id = df2.index[i]
    coordinate = df2['lda'][image_id]
    plt.subplot(rows, cols, i + 1)
    plt.title("Class2 LDA " + str(coordinate), fontsize=13)
    plt.imshow(dataset[image_id], plt.cm.gray)

### Class 3 - female

In [None]:
# Table of (second LDA coordinate, predicted class), sorted by descending coordinate.
CVresult = {'lda': dataset_lda[:, 1], 'label': lda.predict(dataset_projected)}
df = pd.DataFrame(CVresult).sort_values('lda', ascending=False, na_position='first')
# Rows predicted as class 2 (female).
df2 = df[df['label'] == 2]
df2.head()

In [None]:
# First 12 rows of df2 with their LDA coordinate as title.
rows, cols = 3, 4
plt.figure(figsize=(24, 16))
for i in range(rows * cols):
    image_id = df2.index[i]
    coordinate = df2['lda'][image_id]
    plt.subplot(rows, cols, i + 1)
    plt.title("Class3 LDA " + str(coordinate), fontsize=13)
    plt.imshow(dataset[image_id], plt.cm.gray)

### t-SNE (t-distributed stochastic neighbor embedding)
### Embedding high-dimensional data for visualization

In [None]:
# Compare 2-D t-SNE embeddings of the PCA projections at several perplexities,
# one panel per perplexity, coloured by class.
perplexities = [5, 30, 50, 100]
(fig, subplots) = plt.subplots(1, 4, figsize=(20, 8))
plt.axis('tight')

landscapes_class = 0
male_class = 1
female_class = 2

# The class masks depend only on `labels`, so they are built once up front.
landscapes_idx = labels == landscapes_class
male_idx = labels == male_class
female_idx = labels == female_class
masks_and_classes = (
    (landscapes_idx, landscapes_class),
    (male_idx, male_class),
    (female_idx, female_class),
)

for i, perplexity in enumerate(perplexities):
    ax = subplots[i]

    # Fixed seed so each panel is repeatable.
    tsne = TSNE(n_components=2, init='random', random_state=0, perplexity=perplexity)
    dataset_tsne = tsne.fit_transform(dataset_projected)

    ax.set_title("t-SNE Eigenfaces Perplexity=%d" % perplexity)

    for class_mask, class_id in masks_and_classes:
        ax.scatter(dataset_tsne[class_mask, 0], dataset_tsne[class_mask, 1], c=colors[class_id])

    # Hide tick labels on both axes.
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    ax.axis('tight')

## Train/test evaluation of supervised models on the low-dimensional PCA projection (Logistic Regression - LDA)

In [None]:
# Hold out 30 % of the PCA projections, keeping the class proportions
# (stratify). The seed is fixed so the split, and every metric computed from
# it, is the same on each run.
X_train, X_test, y_train, y_test = train_test_split(dataset_projected, labels, test_size=0.3, stratify=labels, random_state=42)

## Supervised image classification - Logistic Regression

In [None]:
# Multinomial logistic regression trained on the training split.
# NOTE(review): recent scikit-learn releases deprecate `multi_class`; confirm
# against the installed version.
classifier = LogisticRegression(solver='newton-cg', multi_class='multinomial')
classifier.fit(X_train, y_train)

In [None]:
# Predicted class for every held-out sample.
y_test_pred = classifier.predict(X_test)

In [None]:
# Fraction of held-out samples predicted correctly.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

In [None]:
# Per-class precision on the held-out samples (average=None: one value per class).
precision_score(y_true=y_test, y_pred=y_test_pred, average=None)

In [None]:
# Text report per class; names follow the label ids 0, 1, 2.
print(classification_report(y_true=y_test, y_pred=y_test_pred, target_names=["landscape", "man", "woman"]))

In [None]:
# Confusion-matrix heatmap of the held-out predictions (actual vs predicted).
# NOTE(review): `sns` is not imported in the visible import cell — presumably
# seaborn made available elsewhere; verify.
plt.figure()
plt.title("Heatmap")
classes_dict = {'Actual': y_test.tolist(), 'Predicted': y_test_pred.tolist()}
classes_df = pd.DataFrame(classes_dict, columns=["Actual", "Predicted"])
conf_matrix = pd.crosstab(
    index=classes_df['Actual'],
    columns=classes_df['Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
)
ax = sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='.0f')
tick_names = ['landscape', 'male', 'female']
ax.xaxis.set_ticklabels(tick_names)
ax.yaxis.set_ticklabels(tick_names)
ax.invert_yaxis()

## Supervised image classification - LDA

In [None]:
# LDA used as a classifier, trained on the training split only.
classifier_lda = LinearDiscriminantAnalysis(n_components=2)
classifier_lda.fit(X_train, y_train)

In [None]:
# Predicted class for every held-out sample (overwrites the earlier predictions).
y_test_pred = classifier_lda.predict(X_test)

In [None]:
# Fraction of held-out samples predicted correctly.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

In [None]:
# Per-class precision on the held-out samples (average=None: one value per class).
precision_score(y_true=y_test, y_pred=y_test_pred, average=None)

In [None]:
# Text report per class; names follow the label ids 0, 1, 2.
print(classification_report(y_true=y_test, y_pred=y_test_pred, target_names=["landscape", "man", "woman"]))

In [None]:
# Confusion-matrix heatmap of the held-out predictions (actual vs predicted).
# NOTE(review): `sns` is not imported in the visible import cell — presumably
# seaborn made available elsewhere; verify.
plt.figure()
plt.title("Heatmap")
classes_dict = {'Actual': y_test.tolist(), 'Predicted': y_test_pred.tolist()}
classes_df = pd.DataFrame(classes_dict, columns=["Actual", "Predicted"])
conf_matrix = pd.crosstab(
    index=classes_df['Actual'],
    columns=classes_df['Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
)
ax = sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='.0f')
tick_names = ['landscape', 'male', 'female']
ax.xaxis.set_ticklabels(tick_names)
ax.yaxis.set_ticklabels(tick_names)
ax.invert_yaxis()

## Train/test evaluation of supervised models on the low-dimensional LDA projection (Logistic Regression - LDA)

In [None]:
# Hold out 30 % of the LDA coordinates, keeping the class proportions
# (stratify). The seed is fixed so the split, and every metric computed from
# it, is the same on each run.
# NOTE(review): `dataset_lda` comes from an LDA fitted on the whole labelled
# dataset, including the rows that end up in this test split, so scores
# measured on it are likely optimistic.
X_train, X_test, y_train, y_test = train_test_split(dataset_lda, labels, test_size=0.3, stratify=labels, random_state=42)

## Supervised image classification - Logistic Regression

In [None]:
# Multinomial logistic regression trained on the training split.
classifier = LogisticRegression(solver='newton-cg', multi_class='multinomial')
classifier.fit(X_train, y_train)

In [None]:
# Predicted class for every held-out sample.
y_test_pred = classifier.predict(X_test)

In [None]:
# Fraction of held-out samples predicted correctly.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

In [None]:
# Per-class precision on the held-out samples (average=None: one value per class).
precision_score(y_true=y_test, y_pred=y_test_pred, average=None)

In [None]:
# Text report per class; names follow the label ids 0, 1, 2.
print(classification_report(y_true=y_test, y_pred=y_test_pred, target_names=["landscape", "man", "woman"]))

In [None]:
# Confusion-matrix heatmap of the held-out predictions (actual vs predicted).
# NOTE(review): `sns` is not imported in the visible import cell — presumably
# seaborn made available elsewhere; verify.
plt.figure()
plt.title("Heatmap")
classes_dict = {'Actual': y_test.tolist(), 'Predicted': y_test_pred.tolist()}
classes_df = pd.DataFrame(classes_dict, columns=["Actual", "Predicted"])
conf_matrix = pd.crosstab(
    index=classes_df['Actual'],
    columns=classes_df['Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
)
ax = sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='.0f')
tick_names = ['landscape', 'male', 'female']
ax.xaxis.set_ticklabels(tick_names)
ax.yaxis.set_ticklabels(tick_names)
ax.invert_yaxis()

## Train/test evaluation of supervised models on the low-dimensional t-SNE embedding (Logistic Regression)

In [None]:
# 2-D t-SNE embedding of the PCA projections with a fixed seed and perplexity 80.
tsne = TSNE(n_components=2, init='random',
                     random_state=0, perplexity=80)
# The embedding is computed from all samples at once.
dataset_tsne = tsne.fit_transform(dataset_projected)

In [None]:
# Display the dimensions of the embedding.
dataset_tsne.shape

In [None]:
# Hold out 30 % of the t-SNE coordinates, keeping the class proportions
# (stratify). The seed is fixed so the split, and every metric computed from
# it, is the same on each run.
# NOTE(review): the embedding was computed from all samples, test rows included.
X_train, X_test, y_train, y_test = train_test_split(dataset_tsne, labels, test_size=0.3, stratify=labels, random_state=42)

In [None]:
# Multinomial logistic regression trained on the training split.
classifier = LogisticRegression(solver='newton-cg', multi_class='multinomial')
classifier.fit(X_train, y_train)

In [None]:
# Predicted class for every held-out sample.
y_test_pred = classifier.predict(X_test)

In [None]:
# Fraction of held-out samples predicted correctly.
accuracy_score(y_true=y_test, y_pred=y_test_pred)

In [None]:
# Per-class precision on the held-out samples (average=None: one value per class).
precision_score(y_true=y_test, y_pred=y_test_pred, average=None)

In [None]:
# Text report per class; names follow the label ids 0, 1, 2.
print(classification_report(y_true=y_test, y_pred=y_test_pred, target_names=["landscape", "man", "woman"]))

In [None]:
# Confusion-matrix heatmap of the held-out predictions (actual vs predicted).
# NOTE(review): `sns` is not imported in the visible import cell — presumably
# seaborn made available elsewhere; verify.
plt.figure()
plt.title("Heatmap")
classes_dict = {'Actual': y_test.tolist(), 'Predicted': y_test_pred.tolist()}
classes_df = pd.DataFrame(classes_dict, columns=["Actual", "Predicted"])
conf_matrix = pd.crosstab(classes_df['Actual'], classes_df['Predicted'], rownames=['Actual'], colnames=['Predicted'])
ax=sns.heatmap(conf_matrix, annot=True,cmap='Blues', fmt='.0f');

# Name the ticks from the classes that actually appear in the crosstab. The
# predicted axis can have fewer columns than the actual axis when a class is
# never predicted, so a fixed list per axis can mislabel or mismatch the ticks.
class_names = {0: 'landscape', 1: 'male', 2: 'female'}
ax.xaxis.set_ticklabels([class_names[int(c)] for c in conf_matrix.columns])
ax.yaxis.set_ticklabels([class_names[int(c)] for c in conf_matrix.index])
ax.invert_yaxis()

## Agglomerative Clustering Dendrogram

In [None]:
# Hierarchical clustering of the LDA coordinates using the 'weighted' linkage method.
linkage_matrix = linkage(y=dataset_lda, method='weighted')

In [None]:
# Draw the dendrogram, truncated with truncate_mode='level' and p=3.
plt.figure(figsize=(25, 10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
dendrogram(linkage_matrix, p=3, truncate_mode='level')
plt.show()