In [None]:
import sys
# Add ../utils/ to the module search path so modules stored there can be imported
# (ImageUtils is imported in the next cell). The path is relative to the
# kernel's working directory.
sys.path.append('../utils/')

In [None]:
from ImageUtils import *

import numpy as np
import pandas as pd # Needs the package Pandas to be installed. Check Anaconda Environments and Packages.
from sklearn.decomposition import PCA # Needs SciKit Learn package to be installed. Check Anaconda Environments and Packages.
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, classification_report
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
import seaborn as sns # Needs the package Seaborn to be installed; used for the confusion-matrix heatmaps.

In [None]:
# Read each image collection through the ImageUtils helpers (gray=True is
# passed to every reader). Landscapes are wrapped in np.array explicitly.
faces94_male = readFaces94MaleFaces(gray=True)
faces94_female = readFaces94FemaleFaces(gray=True)
faces94_malestaff = readFaces94MaleStaffFaces(gray=True)
landscapes = np.array(readLandsCapeImage(gray=True))

# Stack all images along the first axis. The label vector below must follow
# exactly the same order: male, female, male staff, landscapes.
dataset = np.vstack([faces94_male, faces94_female, faces94_malestaff, landscapes])

# Class ids: 0 = landscape, 1 = male, 2 = female.
# Male staff images share class 1 with the other male faces.
labels = np.repeat(
    [1.0, 2.0, 1.0, 0.0],
    [
        faces94_male.shape[0],
        faces94_female.shape[0],
        faces94_malestaff.shape[0],
        landscapes.shape[0],
    ],
)

# dataset is indexed as (image, row, column).
dataset_N, height, width = dataset.shape

In [None]:
dataset_norm = dataset/255

In [None]:
mean = np.mean(dataset_norm.reshape(dataset_N, height*width), axis=0).reshape(height, width)

In [None]:
dataset_norm_cov = np.cov(dataset_norm.reshape(dataset_N, height*width))
dataset_norm_cov.shape

In [None]:
_,s,_ = np.linalg.svd(dataset_norm_cov)

In [None]:
representation_percentage = 0.85

In [None]:
# Share of the total spectrum carried by each singular value.
sum_eig = np.sum(s)
percentage_variance = np.divide(s, sum_eig)

# Running total of those shares; entry k is the share explained by the first
# k + 1 components.
cumulative_variance = np.cumsum(percentage_variance)

# Smallest number of leading components whose cumulative share reaches
# `representation_percentage`. searchsorted returns the first index whose
# cumulative share is >= the target; +1 converts that index into a count.
# The count is clamped to the number of available components so that a target
# reached only by the full spectrum (or missed by rounding) selects all of
# them instead of leaving num_var at 0, as the previous loop did.
num_var = int(min(
    np.searchsorted(cumulative_variance, representation_percentage) + 1,
    percentage_variance.shape[0],
))

# Share actually explained by the selected components (kept for inspection).
sum_var = float(cumulative_variance[num_var - 1]) if num_var else 0

num_var

In [None]:
# Fit a PCA with `num_var` components on the flattened images, one row per
# image. The unscaled `dataset` is used here, not `dataset_norm`.
pca = PCA(n_components=num_var, svd_solver='full')
pca.fit(dataset.reshape(dataset_N, -1))
pca.components_.shape

In [None]:
dataset_male = np.vstack((faces94_male, faces94_malestaff))

In [None]:
dataset_male.shape

In [None]:
mean_male = np.mean(dataset_male.reshape(dataset_male.shape[0], height*width)/255, axis=0).reshape(height, width)
mean_female = np.mean(faces94_female.reshape(faces94_female.shape[0], height*width)/255, axis=0).reshape(height, width)
mean_landscape = np.mean(landscapes.reshape(landscapes.shape[0], height*width)/255, axis=0).reshape(height, width)

In [None]:
# One row of three panels showing the mean image of each class.
fig = plt.figure(figsize=(8,10))
ax1, ax2, ax3 = (fig.add_subplot(1, 3, slot) for slot in (1, 2, 3))

ax1.set_title("Mean Male")
ax2.set_title("Mean Female")
ax3.set_title("Mean Landscapes")

# The means were computed on values divided by 255; multiply back before display.
ax1.imshow(mean_male*255, cmap=plt.cm.gray)
ax2.imshow(mean_female*255, cmap=plt.cm.gray)
ax3.imshow(mean_landscape*255, cmap=plt.cm.gray)

In [None]:
male_cov = np.cov(np.subtract(dataset_male/255, mean_male).reshape(dataset_male.shape[0], height*width))
male_cov.shape

In [None]:
female_cov = np.cov(np.subtract(faces94_female/255, mean_female).reshape(faces94_female.shape[0], height*width))
female_cov.shape

In [None]:
landscape_cov = np.cov(np.subtract(landscapes/255, mean_landscape).reshape(landscapes.shape[0], height*width))
landscape_cov.shape

In [None]:
# All-ones matrices with one row per image of the class and one column per pixel.
# NOTE(review): none of these is referenced by the cells visible here.
landscape_base_matrix, male_base_matrix, female_base_matrix = (
    np.ones((image_stack.shape[0], height*width))
    for image_stack in (landscapes, dataset_male, faces94_female)
)

In [None]:
dataset_projected = pca.transform(dataset.reshape(dataset_N, height*width))
dataset_projected.shape

In [None]:
pca.explained_variance_ratio_

In [None]:
lda = LinearDiscriminantAnalysis(n_components=2)
lda.fit(dataset_projected, labels)

In [None]:
dataset_lda = lda.transform(dataset_projected)

In [None]:
colors = ['navy', 'turquoise', 'darkorange']
classes = ['landscapes', 'male', 'female']

In [None]:
plt.figure()
for color, i, class_name in zip(colors, np.arange(0, 3), classes):
    plt.scatter(dataset_lda[labels == i, 0], dataset_lda[labels == i, 1], alpha=.8, color=color,
                label=class_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('LDA of EigenFaces distribution')


In [None]:
# Compare t-SNE embeddings of the PCA features for several perplexities,
# one panel per value.
perplexities = [5, 30, 50, 100]
fig, subplots = plt.subplots(1, len(perplexities), figsize=(20, 8))
plt.axis('tight')

landscapes_class = 0
male_class = 1
female_class = 2

# The class membership masks do not depend on the perplexity: build them once.
landscapes_idx = labels == landscapes_class
male_idx = labels == male_class
female_idx = labels == female_class
class_masks = (
    (landscapes_class, landscapes_idx),
    (male_class, male_idx),
    (female_class, female_idx),
)

for ax, perplexity in zip(subplots, perplexities):
    # random_state is fixed so the embedding is the same on every run.
    tsne = TSNE(n_components=2, init='random',
                random_state=0, perplexity=perplexity)
    dataset_tsne = tsne.fit_transform(dataset_projected)

    ax.set_title("t-SNE Eigenfaces Perplexity=%d" % perplexity)

    for class_id, mask in class_masks:
        ax.scatter(dataset_tsne[mask, 0], dataset_tsne[mask, 1], c=colors[class_id])

    # Hide the tick labels on both axes.
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    ax.axis('tight')

In [None]:
X_train, X_test, y_train, y_test = train_test_split(dataset_projected, labels, test_size=0.3, stratify=labels)

In [None]:
classifier = LogisticRegression(solver='newton-cg', multi_class='multinomial')
classifier.fit(X_train, y_train)

In [None]:
y_test_pred = classifier.predict(X_test)

In [None]:
accuracy_score(y_true=y_test, y_pred=y_test_pred)

In [None]:
precision_score(y_true=y_test, y_pred=y_test_pred, average=None)

In [None]:
print(classification_report(y_true=y_test, y_pred=y_test_pred, target_names=["landscape", "man", "woman"]))

In [None]:
# Confusion matrix of the latest predictions, drawn as an annotated heatmap.
plt.figure()
plt.title("Heatmap")
classes_dict = dict(Actual=y_test.tolist(), Predicted=y_test_pred.tolist())
classes_df = pd.DataFrame(classes_dict, columns=["Actual", "Predicted"])
# Rows are the true classes, columns the predicted ones.
conf_matrix = pd.crosstab(
    index=classes_df['Actual'],
    columns=classes_df['Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
)
ax = sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='.0f')
# Reverse both axes of the plot.
ax.invert_yaxis()
ax.invert_xaxis()

In [None]:
classifier_lda = LinearDiscriminantAnalysis(n_components=2)
classifier_lda.fit(X_train, y_train)

In [None]:
y_test_pred = classifier_lda.predict(X_test)

In [None]:
accuracy_score(y_true=y_test, y_pred=y_test_pred)

In [None]:
precision_score(y_true=y_test, y_pred=y_test_pred, average=None)

In [None]:
print(classification_report(y_true=y_test, y_pred=y_test_pred, target_names=["landscape", "man", "woman"]))

In [None]:
# Confusion matrix of the latest predictions, drawn as an annotated heatmap.
plt.figure()
plt.title("Heatmap")
classes_dict = dict(Actual=y_test.tolist(), Predicted=y_test_pred.tolist())
classes_df = pd.DataFrame(classes_dict, columns=["Actual", "Predicted"])
# Rows are the true classes, columns the predicted ones.
conf_matrix = pd.crosstab(
    index=classes_df['Actual'],
    columns=classes_df['Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
)
ax = sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='.0f')
# Reverse both axes of the plot.
ax.invert_yaxis()
ax.invert_xaxis()

In [None]:
X_train, X_test, y_train, y_test = train_test_split(dataset_lda, labels, test_size=0.3, stratify=labels)

In [None]:
classifier = LogisticRegression(solver='newton-cg', multi_class='multinomial')
classifier.fit(X_train, y_train)

In [None]:
y_test_pred = classifier.predict(X_test)

In [None]:
accuracy_score(y_true=y_test, y_pred=y_test_pred)

In [None]:
precision_score(y_true=y_test, y_pred=y_test_pred, average=None)

In [None]:
print(classification_report(y_true=y_test, y_pred=y_test_pred, target_names=["landscape", "man", "woman"]))

In [None]:
# Confusion matrix of the latest predictions, drawn as an annotated heatmap.
plt.figure()
plt.title("Heatmap")
classes_dict = dict(Actual=y_test.tolist(), Predicted=y_test_pred.tolist())
classes_df = pd.DataFrame(classes_dict, columns=["Actual", "Predicted"])
# Rows are the true classes, columns the predicted ones.
conf_matrix = pd.crosstab(
    index=classes_df['Actual'],
    columns=classes_df['Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
)
ax = sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='.0f')
# Reverse both axes of the plot.
ax.invert_yaxis()
ax.invert_xaxis()

In [None]:
tsne = TSNE(n_components=2, init='random',
                     random_state=0, perplexity=80)
dataset_tsne = tsne.fit_transform(dataset_projected)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(dataset_tsne, labels, test_size=0.3, stratify=labels)

In [None]:
classifier = LogisticRegression(solver='newton-cg', multi_class='multinomial')
classifier.fit(X_train, y_train)

In [None]:
y_test_pred = classifier.predict(X_test)

In [None]:
accuracy_score(y_true=y_test, y_pred=y_test_pred)

In [None]:
precision_score(y_true=y_test, y_pred=y_test_pred, average=None)

In [None]:
print(classification_report(y_true=y_test, y_pred=y_test_pred, target_names=["landscape", "man", "woman"]))

In [None]:
# Confusion matrix of the latest predictions, drawn as an annotated heatmap.
plt.figure()
plt.title("Heatmap")
classes_dict = dict(Actual=y_test.tolist(), Predicted=y_test_pred.tolist())
classes_df = pd.DataFrame(classes_dict, columns=["Actual", "Predicted"])
# Rows are the true classes, columns the predicted ones.
conf_matrix = pd.crosstab(
    index=classes_df['Actual'],
    columns=classes_df['Predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
)
ax = sns.heatmap(conf_matrix, annot=True, cmap='Blues', fmt='.0f')
# Reverse both axes of the plot.
ax.invert_yaxis()
ax.invert_xaxis()