In [None]:
import sys
sys.path.append('../utils/')

In [None]:
from ImageUtils import *

In [None]:
import numpy as np
import pandas as pd # Needs the package Pandas to be installed. Check Anaconda Environments and Packages.
from sklearn.decomposition import PCA # Needs SciKit Learn package to be installed. Check Anaconda Environments and Packages.4
from sklearn.covariance import LedoitWolf
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score
import matplotlib.pyplot as plt
from scipy.spatial.distance import mahalanobis
from collections import Counter
from sklearn.preprocessing import label_binarize
import time
from sklearn import preprocessing
import ipywidgets as widgets
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold

In [None]:
# Load the three grayscale Faces94 subsets through the ImageUtils readers.
faces94_male = readFaces94MaleFaces(gray=True)
faces94_female = readFaces94FemaleFaces(gray=True)
faces94_malestaff = readFaces94MaleStaffFaces(gray=True)

# Stack the subsets into one array of images.
dataset = np.vstack([faces94_male, faces94_female, faces94_malestaff])

# The unpacking below requires a 3-D result: (number of images, height, width).
(dataset_N, height, width) = dataset.shape
dataset.shape

## Data centering and calculation of the covariance matrix

In [None]:
# Flatten every image into one row and divide by 255
# (normalisation intended to reduce complexity).
A = dataset.reshape(dataset_N, -1) / 255

In [None]:
# For every image, accumulate its Euclidean (L2) distance to all rows of A.
# dist_1[i] is the sum of the distances from image i to every image.
start_time = time.time()
dist_1 = [
    np.linalg.norm(A[idx] - A, ord=2, axis=1).sum()
    for idx in range(A.shape[0])
]
total_time = time.time() - start_time

print("Time elapsed for operation: {}".format(total_time))

In [None]:
# Index of the image whose summed distance to all images is smallest.
Min_1 = np.argmin(dist_1)

In [None]:
# Show the image selected by the minimum summed distance.
plt.imshow(dataset[Min_1], cmap=plt.cm.gray)

In [None]:
# Display the index of the selected "median" image.
# Values recorded by the author from earlier runs:
#   394 when male, female, malestaff and landscapes were included
#   393 when only male, female and malestaff were included
Min_1

In [None]:
# Show the median image using the computed index instead of a hard-coded
# literal, so the cell stays correct if the loaded dataset changes.
plt.imshow(dataset[Min_1], plt.cm.gray)

In [None]:
# Reference ("median") image: the one with the smallest summed distance to all
# others. Taken from the computed index rather than a hard-coded literal so it
# cannot go stale when the dataset changes.
Median_all = dataset[Min_1]

In [None]:
# Center every flattened image on the reference image.
# NOTE(review): if the ImageUtils readers return an unsigned integer dtype,
# this subtraction (and the product below) would wrap around; confirm the
# dataset dtype is floating point.
flat_images = dataset.reshape(dataset_N, height*width)
data = flat_images - Median_all.reshape(height*width)

# data is (dataset_N, height*width), so this is the small
# (dataset_N x dataset_N) matrix data . data^T / (N - 1), not the
# pixel-by-pixel covariance matrix.
datasetmedian = (1/(dataset_N-1)) * (data @ data.T)
print(datasetmedian.shape)

### Singular value decomposition

In [None]:
# Singular value decomposition of the (dataset_N x dataset_N) matrix built above.
# `s` holds the singular values in descending order; the columns of `u` are the
# corresponding left singular vectors.
u,s,vh = np.linalg.svd(datasetmedian)

### Face space: selection of subspace components

#### Option 1: investigator's criterion of variability captured

In [None]:
# Target fraction of the total variability that the selected components must reach.
representation_percentage = 0.85

In [None]:
# Fraction of the total spectrum contributed by each component.
sum_eig = np.sum(s)
percentage_variance = np.divide(s, sum_eig)

# Running total of those fractions: entry k is the share captured by the
# first k + 1 components.
cumulative_variance = np.cumsum(percentage_variance)

# Smallest number of leading components whose cumulative share reaches the
# target. The previous loop never tested the total after the last component,
# so a target that is met only there (e.g. 1.0) left num_var at 0 and produced
# an empty face space; in that case all components are kept instead.
reached = np.flatnonzero(cumulative_variance >= representation_percentage)
if reached.size:
    num_var = int(reached[0]) + 1
else:
    num_var = int(percentage_variance.shape[0])

# Share of variability actually captured by the selected components.
sum_var = cumulative_variance[num_var - 1] if num_var else 0

num_var_select=num_var
print("Principal components number: ",num_var_select)
print("Percent of variability captured: ",sum_var*100)
print("Images in datasets",dataset_N)

#### Option 2: investigator's criterion of a threshold contribution value

In [None]:
# Cumulative share of variability after each component.
cum_per = np.cumsum(percentage_variance)

# Stop at the first component whose relative increase of the cumulative share
# (in percent of the previous cumulative value) falls below 0.01.
# Note: this overwrites num_var; num_var_select is left untouched.
for i in range(1, len(s)):
    previous, current = cum_per[i - 1], cum_per[i]
    change = (current - previous) / previous * 100
    if change < .01:
        num_var = i - 1
        print("First",num_var, "components with ",cum_per[num_var]*100,"percent of variability captured and from which the contribution is less than 0.01%")
        break

# Cumulative explained-variance curve.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(cum_per * 100)
ax.set_xlabel('Number of Components')
ax.set_ylabel('Variance (%)')
ax.set_title('Cumulative Summation of the Explained Variance')
plt.show()

## Face Space

In [None]:
# Map the leading num_var_select columns of u back to pixel space
# (one column of length height*width per selected component).
EigenvectorsA = data.T @ u[:, :num_var_select]

# Rescale every column to unit L2 norm.
NormEigenvectorsA = preprocessing.normalize(EigenvectorsA, norm='l2', axis=0)

# Sanity check of the normalisation of the eigenvectors of X^T.X:
# prints the norm of column 5.
print(np.linalg.norm(NormEigenvectorsA[:, 5]))

In [None]:
# Show the leading eigenfaces on a rows x cols grid.
cols = 4
rows = 4

# Never request more panels than there are selected components; indexing a
# missing column used to raise IndexError when fewer than rows*cols were kept.
n_panels = min(rows * cols, NormEigenvectorsA.shape[1])

plt.figure(figsize=(30,20))
for i in range(n_panels):
    plt.subplot(rows, cols, i + 1)
    # Each column is a flattened image; restore its 2-D shape for display.
    plt.imshow(NormEigenvectorsA[:,i].reshape(height, width), plt.cm.gray)

## Projection of an image on face space

In [None]:
# Range parameters handed to facespace as (start, step, stop).
# NOTE(review): facespace comes from the ImageUtils star-import; presumably it
# sweeps the captured-variability fraction over this range, and the meaning of
# the trailing 0 is not visible here. Confirm against ImageUtils.
start=0.8
step=0.06
stop=1

facespace(percentage_variance,dataset,data,Median_all,u,dataset_N,height,width,start,step,stop,0)

In [None]:
# Recap of the selected face space; the last entry is the shape of the matrix
# of projection weights (data projected onto the normalised eigenvectors).
summary_lines = (
    ("Principal components number: ", num_var_select),
    ("Percent of variability captured: ", sum_var*100),
    ("Images in datasets", dataset_N),
    ("Omega matrix facespace", (data @ NormEigenvectorsA).shape),
)
for label, value in summary_lines:
    print(label, value)

### Specific image

In [None]:
# Integer selector for the image to inspect (ipywidgets is already imported in
# the notebook's import cell).
# The upper bound is the last valid index, dataset_N - 1, and the default is
# clamped to it so a smaller dataset cannot yield an out-of-range selection.
# NOTE(review): assumes specificimage treats the value as a 0-based index into
# dataset; confirm against ImageUtils.
n = widgets.BoundedIntText(
    value=min(2690, dataset_N - 1),
    min=0,
    max=dataset_N - 1,
    description='image:',
)
display(n)

In [None]:
# Read the image number chosen in the widget and pass it to specificimage.
# NOTE(review): the result depends on the widget's current state, so it is not
# reproducible from code alone. specificimage comes from the ImageUtils
# star-import and its behaviour is not visible here.
N_image=int(n.value)
specificimage(data,dataset,NormEigenvectorsA,Median_all,N_image,dataset_N,height,width)

### Random image

In [None]:
# NOTE(review): randomimage comes from the ImageUtils star-import; presumably it
# picks an image at random and shows its projection. No seed is set in this
# notebook, so confirm whether the output is reproducible.
randomimage(data,dataset,NormEigenvectorsA,Median_all,dataset_N,height,width)

### Distances and outliers

In [None]:
# Project the centered images onto the face space, map the weights back to
# pixel space and add the reference image again.
median_flat = Median_all.reshape(height*width)
dataReconstructed = (data @ NormEigenvectorsA) @ NormEigenvectorsA.T + median_flat
print(dataReconstructed.shape)

In [None]:
# Dropdown selecting the vector norm ('1', '2' or 'inf'; default '2') used for
# the reconstruction distances.
# NOTE(review): downstream results depend on the widget's current state.
Norm=widgets.Dropdown(options=['1', '2', 'inf'],value='2',description='Norm:',disabled=False)
display(Norm)

In [None]:
# Translate the dropdown text into the `ord` argument of np.linalg.norm:
# 'inf' becomes np.inf, anything else is parsed as an integer.
ordn = np.inf if str(Norm.value) == 'inf' else int(Norm.value)

In [None]:
# Per-image reconstruction error: norm (order ordn) of the difference between
# each reconstructed image and its original, both flattened.
residual = dataReconstructed - dataset.reshape(dataset_N, height*width)
edistance = np.linalg.norm(residual, ord=ordn, axis=1)
print(edistance.shape)

In [None]:
# NOTE(review): histbox comes from the ImageUtils star-import; presumably it
# draws a histogram and box plot of the distances. Confirm against ImageUtils.
histbox(edistance)

In [None]:
# Outlier detection on the reconstruction distances.
# NOTE(review): outlierseigenfaces comes from the ImageUtils star-import; the
# second argument (3) is presumably a z-score cut-off. Confirm against ImageUtils.
threshold, outliers, zsort, indexsort, z = outlierseigenfaces(edistance, 3)

print('Outliers threshold method=', np.size(outliers))
print('threshold=', threshold)

# Table of outlier distances and their z values, largest z first.
CVresult = {'outliers distance': outliers, 'z': zsort}
df = pd.DataFrame(CVresult).sort_values('z', axis=0, ascending=False, na_position='first')
df.head(np.size(outliers))

#### Low and high distance

In [None]:
# Side-by-side view of the images at the first and last positions of indexsort.
# NOTE(review): presumably indexsort orders images by increasing distance; confirm.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 10))
ax1.set_title("Similar Image")
ax1.imshow(dataset[indexsort[0]], cmap=plt.cm.gray)
ax2.set_title("Dissimilar Image")
ax2.imshow(dataset[indexsort[-1]], cmap=plt.cm.gray)

#### High distances

In [None]:
# Grid of the images found at the tail of indexsort, each titled with its z value.
cols = 4
rows = 2
plt.figure(figsize=(25,15))
for rank in range(1, rows * cols + 1):
    # rank-th entry counted from the end of indexsort
    idx = indexsort[-rank]
    plt.subplot(rows, cols, rank)
    plt.title("z " + str(z[idx]), fontsize=20)
    plt.imshow(dataset[idx], cmap=plt.cm.gray)

## Face recognition dataset

In [None]:
# Load the grayscale landscape (non-face) images as an array.
landscapes = np.array(readLandsCapeImage(gray=True))

# NOTE(review): landimages comes from the ImageUtils star-import; presumably it
# reconstructs the landscapes in the face space and compares them with the
# outlier distances. Confirm against ImageUtils.
landimages(landscapes,height,width,Median_all,NormEigenvectorsA,ordn,outliers)

In [None]:
# Flatten the landscapes and center them on the reference image.
land_flat = landscapes.reshape(landscapes.shape[0], height*width)
median_flat = Median_all.reshape(height*width)
landimage = land_flat - median_flat

# Project onto the face space and reconstruct back in pixel space.
dataReconstructedland = (landimage @ NormEigenvectorsA) @ NormEigenvectorsA.T + median_flat
print(dataReconstructedland.shape)

# Reconstruction error per landscape, then the combined distances of faces
# followed by landscapes.
edistanceland = np.linalg.norm(dataReconstructedland - land_flat, ord=ordn, axis=1)
totaldistance = np.append(edistance, edistanceland)
histbox(totaldistance)

In [None]:
# Ground truth: 1 for every face image, 0 for every landscape, in the same
# order as totaldistance (faces first, landscapes appended).
y_true = np.append(np.ones(dataset_N), np.zeros(landscapes.shape[0]))

# Predict "face" (1) when the reconstruction distance does not exceed the
# first outlier distance.
# NOTE(review): assumes outliers is non-empty and that outliers[0] is the
# intended cut-off; confirm against outlierseigenfaces.
y_pred = (totaldistance <= outliers[0]) * 1

# Pin the label set so the matrix is always 2x2; without it the four-way
# unpacking fails whenever only one class occurs in y_true and y_pred.
tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
print('TP=', tp,'TN=',tn,'FP=',fp,'FN=', fn)
print('accuracy= ', (tp+tn)/(tp+tn+fp+fn))

plt.figure()
plt.title("Heatmap")
prediction_data = {'y_Actual': y_true,'y_Predicted': y_pred}
df = pd.DataFrame(prediction_data, columns=['y_Actual','y_Predicted'])
confusionmatrix1 = pd.crosstab(df['y_Actual'], df['y_Predicted'], rownames=['Actual'], colnames=['Predicted'])
# NOTE(review): `sns` is not imported in this notebook's import cell; it is
# presumably provided by `from ImageUtils import *`. Confirm, otherwise this
# line raises NameError on a fresh kernel.
ax = sns.heatmap(confusionmatrix1, annot=True, cmap='Blues', fmt='.0f')
# Flip both axes of the heatmap.
ax.invert_yaxis()
ax.invert_xaxis()

#### False positive

In [None]:
# For each outlier distance, list the landscapes whose reconstruction
# distance lies below it.
for outlier in outliers:
    below_outlier = outlier > edistanceland
    print(np.where(below_outlier))

In [None]:
median_flat = Median_all.reshape(height*width)

# Pick one landscape whose distance is below outliers[6]: the fourth such index.
# NOTE(review): the positions 6 and 3 are hard-coded, so this needs at least
# seven outliers and four matching landscapes.
N_land = int(np.where(edistanceland < outliers[6])[0][3])

# Center the selected single image on the reference image.
landimage = landscapes[N_land].reshape(height*width) - median_flat
# Weights w of each eigenface in the generated subspace.
wland = landimage @ NormEigenvectorsA
# Reconstruction: weights times the transposed eigenvectors, plus the reference image.
Reconstland = wland @ NormEigenvectorsA.T + median_flat

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 10))
ax1.set_title("Land image")
ax1.imshow(landscapes[N_land], cmap=plt.cm.gray)
ax2.set_title("Reconstructed land Image")
ax2.imshow(Reconstland.reshape(height, width), cmap=plt.cm.gray)
print('distancia',edistanceland[N_land])

In [None]:
# Cross-validated evaluation.
# NOTE(review): kfold here is the helper from the ImageUtils star-import (not
# sklearn's KFold class); presumably it returns per-fold metrics. Confirm.
accuracy, tncv, fpcv, fncv, tpcv = kfold(y_true, landscapes, dataset, height, width, ordn)

# One column per metric; show the first rows.
CVresult = dict(accuracy=accuracy, tn=tncv, fp=fpcv, fn=fncv, tp=tpcv)
df = pd.DataFrame(CVresult)
df.head()