In [None]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from skimage import exposure
from skimage.feature import hog
from skimage.filters import sobel
from skimage.feature import local_binary_pattern

from numpy import linalg
import numpy.matlib
from IPython.display import clear_output
from skimage.color import rgb2gray

# Root folder of the sketch dataset (one subfolder per class, images inside).
# Set the SKETCH_DATA_DIR environment variable to point at your local copy;
# when it is not set, the original hard-coded location is used.
base_fp = os.environ.get('SKETCH_DATA_DIR', 'E:\\Project\\256x256\\sketch\\tx_000100000000')


In [None]:
# Build the image matrix for PCA: after this cell D holds one flattened
# grayscale image per column, i.e. D has shape (xdim * ydim, number of images).

xdim, ydim = 256, 256
N = 0
counter = 0
rows = []  # flattened images; stacked once after the loop

# Load only every 20th image to keep memory usage manageable.
# os.listdir returns entries in arbitrary order, so sort them to make the
# "every 20th" selection reproducible from run to run.
for subfolder in sorted(os.listdir(base_fp)):
    subfolder_path = os.path.join(base_fp, subfolder)
    # Skip stray files sitting next to the subfolders; listing them would fail.
    if not os.path.isdir(subfolder_path):
        continue
    for filename in sorted(os.listdir(subfolder_path)):
        if filename.endswith((".jpg", ".png")):
            counter += 1
            if counter % 20 == 0:
                filepath = os.path.join(subfolder_path, filename)
                img = rgb2gray(plt.imread(filepath))
                rows.append(img.reshape(-1))

# Stack a single time: calling np.vstack inside the loop re-copies the whole
# matrix for every image, which is quadratic in the number of images.
if rows:
    D = np.vstack(rows).T
else:
    D = np.empty((xdim * ydim, 0))

print("loaded " + str(D.shape[1]) + " images")

In [None]:
# PCA: centre the data, then eigendecompose the (images x images) matrix D2.T @ D2.

mean_image = np.mean(D, axis = 1)  # per-pixel mean over all images

# mu keeps the (n_images, n_pixels) shape that repmat produced, so `mu.T`
# still lines up with D, but as a read-only broadcast view instead of a full
# copy of the mean for every image.
mu = np.broadcast_to(mean_image, (D.shape[1], mean_image.size))
D2 = D - mu.T

# Working with D2.T @ D2 (n_images x n_images) instead of D2 @ D2.T
# (n_pixels x n_pixels) keeps the eigenproblem small.
C2 = D2.T @ D2

# C2 is symmetric, so use eigh: it returns real eigenvalues/eigenvectors in
# ascending eigenvalue order (eig guarantees neither realness nor ordering).
# Reverse so that column k of `vec` is the k-th largest component.
val, vec = linalg.eigh(C2)
val = val[::-1]
vec = vec[:, ::-1]

In [None]:
# Project every image onto the top-d principal directions. Column i of P is
# the d-dimensional representation of image i.
# TODO: this needs to be fed through the entire dataset, not just the
# trimmed 1/20th sample loaded above.

d = 20

# Pick the d largest eigenvalues explicitly rather than assuming that `vec`
# is already ordered.
top = np.argsort(val)[::-1][:d]

# D2 @ v maps an eigenvector of D2.T @ D2 to the matching direction in pixel
# space; scale each direction to unit length so every coordinate of P is a
# true projection and distances between columns of P are not skewed by the
# eigenvalue magnitudes.
basis = D2 @ vec[:, top]
norms = linalg.norm(basis, axis = 0)
basis = basis / np.where(norms > 0, norms, 1.0)  # leave all-zero directions untouched

# Project the centred data (D2), consistent with how the basis was derived.
P = basis.T @ D2


In [None]:
# Display column 0 of P, i.e. the reduced-dimension (eigenbasis) coefficients
# of the first loaded image.
P[:,0]

## Visualization of Reconstructed Images

In [None]:
# Indices of the d largest eigenvalues and the matching eigenvectors of C2
idx = val.argsort()[::-1][:d]
B = vec[:,idx]

# Shape (n_pixels, d): the centred data mapped through the selected eigenvectors.
coefficients = D2 @ B

# Rank-d reconstruction, one image per row (n_images, n_pixels). The mean
# image is added back so that the rows are directly comparable with the
# original (uncentred) columns of D.
D_reconstructed = B @ coefficients.T + mu

In [None]:
# Plot the top 20 basis images on a 4x5 grid, ordered by decreasing eigenvalue.
# The order is computed here instead of assuming `vec` is already sorted.
top20 = np.argsort(val)[::-1][:20]
for k, col in enumerate(top20):
    e = D2 @ vec[:, col]  # basis direction in pixel space
    plt.subplot(4, 5, k + 1)
    plt.imshow(e.reshape(xdim, ydim), cmap = 'gray')
    plt.axis('off')

In [None]:
# Show the reduced-dimension reconstruction of the first image, followed by
# the original first image, each in its own figure.
reconstructed_img = D_reconstructed[0].reshape(xdim, ydim)
original_img = D[:, 0].reshape(xdim, ydim)

plt.figure()
plt.imshow(reconstructed_img, cmap = 'gray')

plt.figure()
plt.imshow(original_img, cmap = 'gray')

In [None]:
# Nearest neighbour of a chosen image, measured by Euclidean distance between
# the reduced-dimension representations (columns of P).
D3 = D2 + mu.T  # add the mean back to recover the original pixel values

j = 1719  # index of the query image
n_images = P.shape[1]
if not 0 <= j < n_images:
    raise IndexError("query index j=" + str(j) + " is out of range for " + str(n_images) + " images")
if n_images < 2:
    raise ValueError("need at least two images to find a nearest neighbour")

# Distance from the query to every image in one vectorised step; the query
# itself is excluded by setting its own distance to infinity.
dists = linalg.norm(P - P[:, [j]], axis = 0)
dists[j] = np.inf
minind = int(np.argmin(dists))  # first index of the smallest distance
mindist = dists[minind]

# Query image on the left, its nearest neighbour on the right.
plt.figure()
plt.subplot(121)
plt.imshow(D3[:,j].reshape(xdim, ydim), cmap = 'gray')
plt.axis('off')
plt.subplot(122)
plt.imshow(D3[:,minind].reshape(xdim, ydim), cmap = 'gray')
plt.axis('off')