# PCA Using sklearn

In [1]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from skimage import exposure
from skimage.feature import hog
from skimage.filters import sobel
from skimage.feature import local_binary_pattern

from numpy import linalg
import numpy.matlib
from IPython.display import clear_output
from skimage.color import rgb2gray
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Root folder of the sketch dataset: one subfolder per class, each holding the
# class's images. Set the SKETCH_DATA_DIR environment variable to point at your
# local copy; when it is unset, the original author's path is used.
base_fp = os.environ.get('SKETCH_DATA_DIR', 'E:\\Project\\256x256\\sketch\\tx_000100000000')


In [2]:
# Load the full image data for the 1/20 subset of the entire dataset.
# This subset is used to fit the PCA bases.
# NOTE(review): the PCA cells fit on `loaded_array.T`, so this array is presumably
# stored with one flattened image per column -- confirm against whatever wrote images.npy.
loaded_array = np.load('images.npy')

## PCA 9 Components

In [8]:
# Initialize PCA to compress each image to 9 dimensions (from 256x256 pixels).
# The data is transposed before fitting; presumably this puts one flattened
# image per row, which is the layout PCA.fit expects -- TODO confirm.
pca_9 = PCA(n_components = 9)
pca_9.fit(loaded_array.T)

PCA(n_components=9)

In [18]:
# Project every sketch under base_fp onto the 9-component PCA basis.
# Results:
#   transformed_images_9 -- 2-D array with one 9-value row per image
#   labels_9             -- class label (the subfolder name) for each row, same order
subfolders = os.listdir(base_fp)
projected_rows_9 = []
labels_9 = []

for subfolder in subfolders:
    subfolder_path = os.path.join(base_fp, subfolder)
    # Skip stray files sitting next to the class folders; listing them would raise.
    if not os.path.isdir(subfolder_path):
        continue
    subfolder_images = sorted(os.listdir(subfolder_path))
    print(subfolder)

    for image_name in subfolder_images:
        # Only image files are projected; anything else in the folder is ignored.
        if not image_name.endswith(('.jpg', '.png')):
            continue
        image_path = os.path.join(subfolder_path, image_name)
        img = rgb2gray(plt.imread(image_path))

        # PCA.transform takes a 2-D (n_samples, n_features) array, so the
        # flattened image is passed as a single row.
        row = img.reshape(1, -1)
        projected_rows_9.append(pca_9.transform(row))
        labels_9.append(subfolder)

# Stack once at the end: calling np.vstack inside the loop recopies the whole
# result for every image (quadratic in the number of images). Seeding the stack
# with an empty (0, 9) array keeps the shape and dtype promotion of the
# incremental version, including the case where no image was found.
transformed_images_9 = np.vstack([np.empty((0, 9))] + projected_rows_9)

airplane
alarm_clock
ant
ape
apple
armor
axe
banana
bat
bear
bee
beetle
bell
bench
bicycle
blimp
bread
butterfly
cabin
camel
candle
cannon
car_(sedan)
castle
cat
chair
chicken
church
couch
cow
crab
crocodilian
cup
deer
dog
dolphin
door
duck
elephant
eyeglasses
fan
fish
flower
frog
geyser
giraffe
guitar
hamburger
hammer
harp
hat
hedgehog
helicopter
hermit_crab
horse
hot-air_balloon
hotdog
hourglass
jack-o-lantern
jellyfish
kangaroo
knife
lion
lizard
lobster
motorcycle
mouse
mushroom
owl
parrot
pear
penguin
piano
pickup_truck
pig
pineapple
pistol
pizza
pretzel
rabbit
raccoon
racket
ray
rhinoceros
rifle
rocket
sailboat
saw
saxophone
scissors
scorpion
seagull
seal
sea_turtle
shark
sheep
shoe
skyscraper
snail
snake
songbird
spider
spoon
squirrel
starfish
strawberry
swan
sword
table
tank
teapot
teddy_bear
tiger
tree
trumpet
turtle
umbrella
violin
volcano
wading_bird
wheelchair
windmill
window
wine_bottle
zebra


In [19]:
# Persist the 9-component projections to disk.
# NOTE(review): labels_9 is not saved alongside, so this file alone does not
# carry the class of each row.
np.save('transformed_images_9.npy',transformed_images_9)

In [20]:
# Sanity check: expect (number of projected images, 9).
transformed_images_9.shape

(75481, 9)

In [21]:
# Hold out 20% of the 9-component features (and their labels) for evaluation.
# The fixed random_state makes the shuffle reproducible.
X_train_9, X_test_9, Y_train_9, Y_test_9 = train_test_split(
    transformed_images_9,
    labels_9,
    test_size=0.2,
    random_state=1,
)

In [22]:
%%time

# Train SVM with linear kernel

svm_model_9 = SVC(C = 30, kernel = 'linear', random_state = 0)
svm_model_9.fit(X_train_9, Y_train_9)

#Evaluate Performance
svm_preds_9 = svm_model_9.predict(X_test_9)
svm_acc = accuracy_score(Y_test_9, svm_preds_9)
print(f'SVM accuracy 9 dimensions: {svm_acc}')

SVM accuracy 9 dimensions: 0.2046764257799563
Wall time: 3h 8min 27s


## PCA 20 Components

In [3]:
# Initialize PCA to compress each image to 20 dimensions (from 256x256 pixels).
# The data is transposed before fitting; presumably this puts one flattened
# image per row, which is the layout PCA.fit expects -- TODO confirm.
pca = PCA(n_components = 20)
pca.fit(loaded_array.T)

PCA(n_components=20)

In [5]:
# Project every sketch under base_fp onto the 20-component PCA basis.
# Results:
#   transformed_images -- 2-D array with one 20-value row per image
#   labels             -- class label (the subfolder name) for each row, same order
subfolders = os.listdir(base_fp)
projected_rows = []
labels = []

for subfolder in subfolders:
    subfolder_path = os.path.join(base_fp, subfolder)
    # Skip stray files sitting next to the class folders; listing them would raise.
    if not os.path.isdir(subfolder_path):
        continue
    subfolder_images = sorted(os.listdir(subfolder_path))
    print(subfolder)

    for image_name in subfolder_images:
        # Only image files are projected; anything else in the folder is ignored.
        if not image_name.endswith(('.jpg', '.png')):
            continue
        image_path = os.path.join(subfolder_path, image_name)
        img = rgb2gray(plt.imread(image_path))

        # PCA.transform takes a 2-D (n_samples, n_features) array, so the
        # flattened image is passed as a single row.
        row = img.reshape(1, -1)
        projected_rows.append(pca.transform(row))
        labels.append(subfolder)

# Stack once at the end: calling np.vstack inside the loop recopies the whole
# result for every image (quadratic in the number of images). Seeding the stack
# with an empty (0, 20) array keeps the shape and dtype promotion of the
# incremental version, including the case where no image was found.
transformed_images = np.vstack([np.empty((0, 20))] + projected_rows)

airplane
alarm_clock
ant
ape
apple
armor
axe
banana
bat
bear
bee
beetle
bell
bench
bicycle
blimp
bread
butterfly
cabin
camel
candle
cannon
car_(sedan)
castle
cat
chair
chicken
church
couch
cow
crab
crocodilian
cup
deer
dog
dolphin
door
duck
elephant
eyeglasses
fan
fish
flower
frog
geyser
giraffe
guitar
hamburger
hammer
harp
hat
hedgehog
helicopter
hermit_crab
horse
hot-air_balloon
hotdog
hourglass
jack-o-lantern
jellyfish
kangaroo
knife
lion
lizard
lobster
motorcycle
mouse
mushroom
owl
parrot
pear
penguin
piano
pickup_truck
pig
pineapple
pistol
pizza
pretzel
rabbit
raccoon
racket
ray
rhinoceros
rifle
rocket
sailboat
saw
saxophone
scissors
scorpion
seagull
seal
sea_turtle
shark
sheep
shoe
skyscraper
snail
snake
songbird
spider
spoon
squirrel
starfish
strawberry
swan
sword
table
tank
teapot
teddy_bear
tiger
tree
trumpet
turtle
umbrella
violin
volcano
wading_bird
wheelchair
windmill
window
wine_bottle
zebra


In [6]:
# Persist the 20-component projections to disk.
# NOTE(review): labels is not saved alongside, so this file alone does not
# carry the class of each row.
np.save('transformed_images.npy',transformed_images)

In [23]:
# Hold out 20% of the 20-component features (and their labels) for evaluation.
# The fixed random_state makes the shuffle reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    transformed_images,
    labels,
    test_size=0.2,
    random_state=1,
)

In [25]:
%%time

# Train SVM with linear kernel

svm_model = SVC(C = 30, kernel = 'linear', random_state = 0)
svm_model.fit(X_train, Y_train)

#Evaluate Performance
svm_preds = svm_model.predict(X_test)
svm_acc = accuracy_score(Y_test, svm_preds)
print(f'SVM accuracy 20 dimensions: {svm_acc}')

SVM accuracy 20 dimensions: 0.27217327945949527
