# SVM for classification

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import mpl_toolkits.mplot3d as mplot3d
import os
from sklearn.svm import LinearSVC

### Choosing 25 subjects from 68 in the CMU PIE dataset

In [2]:
PIE_subjects = os.listdir("PIE")
selected_subjects = np.random.choice(PIE_subjects, size = 25, replace = False)
train_photos_path = []
test_photos_path  = []
for subject in selected_subjects:
    subject_photos = os.listdir("PIE"+"/"+subject)
    subject_train  = np.random.choice(subject_photos, size = int(0.7*len(subject_photos)), replace=False)
    subject_test   = np.setdiff1d(subject_photos, subject_train)
    for photo in subject_train :
        train_photos_path.append("PIE"+'/'+subject+'/'+photo)
    for photo in subject_test :
        test_photos_path.append("PIE"+'/'+subject+'/'+photo)

train_photos_array =  np.array([plt.imread(photo) for photo in train_photos_path])
test_photos_array =  np.array([plt.imread(photo) for photo in test_photos_path])
print("The selected subjects are :\n", selected_subjects)

The selected subjects are :
 ['49' '36' '58' '42' '68' '29' '63' '65' '30' '44' '11' '47' '12' '31'
 '62' '3' '19' '35' '7' '41' '57' '38' '24' '51' '26']


In [3]:
X_train = train_photos_array
Y_train = np.array([path.split('/')[-2] for path in train_photos_path])
X_test  = test_photos_array
Y_test  = np.array([path.split('/')[-2] for path in test_photos_path])

### reading my selfies

In [4]:
selfies = os.listdir("selfies")
selfies_path = []
for selfie in selfies :
    selfies_path.append("selfies" + '/' + selfie)
train_selfies_path = np.random.choice(selfies_path, size = 7, replace=False)
test_selfies_path  = np.setdiff1d(selfies_path, train_selfies_path)

#train_selfies_array = np.array([plt.imread(selfie) for selfie in train_selfies_name])
#test_selfies_array = np.array([plt.imread(selfie) for selfie in test_selfies_name])

In [5]:
X_train_selfies = np.array([plt.imread(selfie) for selfie in train_selfies_path])
X_test_selfies  = np.array([plt.imread(selfie) for selfie in test_selfies_path])

Y_train_selfies = np.array([69 for i in range(7)])
Y_test_selfies  = np.array([69 for i in range(3)])

In [6]:
_, img_size1, img_size2 = X_train.shape
num_features = img_size1*img_size2

In [7]:
# Add the selfies to X_Train, X_test and Y_train, Y_test
X_train = np.concatenate((X_train, X_train_selfies), axis=0).reshape((len(X_train)+7, num_features))
X_test  = np.concatenate((X_test, X_test_selfies), axis=0).reshape((len(X_test)+3, num_features))
Y_train = np.concatenate((Y_train, Y_train_selfies), axis=0)
Y_test  = np.concatenate((Y_test, Y_test_selfies), axis=0)

In [8]:
# The overall data
X = np.concatenate((X_train, X_test), axis=0)
Y = np.concatenate((Y_train, Y_test), axis=0)

### Apply PCA

In [9]:
def PCA(data):
    data_centered = data - np.mean(data, axis=0)
    cov_m = np.cov(data_centered.T)
    eigen_vals, eigen_vecs = np.linalg.eig(cov_m)
    # find the indexes of the highest eignevalues
    eigen_vals_indexes = np.argsort(abs(eigen_vals))[::-1]
    # sort the eigen values and vectors accordinly
    sorted_eigen_vals      = eigen_vals[eigen_vals_indexes]
    sorted_eigen_vecs      = eigen_vecs[:,eigen_vals_indexes]
    return sorted_eigen_vals, sorted_eigen_vecs

In [10]:
train_eigen_vals, train_eigen_vecs = PCA(X_train)

#### Penalty parameter

In [11]:
C = [0.01, 0.1, 1]

### Applying linear SVM

In [12]:
dims = [80, 200]
mean_Xtrain = np.mean(X_train,axis=0)
mean_Xtest  = np.mean(X_test,axis=0)

In [16]:
for d in dims:
    for c in C:
        print("Dim : {}, c : {}".format(d,c))
        # Get the reduced X_train and X_test
        reduced_X_train = np.dot(X_train - mean_Xtrain, train_eigen_vecs[:,:d]).real
        reduced_X_test  = np.dot(X_test - mean_Xtest , train_eigen_vecs[:,:d]).real
        
        # Initialize the linear SVM classifier 
        svm = LinearSVC(random_state=0, C=c, max_iter = 100000 , dual = False)
        # Train the model
        svm.fit(reduced_X_train, Y_train)
        Y_pred = svm.predict(reduced_X_test)
        print("The score is :",svm.score(reduced_X_test, Y_test))
        

Dim : 80, c : 0.01
The score is : 0.9700230591852421
Dim : 80, c : 0.1
The score is : 0.9807840122982321
Dim : 80, c : 1
The score is : 0.9761721752498078
Dim : 200, c : 0.01
The score is : 0.9846272098385856
Dim : 200, c : 0.1
The score is : 0.9884704073789393
Dim : 200, c : 1
The score is : 0.9884704073789393
