In [131]:
import numpy as np
import cv2 as cv
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

y_train = np.load("orientations_train.npy")
y_test = np.load("orientations_test.npy")

test_size = 1000
train_size = 10000
image_length = 4096
image_dim = 64

def vectorize_images(filename, data_size):
    X = np.empty(shape=(data_size, image_length))
   
    for i in range(data_size):     
        img = cv.imread("{filename}/{i}.jpg".format(filename = filename, i = i), cv.IMREAD_GRAYSCALE)
        X[i] = img.flatten()
        
    return X

X_train = vectorize_images("3dshapes_train" ,train_size)
X_test = vectorize_images("3dshapes_test" ,test_size)

__a)__

In [132]:
def apply_pca_and_svm(X_train, X_test, y_train):
    # Standardize the data
    scaler = StandardScaler()
    X_scaled_train = scaler.fit_transform(X_train)
    X_scaled_test = scaler.transform(X_test)

    # Apply PCA to extract principal components
    pca = PCA(n_components=10)        # n_components can be changed to extract more or fewer principal components as needed.
    X_train_pca = pca.fit_transform(X_scaled_train)
    X_test_pca = pca.fit_transform(X_scaled_test)

    # Train SVM classifier
    # C: regularization parameter of the SVM
         # It controls the trade-off between achieving a low training error and a low testing error. 
         # A small value of C will result in a wider margin hyperplane and a larger number of support vectors. 
         # A large value of C will result in a a narrow margin hyperplane and smaller number of support vectors.
         # In this case, we set C to 0.1, which means that we want to prioritize a wider margin hyperplane over a low training error.
    # random_state : random seed
    clf = SVC(kernel='linear', C=0.1, random_state=100)
    clf.fit(X_train_pca, y_train)

    # Predict labels for validation set
    y_pred_test = clf.predict(X_test_pca)
    return y_pred_test 

__a)__ i)

In [133]:
# Select two orientations for binary classification
train_orients_set = list(set(y_train))
orient1 = train_orients_set[0]   # 0
orient2 = train_orients_set[1]   # 1

# Create binary label vectors
y_train_bin = np.empty(train_size)
y_test_bin = np.empty(test_size)

y_train_bin = np.delete(y_train, np.argwhere( (y_train != orient1) & (y_train != orient2) ))
y_train_bin[y_train_bin == orient1] = 0
y_train_bin[y_train_bin == orient2] = 1
X_train_bin = X_train[(y_train == orient1) | (y_train == orient2)]

y_test_bin = np.delete(y_test, np.argwhere( (y_test != orient1) & (y_test != orient2) ))
y_test_bin[y_test_bin == orient1] = 0
y_test_bin[y_test_bin == orient2] = 1
X_test_bin = X_test[(y_test == orient1) | (y_test == orient2)]

# Compute validation accuracy
y_pred_bin_test = apply_pca_and_svm(X_train_bin, X_test_bin, y_train_bin)
acc_bin_test = accuracy_score(y_test_bin, y_pred_bin_test)
print("Binary classification accuracy:", acc_bin_test)


Binary classification accuracy: 0.6062992125984252


__a)__ ii)

In [134]:
# Convert labels to integers from 1 to 15
for i in range(len(train_orients_set)):
    y_train[y_train == train_orients_set[i]] = i+1
        
test_orients_set = list(set(y_test))
for i in range(len(test_orients_set)):
    y_test[y_test == test_orients_set[i]] = i+1
        
# Compute validation accuracy
y_pred_test = apply_pca_and_svm(X_train, X_test, y_train)
acc_test = accuracy_score(y_test, y_pred_test)
print("15 class classification accuracy:", acc_test)

15 class classification accuracy: 0.035
