# ENGR421 HW6 

# ÖZGE SAYAR

In [1]:
import cvxopt as cvx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.spatial.distance as dt

### Import Data  

In [2]:
# Load the image matrix and the label vector from their CSV files.
data_set_images = np.genfromtxt("hw06_data_set_images.csv", delimiter=",")
data_set_labels = np.genfromtxt("hw06_data_set_labels.csv", delimiter=",")

# The first 1000 rows form the training split; every remaining row is
# held out as the test split.
trainingdata_images, testdata_images = data_set_images[:1000], data_set_images[1000:]
trainingdata_labels, testdata_labels = data_set_labels[:1000], data_set_labels[1000:]

# Number of samples in each split.
N_train_images = trainingdata_images.shape[0]
N_test_images = testdata_images.shape[0]


### Calculating Color Histograms 

In [None]:
def _color_histograms(images, n_bins=64, bin_width=4):
    """Return one normalised intensity histogram per image row.

    Every value ``v`` of a row is counted in bin ``floor(v / bin_width)``
    and the counts are divided by the number of values in the row.

    Parameters
    ----------
    images : 2-D array
        One image per row, one pixel value per column.
    n_bins : int
        Number of histogram bins (columns of the result).
    bin_width : int or float
        Width of one bin in pixel-value units.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(images.shape[0], n_bins)``.

    Notes
    -----
    All columns of ``images`` are used (the previous loop was hard-wired
    to 784 columns).
    NOTE(review): presumably pixel values lie in [0, n_bins * bin_width),
    i.e. 0..255 with the defaults -- confirm against the data set.
    """
    images = np.asarray(images)
    n_images, n_pixels = images.shape
    bin_loc = np.floor(images / bin_width).astype(int)

    counts = np.zeros((n_images, n_bins))
    row_idx = np.repeat(np.arange(n_images), n_pixels)
    # np.add.at accumulates repeated (row, bin) pairs, which plain fancy
    # indexing with "+= 1" would silently collapse into a single update.
    np.add.at(counts, (row_idx, bin_loc.ravel()), 1)

    # Normalise once, after all pixels have been counted.
    return counts / n_pixels


H_train = _color_histograms(trainingdata_images)
print(H_train[0:5,0:5])

H_test = _color_histograms(testdata_images)
print(H_test[0:5,0:5])

### Histogram Intersection Kernel 

In [None]:
def intersection_kernel(hi, hj):
    """Return the histogram intersection of two histograms.

    Computes ``sum_l min(hi[l], hj[l])`` over all bins.

    Parameters
    ----------
    hi, hj : array-like
        Histograms with the same number of bins.

    Returns
    -------
    float
        Sum of the element-wise minima.

    Notes
    -----
    The bin count is taken from the inputs instead of being fixed at 64,
    and the minimum is evaluated by NumPy rather than by summing a Python
    generator (passing a generator to ``np.sum`` is deprecated).
    """
    return np.sum(np.minimum(hi, hj))

def _intersection_kernel_matrix(H_rows, H_cols):
    """Return the matrix of pairwise histogram intersections.

    Entry ``[i, j]`` is ``sum_l min(H_rows[i, l], H_cols[j, l])``.

    Parameters
    ----------
    H_rows : 2-D array, one histogram per row (indexes the result's rows).
    H_cols : 2-D array, one histogram per row (indexes the result's columns).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(H_rows.shape[0], H_cols.shape[0])``.
    """
    H_rows = np.asarray(H_rows)
    H_cols = np.asarray(H_cols)
    K = np.empty((H_rows.shape[0], H_cols.shape[0]))
    for i, h in enumerate(H_rows):
        # Broadcast one histogram against every histogram in H_cols; doing
        # it row by row keeps the temporary at (n_cols, n_bins).
        K[i] = np.minimum(h, H_cols).sum(axis=1)
    return K


# Training kernel: train x train.
K_train = _intersection_kernel_matrix(H_train, H_train)

# Test kernel: test x train, so that it can be multiplied with the
# training-set dual coefficients (it was previously allocated test x test).
K_test = _intersection_kernel_matrix(H_test, H_train)

print(K_train)
print(K_test)

### Training Support Vector Machine Classifier

In [None]:

def svm_classifier(C):
    """Train a kernel SVM on the training split by solving its dual QP.

    Reads the module-level ``K_train``, ``trainingdata_labels`` and
    ``N_train_images``.

    Parameters
    ----------
    C : float
        Regularization parameter; upper bound on every dual coefficient.
        It is now honoured (it used to be overwritten with 10).

    Returns
    -------
    tuple
        ``(w0, alpha)`` where ``w0`` is the bias term and ``alpha`` is the
        array of ``N_train_images`` dual coefficients.

    Notes
    -----
    NOTE(review): the formulation presumably expects labels in {-1, +1} --
    confirm against the label file.
    """
    y_train = trainingdata_labels

    # Relative tolerance used to snap coefficients onto the bounds 0 and C.
    epsilon = 0.001

    yyK = np.matmul(y_train[:,None], y_train[None,:]) * K_train

    # cvxopt form: minimise 1/2 x'Px + q'x  subject to  Gx <= h, Ax = b.
    P = cvx.matrix(yyK)
    q = cvx.matrix(-np.ones((N_train_images, 1)))
    # G and h stack the box constraints -alpha <= 0 and alpha <= C.
    G = cvx.matrix(np.vstack((-np.eye(N_train_images), np.eye(N_train_images))))
    h = cvx.matrix(np.vstack((np.zeros((N_train_images, 1)), C * np.ones((N_train_images, 1)))))
    # A and b encode the equality constraint sum_i y_i * alpha_i = 0.
    A = cvx.matrix(1.0 * y_train[None,:])
    b = cvx.matrix(0.0)

    # use cvxopt library to solve QP problems
    result = cvx.solvers.qp(P, q, G, h, A, b)
    alpha = np.reshape(result["x"], N_train_images)
    alpha[alpha < C * epsilon] = 0
    alpha[alpha > C * (1 - epsilon)] = C

    # find bias parameter: average over the coefficients strictly between
    # 0 and C, summing the kernel expansion over all non-zero coefficients
    support_indices, = np.where(alpha != 0)
    active_indices, = np.where(np.logical_and(alpha != 0, alpha < C))
    w0 = np.mean(y_train[active_indices] * (1 - np.matmul(yyK[np.ix_(active_indices, support_indices)], alpha[support_indices])))

    return (w0, alpha)


In [None]:
def prediction(K_train, K_test, C):
    """Train the SVM with parameter ``C`` and label both data splits.

    Parameters
    ----------
    K_train : 2-D array
        Kernel matrix between training samples (rows) and training samples.
    K_test : 2-D array
        Kernel matrix between test samples (rows) and training samples.
    C : float
        Regularization parameter forwarded to ``svm_classifier``.

    Returns
    -------
    tuple
        ``(y_predicted_train, y_predicted_test)``; each is a column vector
        (one row per sample) holding -1 or +1.
    """
    w0, alpha = svm_classifier(C)

    # Column vector of y_i * alpha_i. The labels come from the module-level
    # training split; the name `y_train` is not defined in this scope.
    signed_alpha = trainingdata_labels[:,None] * alpha[:,None]

    f_predicted_train = np.matmul(K_train, signed_alpha) + w0
    y_predicted_train = 2 * (f_predicted_train > 0.0) - 1
    f_predicted_test = np.matmul(K_test, signed_alpha) + w0
    y_predicted_test = 2 * (f_predicted_test > 0.0) - 1

    return (y_predicted_train, y_predicted_test)
    

# Module-level aliases for the true labels of each split.
y_train = trainingdata_labels
y_test = testdata_labels

# The confusion matrices are reported for C = 10; the predictions have to
# be computed here because nothing earlier in the file produces them.
y_predicted_train, y_predicted_test = prediction(K_train, K_test, 10)

# Predictions are column vectors, so flatten them before cross-tabulating.
confusion_matrix_train = pd.crosstab(np.reshape(y_predicted_train, N_train_images), y_train,
                               rownames = ["y_predicted"], colnames = ["y_train"])
print(confusion_matrix_train)

confusion_matrix_test = pd.crosstab(np.reshape(y_predicted_test, N_test_images), y_test,
                               rownames = ["y_predicted"], colnames = ["y_test"])
print(confusion_matrix_test)

### Figure 

In [None]:
# Regularization values to sweep: 10 raised to -1, -0.5, ..., 3.
C = [10 ** exponent for exponent in (-1, -.5, 0, .5, 1, 1.5, 2, 2.5, 3)]

# Accuracy per value of C, filled in by the sweep further down.
training_scores, test_scores = [], []

def score(y_predicted, y_truth):
    """Return the fraction of predictions that equal the true labels.

    Parameters
    ----------
    y_predicted : array-like
        Predicted labels; any shape (e.g. a column vector) is flattened.
    y_truth : array-like
        True labels; flattened the same way.

    Returns
    -------
    float
        Accuracy in [0, 1].

    Raises
    ------
    ValueError
        If the inputs hold a different number of labels, or none at all.
    """
    # Flatten both sides so an (N, 1) prediction compared with an (N,)
    # truth vector does not broadcast into an N x N comparison.
    predicted = np.ravel(y_predicted)
    truth = np.ravel(y_truth)

    if predicted.size != truth.size:
        raise ValueError("y_predicted and y_truth must contain the same number of labels")
    if truth.size == 0:
        raise ValueError("cannot compute accuracy of an empty label set")

    return float(np.mean(predicted == truth))

# Train one SVM per regularization value and record the accuracy on both
# splits. The true labels are read from the module-level split arrays.
for c in C:
    y_predicted_train, y_predicted_test = prediction(K_train, K_test, c)
    training_scores.append(score(y_predicted_train, trainingdata_labels))
    test_scores.append(score(y_predicted_test, testdata_labels))

plt.figure(figsize=(15,8))
# Plot against the numeric C values; str(C) is a single string and cannot
# be paired with one score per value.
plt.plot(C, training_scores, "-ob", markersize=4, label='training')
plt.plot(C, test_scores, "-or", markersize=4, label='test')
# The C values are powers of ten, so a log axis spaces them evenly.
plt.xscale("log")
plt.xlabel("Regularization parameter (C)")
plt.ylabel("Accuracy")
plt.legend(loc='upper left')
plt.show()