
## Question 3 - support vector machine 

In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
from math import sqrt
import os

In [None]:
# Positions of the training and test files inside the directory listing.
TRAIN = 0
TEST = 1
# Number of class labels; one binary (one-vs-all) classifier is trained per label.
NUMBER_OF_CLASSES = 10
# Number of folds used when cross-validating the regularisation strength.
NUMBER_OF_FOLDS = 5
# Mean of the normal distribution used to initialise the weight vector.
MU = 0
# Mini-batch size used by the batch training loop.
BATCH = 10
# Standard deviation of the normal distribution used to initialise the weights.
SIGMA = 0.1
# Indices into the per-epoch step-size list built in gradient_descent:
# 0 selects the constant step size, 1 the decaying one.
CONSTANT = 0
REGRESSIVE = 1

### Read Data

In [None]:
def read_data(filename):
    """Load a labelled CSV file as a feature matrix and a label vector.

    The first column of the file is treated as the label and every
    remaining column as a feature.

    Parameters
    ----------
    filename : anything accepted by ``pandas.read_csv`` (path or file-like).

    Returns
    -------
    tuple
        ``(features, labels)`` where ``features`` is the 2-D array of all
        columns but the first and ``labels`` is the 1-D array of the first
        column.
    """
    data = pd.read_csv(filename)
    labels = np.array(data.iloc[:, 0])
    # Slice the feature columns instead of calling
    # ``data.drop(col, 1, inplace=True)``: the positional ``axis`` argument
    # was removed in pandas 2.0, and slicing leaves the frame unmodified.
    features = data.iloc[:, 1:].to_numpy()
    return features, labels

In [None]:
# Directory that holds the train / test CSV files.
data_dir = 'Question 3'

# os.listdir returns bare file names, so each one is joined with the
# directory; otherwise read_data would look for the files in the current
# working directory.
# NOTE(review): os.listdir returns entries in arbitrary order, so which file
# ends up at index TRAIN / TEST is platform dependent -- confirm the file
# names and consider selecting them explicitly.
file_names = [os.path.join(data_dir, name) for name in os.listdir(data_dir)]

train_data, train_labels = read_data(file_names[TRAIN])
test_data, test_labels = read_data(file_names[TEST])

### 5 fold Cross Validation

In [None]:
def cross_validation(number_of_folds, fold_number, x, y):
    """Split ``x`` / ``y`` into the training and held-out parts of one fold.

    The samples are cut into ``number_of_folds`` contiguous slices; slice
    ``fold_number`` (0-based) is held out and the remaining samples, in their
    original order, form the training set.

    Parameters
    ----------
    number_of_folds : total number of folds.
    fold_number : index of the fold to hold out.
    x : 2-D array of samples (one row per sample).
    y : 1-D array of labels aligned with the rows of ``x``.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test)``.
    """
    # Use the shape of the data that was passed in, not a module-level array.
    n = x.shape[0]

    begin = int(np.ceil(n / number_of_folds * fold_number))
    end = int(np.ceil(n / number_of_folds * (fold_number + 1)))

    x_test = x[begin:end, :]
    y_test = y[begin:end]

    # Everything before and after the held-out slice; either part may be
    # empty (first / last fold), which concatenate handles naturally.
    x_train = np.concatenate((x[:begin, :], x[end:, :]))
    y_train = np.concatenate((y[:begin], y[end:]))

    return x_train, y_train, x_test, y_test

### Gradient For Stochastic Form

In [None]:
def gradient(lambdaa, b, w, x, y):
    """Return the per-sample (sub)gradient of the regularised hinge loss.

    The decision value is ``np.dot(w, x) - b``. When the sample violates the
    margin (``1 - y * decision > 0``) the hinge term contributes to both
    gradients; otherwise only the regularisation term ``2 * lambdaa * w``
    remains.

    Returns
    -------
    tuple
        ``(gradient_b, gradient_w)``.
    """
    margin = y * (np.dot(w, x) - b)

    if 1 - margin > 0:
        # Margin violated: hinge term is active.
        return y, 2 * lambdaa * w - y * x

    # Margin satisfied: only the regulariser contributes.
    return 0, 2 * lambdaa * w

### Gradient For Batch Form

In [None]:
def batch_gradient(lambdaa, b, w, x, y):
    """Return the mini-batch (sub)gradient of the regularised hinge loss.

    The hinge contribution of every margin-violating sample
    (``1 - y_k * (np.dot(w, x_k) - b) > 0``) is averaged over the batch and
    added to the regularisation gradient ``2 * lambdaa * w``.

    The batch size is taken from the data that is passed in, so a short
    final batch is handled instead of indexing past its end, and both
    gradients are averaged over the same number of samples.

    Parameters
    ----------
    lambdaa : regularisation strength.
    b : bias term.
    w : 1-D weight vector.
    x : 2-D array, one row per sample of the batch.
    y : 1-D array of +1 / -1 labels aligned with the rows of ``x``.

    Returns
    -------
    tuple
        ``(gradient_b, gradient_w)``.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    regulariser = 2 * lambdaa * w
    batch_size = y.shape[0]
    if batch_size == 0:
        # Nothing to average over: only the regulariser contributes.
        return 0, regulariser

    # Boolean mask of the samples that violate the margin.
    violating = (1 - y * (x @ w - b)) > 0

    gradient_b = y[violating].sum() / batch_size
    gradient_w = regulariser - (y[violating] @ x[violating]) / batch_size

    return gradient_b, gradient_w


### Training the data for each epoch - Stochastic Form

In [None]:
def train_each_epoch(w, b, lambdaa, alpha, train_data, train_labels):
    """Run one stochastic-gradient epoch over the training set.

    The samples are visited once each in a freshly shuffled order; after
    every sample the bias and weights take a step of size ``alpha`` against
    the gradient returned by ``gradient``.

    ``w`` is updated with ``-=``, so when it is a NumPy array the caller's
    array is modified in place as well as returned.

    Returns
    -------
    tuple
        ``(w, b)`` after the epoch.
    """
    visit_order = np.arange(train_data.shape[0])
    np.random.shuffle(visit_order)

    for sample_idx in visit_order:
        sample = train_data[sample_idx, :]
        target = train_labels[sample_idx]
        step_b, step_w = gradient(lambdaa, b, w, sample, target)
        b -= alpha * step_b
        w -= alpha * step_w

    return w, b


### Training the data for each epoch - Batch Form

In [None]:
def batch_train_each_epoch(w, b, lambdaa, alpha, train_data, train_labels):
    """Run one mini-batch epoch over the training set.

    The data is walked in order in consecutive slices of ``BATCH`` rows; for
    each slice the bias and weights take a step of size ``alpha`` against
    the gradient returned by ``batch_gradient``.

    ``w`` is updated with ``-=``, so when it is a NumPy array the caller's
    array is modified in place as well as returned.

    Returns
    -------
    tuple
        ``(w, b)`` after the epoch.
    """
    n_samples = train_data.shape[0]

    for start in range(0, n_samples, BATCH):
        stop = start + BATCH
        batch_x = train_data[start:stop, :]
        batch_y = train_labels[start:stop]
        step_b, step_w = batch_gradient(lambdaa, b, w, batch_x, batch_y)
        b -= alpha * step_b
        w -= alpha * step_w

    return w, b


### Assigning New Labels (1 and -1) For One vs. All Method

In [None]:

def assign_new_labels(number_of_classes, x_size, y, label):
    """Build the +1 / -1 target vector for one one-vs-all classifier.

    Entry ``i`` of the result is ``1`` when ``y[i] == label`` and ``-1``
    otherwise, for the first ``x_size`` entries of ``y``.
    ``number_of_classes`` is accepted but not used.

    Returns
    -------
    numpy.ndarray
        Float array of length ``x_size``.
    """
    # Start with every sample in the "rest" class, then flip the matches.
    relabelled = np.full(x_size, -1.0)
    for idx in range(x_size):
        if y[idx] == label:
            relabelled[idx] = 1

    return relabelled

In [None]:

def predict(X, Y, w, b):
    """Return the fraction of rows of ``X`` classified correctly.

    A row counts as correct when its label and its decision value
    ``np.dot(w, row) - b`` have the same sign, i.e. their product is
    strictly positive.
    """
    n_samples = X.shape[0]
    hits = sum(
        1
        for i in range(n_samples)
        if Y[i] * (np.dot(w, X[i, :]) - b) > 0
    )
    return hits / n_samples

### Finding the Best Lambda For Each Class

In [None]:
def find_best_lambda(x, y):
    """Pick a regularisation strength for every one-vs-all classifier.

    For each class label the targets are converted to +1 / -1, and every
    candidate lambda is trained with stochastic gradient descent on each
    cross-validation fold. A fold's score is the best held-out accuracy seen
    over the epochs, a lambda's score is the best score over its folds, and
    the lambda with the highest score is selected for that class.

    Parameters
    ----------
    x : 2-D array of training samples (one row per sample).
    y : 1-D array of class labels aligned with the rows of ``x``.

    Returns
    -------
    numpy.ndarray
        Array of length ``NUMBER_OF_CLASSES`` holding the chosen lambda for
        each class. The per-lambda scores and the choice are also printed.
    """
    epochs = 40
    alpha = 0.01  # constant step size
    lambdas = [10**-10, 10**-8, 10**-6, 10**-4, 10**-2, 1]
    best_lambdaa = np.zeros(NUMBER_OF_CLASSES)

    for label in range(NUMBER_OF_CLASSES):
        # assign_new_labels expects the number of samples, not the feature
        # matrix itself.
        new_labels = assign_new_labels(NUMBER_OF_CLASSES, x.shape[0], y, label)
        best_acc_lambdaa = np.zeros(len(lambdas))

        for l, lambdaa in enumerate(lambdas):
            best_acc_fold = np.zeros(NUMBER_OF_FOLDS)

            for fold_number in range(NUMBER_OF_FOLDS):
                temp_predict_test = np.zeros(epochs)
                # Fresh model for every (lambda, fold) pair.
                b = 0
                w = np.random.normal(MU, SIGMA, x.shape[1])
                x_train, y_train, x_test, y_test = cross_validation(NUMBER_OF_FOLDS, fold_number, x, new_labels)

                for epoch in range(epochs):
                    w, b = train_each_epoch(w, b, lambdaa, alpha, x_train, y_train)
                    temp_predict_test[epoch] = predict(x_test, y_test, w, b)

                best_acc_fold[fold_number] = np.amax(temp_predict_test)

            # NOTE(review): the best fold is used as the lambda's score;
            # cross-validation more commonly averages over folds -- confirm
            # this is intended.
            best_acc_lambdaa[l] = np.amax(best_acc_fold)

        best_lambdaa[label] = lambdas[np.argmax(best_acc_lambdaa)]
        print("Accuracy of each lambda:", best_acc_lambdaa)
        print("lambda of label", label, "is :", best_lambdaa[label])

    return best_lambdaa

### Cost Function + L2 Regularization

In [None]:
def cost_function(lambdaa, x, y, b, w):
    """Return the regularised hinge-loss objective on a data set.

    The value is ``lambdaa * ||w||**2`` plus the mean over all rows of
    ``max(0, 1 - y_i * (np.dot(w, x_i) - b))``.
    """
    n_samples = x.shape[0]

    # Regularisation term first, then the hinge terms are added one by one.
    total = lambdaa * (np.linalg.norm(w) ** 2)
    for i, row in enumerate(x):
        slack = 1 - y[i] * (np.dot(w, row) - b)
        if slack > 0:
            total += (1 / n_samples) * slack

    return total


### Accuracy Calculation

In [None]:
def calculate_accuracy(predict, test_labels):
    """Score a one-vs-all prediction matrix against the true labels.

    A row is counted as correct only when it has exactly one non-zero entry
    and the column holding the value ``1`` equals the row's true label.

    Returns
    -------
    tuple
        ``(accuracy, acc)`` where ``accuracy`` is the final fraction of
        correct rows and ``acc`` lists the running correct-count divided by
        the total number of rows after each row. The final accuracy is also
        printed.
    """
    total = predict.shape[0]
    hits = 0
    running = []

    for i, row in enumerate(predict):
        # Rows claimed by zero or by several classifiers never count.
        if np.count_nonzero(row) == 1 and np.where(row == 1)[0][0] == test_labels[i]:
            hits += 1
        running.append(hits / total)

    accuracy = hits / total
    print("Final accuracy for sgd:", accuracy)
    return accuracy, running

In [None]:
def predict_test(test_data, w, b, label, prediction):
    """Mark the rows that the classifier for ``label`` claims.

    For every row of ``test_data`` whose decision value
    ``np.dot(w, row) - b`` is strictly greater than 1, the entry
    ``prediction[row_index, label]`` is set to 1. ``prediction`` is modified
    in place and also returned.
    """
    for row_idx, row in enumerate(test_data):
        score = np.dot(w, row) - b
        if score > 1:
            prediction[row_idx, label] = 1

    return prediction

### Calculating Gradient Descent (Batch or Stochastic)

In [None]:
def gradient_descent(lambdas, train_data, train_labels, test_data, test_labels, label, prediction):
    """Train the one-vs-all classifier for ``label`` and record its predictions.

    The training labels are converted to +1 / -1 for ``label``, a weight
    vector is drawn from a normal distribution, and the model is trained for
    a fixed number of epochs with the constant step size. The training cost
    is evaluated after every epoch, and finally the classifier's decisions on
    ``test_data`` are written into column ``label`` of ``prediction``.
    ``test_labels`` is accepted but not used.

    Returns
    -------
    tuple
        ``(predictions, cost)``: the prediction matrix returned by
        ``predict_test`` and the array of per-epoch training costs.
    """
    epochs = 100
    lambdaa = lambdas[label]
    n_samples = train_data.shape[0]
    n_features = train_data.shape[1]

    new_labels = assign_new_labels(NUMBER_OF_CLASSES, n_samples, train_labels, label)

    # Initial model: zero bias, normally distributed weights.
    b = 0
    w = np.random.normal(MU, SIGMA, n_features)
    cost = np.zeros(epochs)

    for epoch in range(epochs):
        # Available step sizes: constant, and one decaying with the epoch.
        constant_step = 10**-2
        decaying_step = 0.1/((epoch+1)**2)
        alpha = (constant_step, decaying_step)[CONSTANT]

        w, b = train_each_epoch(w, b, lambdaa, alpha, train_data, new_labels)
        # For mini-batch training use this call instead:
        #w, b = batch_train_each_epoch(w, b, lambdaa, alpha, train_data, new_labels)
        cost[epoch] = cost_function(lambdaa, train_data, new_labels, b, w)

    predictions = predict_test(test_data, w, b, label, prediction)
    return predictions, cost


In [None]:
# Regularisation strength for each class, indexed by class label.
# NOTE(review): presumably the values selected by find_best_lambda -- confirm.
lambdas = np.array([10**-8, 10**-8, 10**-6, 10**-6, 0.0001, 10**-6, 10**-6, 10**-6, 10**-8, 10**-8])

# One row per test sample and one column per class; each classifier fills in
# its own column. Sized from NUMBER_OF_CLASSES so it stays in step with the
# loop below.
prediction = np.zeros((test_data.shape[0], NUMBER_OF_CLASSES))

# Train the one-vs-all classifiers in turn. `cost` is overwritten on every
# pass, so only the last class's training curve is kept.
for label in range(NUMBER_OF_CLASSES):
    predictions, cost = gradient_descent(lambdas, train_data, train_labels, test_data, test_labels, label, prediction)
        

In [None]:
# Score the combined one-vs-all prediction matrix against the test labels.
# NOTE(review): the variable name says "BGD", but gradient_descent as written
# trains with train_each_epoch (stochastic) and the constant step size --
# confirm the name matches the run it is meant to record.
accuracy_const_BGD, acc = calculate_accuracy(predictions, test_labels)

In [None]:
# epochs = 100

# plt.plot(np.arange(epochs), cost_con, 'pink', label = 'SGD constant stepsize')
# plt.plot(np.arange(epochs), cost_decay_BGD, 'gray', label = 'SGD decaying stepsize')
# plt.xlabel('Epochs')
# plt.ylabel('Loss Function')
# plt.legend()
# #plt.title('Stochastic Gradient Descent ')#with constant stepsize
# plt.show()
