# CS613 Final Project: Image Classification 

In [1]:
import os
import numpy as np
import pandas as pd
import pickle

In [5]:
def unpickle(file):
    """Read a pickled object from ``file`` and return it.

    The file is opened in binary mode and decoded with ``encoding='bytes'``,
    so string keys pickled by Python 2 come back as ``bytes`` objects.

    NOTE: ``pickle`` can execute arbitrary code while loading; only use this
    on files from a trusted source.
    """
    with open(file, mode='rb') as stream:
        contents = pickle.load(stream, encoding='bytes')
    return contents

In [18]:
def load_cifar10(data_dir="./cifar-10-python/cifar-10-batches-py"):
    """Load the CIFAR-10 training and test batches from ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory holding the files ``data_batch_1`` .. ``data_batch_5`` and
        ``test_batch``. The default is the location the original code used.

    Returns
    -------
    tuple
        ``(train_data, train_labels, test_data, test_labels)``. Both data
        arrays are reshaped to ``(N, 32, 32, 3)``; both label arrays are 1-D.
    """
    train_chunks, train_labels = [], []
    for i in range(1, 6):
        batch = unpickle(f"{data_dir}/data_batch_{i}")
        train_chunks.append(batch[b'data'])
        train_labels.extend(batch[b'labels'])
    train_data = np.concatenate(train_chunks, axis=0)
    train_labels = np.array(train_labels)

    # Load test batch
    test_batch = unpickle(f"{data_dir}/test_batch")
    test_data = np.array(test_batch[b'data'])
    test_labels = np.array(test_batch[b'labels'])

    # Each row is reshaped to (3, 32, 32) and then moved to channels-last,
    # giving data of shape (N, 32, 32, 3).
    train_data = train_data.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
    test_data = test_data.reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)

    return train_data, train_labels, test_data, test_labels

In [36]:
# Save data to CSV
def save_to_csv(data, labels, file_path):
    """Write ``labels`` and ``data`` side by side to a comma-separated file.

    Each output row holds the sample's label in the first column followed by
    that sample's feature values. Every value is formatted with ``%f``.
    A confirmation message is printed once the file has been written.
    """
    # Label goes first so that column 0 of the CSV is the target.
    table = np.column_stack([labels, data])
    np.savetxt(file_path, table, fmt="%f", delimiter=",")
    print(f"Saved {file_path} successfully!")

In [30]:
# Prepare data
def normalize_images(data):
    """Return ``data`` divided by 255.0 (the input itself is not modified)."""
    max_pixel_value = 255.0
    scaled = data / max_pixel_value
    return scaled

# Convert labels to one-hot encoding
def one_hot_encode(labels, num_classes):
    """Build a one-hot matrix from an array of integer class labels.

    Returns a float array of shape ``(labels.size, num_classes)`` in which
    row ``i`` is all zeros except for a 1 in column ``labels[i]``.
    """
    n_samples = labels.size
    encoded = np.zeros((n_samples, num_classes))
    row_index = np.arange(n_samples)
    # Fancy indexing sets exactly one entry per row.
    encoded[row_index, labels] = 1
    return encoded

# SVM implementation
class SVM:
    """Multi-class linear SVM trained one-vs-rest with per-sample updates.

    One weight vector and one bias are kept per class. For class ``c`` the
    labels are mapped to +1 (sample belongs to ``c``) or -1 (otherwise) and
    the hinge loss with an L2 penalty is minimised by updating the
    parameters after every single sample.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every update.
    lambda_param : float
        Strength of the L2 penalty on the weights.
    n_epochs : int
        Number of full passes over the training data.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Array of shape ``(num_classes, n_features)``; ``None`` before ``fit``.
    biases : numpy.ndarray or None
        Array of shape ``(num_classes,)``; ``None`` before ``fit``.
    accuracy_per_epoch : list
        Accuracy on the evaluation set after each epoch of the most recent
        ``fit`` call (empty when no evaluation set was supplied).
    """

    def __init__(self, learning_rate=0.01, lambda_param=0.01, n_epochs=10):
        self.learning_rate = learning_rate
        self.lambda_param = lambda_param
        self.n_epochs = n_epochs
        self.weights = None
        self.biases = None
        self.accuracy_per_epoch = []

    def fit(self, X, y, num_classes, x_test=None, y_test=None):
        """Train the classifier from scratch on ``X`` / ``y``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training features.
        y : array-like, shape (n_samples,)
            Integer class labels in ``range(num_classes)``.
        num_classes : int
            Number of one-vs-rest classifiers to train.
        x_test, y_test : array-like, optional
            When both are given, accuracy on this set is computed after every
            epoch, printed, and appended to ``accuracy_per_epoch``.
        """
        # Accept lists as well as arrays: ``y == c`` must be element-wise.
        X = np.asarray(X)
        y = np.asarray(y)

        _, n_features = X.shape
        self.weights = np.zeros((num_classes, n_features))
        self.biases = np.zeros(num_classes)
        # Weights restart from zero on every fit, so the accuracy history
        # must restart too; otherwise a second fit() would append to the
        # previous run's entries.
        self.accuracy_per_epoch = []

        for epoch in range(self.n_epochs):
            for c in range(num_classes):
                y_binary = np.where(y == c, 1, -1)
                # ``w`` is a view into self.weights, so the in-place updates
                # below write straight into row ``c``.
                w = self.weights[c, :]
                b = self.biases[c]

                for idx, x_i in enumerate(X):
                    y_i = y_binary[idx]
                    # Margin of the current sample under the current model.
                    margin = y_i * (np.dot(x_i, w) - b)
                    if margin >= 1:
                        # Correct side with enough margin: only the L2
                        # penalty contributes to the gradient.
                        w -= self.learning_rate * (2 * self.lambda_param * w)
                    else:
                        # Margin violated: the hinge term contributes too.
                        w -= self.learning_rate * (2 * self.lambda_param * w - x_i * y_i)
                        b -= self.learning_rate * y_i

                # ``b`` is a scalar copy and has to be stored back explicitly.
                self.biases[c] = b

            # Evaluate after each epoch
            if x_test is not None and y_test is not None:
                y_pred = self.predict(x_test)
                accuracy = np.mean(y_pred == y_test)
                self.accuracy_per_epoch.append(accuracy)
                print(f"Epoch {epoch + 1}/{self.n_epochs} - Accuracy: {accuracy * 100:.2f}%")

    def predict(self, X):
        """Return, for each row of ``X``, the class with the highest score.

        The score of class ``c`` is ``X @ weights[c] - biases[c]``.
        ``fit`` must have been called first.
        """
        linear_output = np.dot(X, self.weights.T) - self.biases
        return np.argmax(linear_output, axis=1)

In [37]:
# Read the CIFAR-10 train/test splits from disk
print("Loading CIFAR-10 dataset...")
x_train, y_train, x_test, y_test = load_cifar10()

# Divide pixel values by 255 and flatten every image into one feature row
print("Preprocessing data...")
num_classes = 10
x_train = normalize_images(x_train).reshape(len(x_train), -1)
x_test = normalize_images(x_test).reshape(len(x_test), -1)

# Persist both flattened splits (label first, then features)
print("Saving to CSV...")
save_to_csv(data=x_train, labels=y_train, file_path="train.csv")
save_to_csv(data=x_test, labels=y_test, file_path="test.csv")
print("CSV files created.")

# Fit the one-vs-rest SVM, reporting test accuracy after each epoch
print("Training SVM...")
svm = SVM(n_epochs=5, learning_rate=1e-4, lambda_param=0.01)
svm.fit(x_train, y_train, num_classes, x_test=x_test, y_test=y_test)

# Final evaluation on the test split
print("Evaluating SVM...")
y_pred = svm.predict(x_test)

# Fraction of test samples whose predicted class matches the label
accuracy = (y_pred == y_test).mean()
print(f"Accuracy: {accuracy * 100:.2f}%")

Loading CIFAR-10 dataset...
Preprocessing data...
Saving to CSV...
Saved train.csv successfully!
Saved test.csv successfully!
CSV files created.
Training SVM...
Epoch 1/5 - Accuracy: 28.60%
Epoch 2/5 - Accuracy: 31.13%
Epoch 3/5 - Accuracy: 30.88%
Epoch 4/5 - Accuracy: 30.44%
Epoch 5/5 - Accuracy: 31.17%
Evaluating SVM...
Accuracy: 31.17%
