#   QUESTION 2 PART B


In [14]:
import numpy as np
import struct

# Function to load IDX format files (manual reading)
def load_idx(filename):
    with open(filename, 'rb') as f:
        # Read the magic number and dimensions
        magic, size = struct.unpack(">II", f.read(8))
        if magic == 2051:  # Magic number for images
            n_rows, n_cols = struct.unpack(">II", f.read(8))
            data = np.fromfile(f, dtype=np.uint8).reshape(size, n_rows, n_cols)
        elif magic == 2049:  # Magic number for labels
            data = np.fromfile(f, dtype=np.uint8)
        else:
            raise ValueError("Invalid magic number in IDX file")
    return data

In [15]:
train_images = load_idx('archive-2/train-images.idx3-ubyte')
train_labels = load_idx('archive-2/train-labels.idx1-ubyte')
test_images = load_idx('archive-2/t10k-images.idx3-ubyte')
test_labels = load_idx('archive-2/t10k-labels.idx1-ubyte')


In [16]:
train_images = train_images.reshape((60000, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((10000, 28, 28, 1)).astype('float32') / 255

In [17]:
def one_hot_encode(labels, num_classes=10):
    one_hot = np.zeros((labels.shape[0], num_classes))
    one_hot[np.arange(labels.shape[0]), labels] = 1
    return one_hot

train_labels = one_hot_encode(train_labels, 10)
test_labels = one_hot_encode(test_labels, 10)

In [18]:
conv1_filters = np.random.randn(32, 3, 3, 1) * 0.01  # 32 filters of size 3x3x1
conv2_filters = np.random.randn(64, 3, 3, 32) * 0.01  # 64 filters of size 3x3x32
fc1_weights = np.random.randn(128, 7 * 7 * 64) * 0.01  # Fully connected weights
fc1_bias = np.zeros((128, 1))
fc2_weights = np.random.randn(10, 128) * 0.01  # Output layer weights
fc2_bias = np.zeros((10, 1))


In [19]:
def relu(x):
    return np.maximum(0, x)

In [20]:
def softmax(x):
    exps = np.exp(x - np.max(x, axis=0, keepdims=True))
    return exps / np.sum(exps, axis=0, keepdims=True)


In [21]:
def conv2d(x, filters):
    n_filters, f_h, f_w, _ = filters.shape
    n_h, n_w, _ = x.shape
    output_h = n_h - f_h + 1
    output_w = n_w - f_w + 1
    output = np.zeros((output_h, output_w, n_filters))
    for h in range(output_h):
        for w in range(output_w):
            for f in range(n_filters):
                region = x[h:h+f_h, w:w+f_w, :]
                output[h, w, f] = np.sum(region * filters[f])
    return relu(output)

In [22]:
def max_pool2d(x, size=2, stride=2):
    n_h, n_w, n_c = x.shape
    output_h = (n_h - size) // stride + 1
    output_w = (n_w - size) // stride + 1 
    output = np.zeros((output_h, output_w, n_c))
    for h in range(output_h):
        for w in range(output_w):
            for c in range(n_c):
                region = x[h*stride:h*stride+size, w*stride:w*stride+size, c]
                output[h, w, c] = np.max(region)
    return output

In [23]:
def flatten(x):
    return x.flatten()

In [26]:
def forward_pass(image):

    x = conv2d(image, conv1_filters)
    print(f"After conv1: {x.shape}")
    x = conv2d(x, conv2_filters)
    print(f"After conv2: {x.shape}")
    x = max_pool2d(x)
    print(f"After max pooling: {x.shape}")
    x = flatten(x)
    print(f"After flatten: {x.shape}")
    
    fc_input_size = fc1_weights.shape[1]
    assert x.size == fc_input_size, f"Mismatch in flattened size: expected {fc_input_size}, got {x.size}"
    x = relu(np.dot(fc1_weights, x) + fc1_bias)
    print(f"After fully connected layer 1: {x.shape}")
    
    x = np.dot(fc2_weights, x) + fc2_bias
    print(f"After fully connected layer 2: {x.shape}")
    
    return softmax(x)

In [27]:
sample_image = train_images[0]
x = conv2d(sample_image, conv1_filters)
x = conv2d(x, conv2_filters)
x = max_pool2d(x)
flattened_size = x.size

In [28]:
fc1_weights = np.random.randn(128, flattened_size) * 0.01
fc1_bias = np.zeros((128, 1))

In [29]:
fc1_weights = np.random.randn(128, flattened_size) * 0.01
fc1_bias = np.zeros((128, 1))


In [30]:
image = train_images[0]
prediction = forward_pass(image)
print(f"Predicted probabilities: {prediction}")

After conv1: (26, 26, 32)
After conv2: (24, 24, 64)
After max pooling: (12, 12, 64)
After flatten: (9216,)
After fully connected layer 1: (128, 128)
After fully connected layer 2: (10, 128)
Predicted probabilities: [[0.1        0.10000762 0.10000932 ... 0.10000613 0.1        0.1       ]
 [0.1        0.10000743 0.10000909 ... 0.10000598 0.1        0.1       ]
 [0.1        0.10000501 0.10000613 ... 0.10000404 0.1        0.1       ]
 ...
 [0.1        0.10001907 0.10002334 ... 0.10001535 0.1        0.1       ]
 [0.1        0.10000339 0.10000415 ... 0.10000273 0.1        0.1       ]
 [0.1        0.10000314 0.10000384 ... 0.10000253 0.1        0.1       ]]


# QUESTION 1  

In [4]:
import numpy as np
import idx2numpy

# Load and preprocess data
def load_and_preprocess_data():
    # Loading data from idx files
    train_images = idx2numpy.convert_from_file('archive-2/train-images.idx3-ubyte')
    train_labels = idx2numpy.convert_from_file('archive-2/train-labels.idx1-ubyte')
    test_images = idx2numpy.convert_from_file('archive-2/t10k-images.idx3-ubyte')
    test_labels = idx2numpy.convert_from_file('archive-2/t10k-labels.idx1-ubyte')

    # Subsetting and flattening data
    subset_size = 10000
    train_images_subset = train_images[:subset_size]
    train_labels_subset = train_labels[:subset_size]
    train_images_flattened = train_images_subset.reshape(subset_size, -1)

    # Splitting data into train and test sets
    split_index = int(0.8 * subset_size)
    train_images_train = train_images_flattened[:split_index]
    train_labels_train = train_labels_subset[:split_index]
    train_images_test = train_images_flattened[split_index:]
    train_labels_test = train_labels_subset[split_index:]

    return train_images_train, train_labels_train, train_images_test, train_labels_test

In [5]:

class LinearSVM:
    def __init__(self, learning_rate=0.01, lambda_param=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.lambda_param = lambda_param
        self.epochs = epochs
        self.weights = None
        self.bias = 0

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)

        # Convert labels to +1 and -1 for binary classification
        y = np.where(y <= 0, -1, 1)

        for _ in range(self.epochs):
            for idx, x_i in enumerate(X):
                condition = y[idx] * (np.dot(x_i, self.weights) - self.bias) >= 1
                if condition:
                    self.weights -= self.learning_rate * (2 * self.lambda_param * self.weights)
                else:
                    self.weights -= self.learning_rate * (2 * self.lambda_param * self.weights - np.dot(x_i, y[idx]))
                    self.bias -= self.learning_rate * y[idx]

    def predict(self, X):
        linear_output = np.dot(X, self.weights) - self.bias
        return np.sign(linear_output)

In [6]:
def accuracy(y_true, y_pred):
    return np.mean(y_true == y_pred)

# Main execution
train_images_train, train_labels_train, train_images_test, train_labels_test = load_and_preprocess_data()

# Train Linear SVM
svm_clf_linear = LinearSVM(learning_rate=0.001, lambda_param=0.01, epochs=1000)
svm_clf_linear.fit(train_images_train, train_labels_train)

# Predictions and accuracy
train_predictions_linear = svm_clf_linear.predict(train_images_train)
train_accuracy_linear = accuracy(train_labels_train, train_predictions_linear)
print(f"Training accuracy for linear SVM without scaling: {train_accuracy_linear * 100:.2f}%")

test_predictions_linear = svm_clf_linear.predict(train_images_test)
test_accuracy_linear = accuracy(train_labels_test, test_predictions_linear)
print(f"Testing accuracy for linear SVM without scaling: {test_accuracy_linear * 100:.2f}%")

Training accuracy for linear SVM without scaling: 11.33%
Testing accuracy for linear SVM without scaling: 11.05%
