## Softmax Regression

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pca import PCA
import argparse
import network
import os, random, sys
from data import traffic_sign, generate_k_fold_set, onehot_encode, onehot_decode, z_score_normalize, append_bias
from model.softmax import SoftmaxRegression

In [None]:
### Q6(a) - Evaluate Network on all 43 traffic signs (aligned dataset)

# Load aligned data. The features are cast to float32 before normalization
# because float64 was running out of memory.
X, y = traffic_sign(True)
X = z_score_normalize(X.astype(np.float32))

# Sanity check: report the shape of features and labels, then their dtypes.
for summary in (X.shape, y.shape, X.dtype, y.dtype):
    print(summary)

In [None]:
## (i) With PCA on aligned
#
# k-fold cross-validation of softmax regression trained with full-batch
# gradient descent on PCA-projected data. For every fold the weights that
# reach the best hold-out accuracy *within that fold* are the ones evaluated
# on the fold's test split.

# Softmax Regression Parameters
lr = 0.5
num_features = X.shape[1]
num_classes = y.max() + 1

# Per-epoch metrics, concatenated fold after fold (num_epochs entries per
# fold), so they can later be averaged per epoch across folds.
train_loss_record = []
train_accuracy_record = []
holdout_loss_record = []
holdout_accuracy_record = []

# One entry per fold.
test_accuracy_record = []

# PCA number of principal components
n_components = 300

# Only the first fold's principal components are plotted.
first_plot = True

num_epochs = 300
epochs_print = [50, 100, 150, 200, 250, 300]

k = 10

total_test_accuracy = 0.0
total_test_loss = 0.0

for cur_fold, (train, valid, test) in enumerate(generate_k_fold_set((X, y), k)):
    print(f"Current Fold: {cur_fold}")
    train_data, train_label = train
    valid_data, valid_label = valid
    test_data, test_label = test

    # Fit PCA on the training split only, then project it.
    pca = PCA(n_components)
    projected = pca.fit_transform(train_data)  # len(train_data) x n_components

    # Plot principal components
    if first_plot:
        pca.plot_PC()
        first_plot = False

    # Presumably PCA_generate applies the projection fitted above to unseen
    # data -- verify against the PCA class.
    train_d = append_bias(projected)
    valid_d = append_bias(pca.PCA_generate(valid_data))
    test_d = append_bias(pca.PCA_generate(test_data))

    softmax_model = SoftmaxRegression(lr, n_components, num_classes)

    # Onehot encode labels
    y_true = onehot_encode(train_label)
    valid_label_onehot = onehot_encode(valid_label)
    test_label_onehot = onehot_encode(test_label)

    # Best-so-far state is tracked per fold. Comparing against
    # holdout_accuracy_record (which spans every fold) could leave a later
    # fold without any update and silently reuse an earlier fold's weights.
    best_w = None
    best_holdout_accuracy = float("-inf")

    for epoch in range(num_epochs):
        # Metrics below describe the weights *before* this epoch's update.
        y_hat = softmax_model.model(train_d)

        raw_train_loss = softmax_model.cross_entropy(y_true, y_hat)
        # train loss normalized by # examples and classes
        train_loss_norm = raw_train_loss / len(train_d) / num_classes
        train_loss_record.append(train_loss_norm)

        train_accuracy = softmax_model.accuracy(y_true, y_hat)
        train_accuracy_record.append(train_accuracy)

        # TODO: early stopping, plotting std.dev., etc.
        holdout_y = softmax_model.model(valid_d)

        # Arguments are passed as (targets, predictions) to match the
        # training call above and update_weights below.
        # NOTE(review): confirm against SoftmaxRegression.cross_entropy.
        holdout_loss = softmax_model.cross_entropy(valid_label_onehot, holdout_y)
        # holdout loss normalized by # examples and classes
        holdout_loss_norm = holdout_loss / len(valid_d) / num_classes
        holdout_loss_record.append(holdout_loss_norm)

        holdout_accuracy = softmax_model.accuracy(valid_label_onehot, holdout_y)
        holdout_accuracy_record.append(holdout_accuracy)

        # ">=" keeps the original tie-breaking: the latest epoch wins a tie.
        if best_w is None or holdout_accuracy >= best_holdout_accuracy:
            best_holdout_accuracy = holdout_accuracy
            # Snapshot the weights; a bare reference would follow any
            # in-place change made by update_weights.
            best_w = np.copy(softmax_model.W)

        # Update Weights
        softmax_model.update_weights(train_d, y_true, y_hat)

        if (epoch + 1) in epochs_print:
            print(f' epoch: {epoch + 1}, train accuracy: {train_accuracy:.4f}, train_loss_norm:{train_loss_norm:.4f}, '\
                f'valid_acc: {holdout_accuracy:.4f}, valid_loss_norm: {holdout_loss_norm:.4f}')

    # Run on Test Dataset with this fold's best hold-out weights
    test_y = softmax_model.model_w(test_d, best_w)

    test_accuracy = softmax_model.accuracy(test_label_onehot, test_y)
    test_accuracy_record.append(test_accuracy)

    print(f'Test accuracy: {test_accuracy:.4f}')

    total_test_accuracy += test_accuracy

    raw_test_loss = softmax_model.cross_entropy(test_label_onehot, test_y)
    total_test_loss += raw_test_loss / len(test_d) / num_classes

print(f'Average test accuracy over {k} folds: {total_test_accuracy / k}')
print(f'Average test loss per example and class over {k} folds: {total_test_loss / k}')


In [None]:
def average_out_data_k(data, k=10):
    """Average ``k`` equally long, back-to-back segments of ``data`` element-wise.

    ``data`` is read as ``k`` consecutive folds of ``len(data) // k`` values
    each; entry ``j`` of the result is the mean of the ``j``-th value of every
    fold. Trailing values that do not fill a whole fold are ignored, so the
    length is expected to be cleanly divisible by ``k``.

    Returns a list of ``len(data) // k`` averages.
    """
    fold_len = len(data) // k
    averaged = []
    for offset in range(fold_len):
        # Accumulate the value at this position from each fold, in fold order.
        running = 0.0
        for fold in range(k):
            running += data[fold * fold_len + offset]
        averaged.append(running / k)
    return averaged

In [None]:
# TODO plots w/ error bars
# Two stacked panels: loss on top, accuracy below. Each curve is the
# per-epoch average across folds, as computed by average_out_data_k.
fig, axs = plt.subplots(2, 1, constrained_layout=True)

# (axis, training series, training label, validation series, validation label,
#  y-axis label, title)
panel_specs = (
    (axs[0],
     train_loss_record, 'Training loss',
     holdout_loss_record, 'Validation loss',
     'Loss per example',
     'Loss normalized per example and class vs. Epochs'),
    (axs[1],
     train_accuracy_record, 'Training accuracy',
     holdout_accuracy_record, 'Validation accuracy',
     'Accuracy',
     'Accuracy vs. Epochs'),
)

for panel, train_series, train_name, valid_series, valid_name, y_name, heading in panel_specs:
    panel.plot(average_out_data_k(train_series), '-b', label=train_name)
    panel.plot(average_out_data_k(valid_series), '--r', label=valid_name)
    panel.legend()
    panel.set_xlabel('Epochs')
    panel.set_ylabel(y_name)
    panel.set_title(heading)


In [None]:
## (ii) Without PCA on aligned and with PCA on unaligned
# TODO


In [None]:
## (iii) Confusion Matrix on Test Set results
# TODO

In [None]:
### Q6(b) - Stochastic Gradient Descent
#
# k-fold cross-validation of softmax regression on PCA-projected data where
# the weights are updated after every single training example, visiting the
# examples in a fresh random order each epoch. For every fold the weights
# with the best hold-out accuracy within that fold are evaluated on the
# fold's test split.

print("Q6(b) - Stochastic Gradient Descent")

# Softmax Regression Parameters
lr = 0.5
num_features = X.shape[1]
num_classes = y.max() + 1

# Per-epoch metrics, concatenated fold after fold (num_epochs entries per fold).
train_loss_record = []
train_accuracy_record = []
holdout_loss_record = []
holdout_accuracy_record = []

# One entry per fold.
test_accuracy_record = []

# PCA number of principal components
n_components = 300

# Only the first fold's principal components are plotted.
first_plot = True

num_epochs = 300
epochs_print = [50, 100, 150, 200, 250, 300]

k = 10

# Reset the accumulators so this experiment's averages do not include
# results left over from an earlier cell.
total_test_accuracy = 0.0
total_test_loss = 0.0

for cur_fold, (train, valid, test) in enumerate(generate_k_fold_set((X, y), k=k)):
    print("Cur fold:", cur_fold)
    train_data, train_label = train
    valid_data, valid_label = valid
    test_data, test_label = test

    # Fit PCA on the training split only, then project it.
    pca = PCA(n_components)
    projected = pca.fit_transform(train_data)

    # Plot principal components
    if first_plot:
        pca.plot_PC()
        first_plot = False

    # Presumably PCA_generate applies the projection fitted above to unseen
    # data -- verify against the PCA class.
    train_d = append_bias(projected)
    valid_d = append_bias(pca.PCA_generate(valid_data))
    test_d = append_bias(pca.PCA_generate(test_data))

    softmax_model = SoftmaxRegression(lr, n_components, num_classes)

    # Onehot encode labels once; shuffling is done through an index
    # permutation, so the data and its labels always stay aligned.
    y_true = onehot_encode(train_label)
    valid_label_onehot = onehot_encode(valid_label)
    test_label_onehot = onehot_encode(test_label)

    # Best-so-far state for this fold only.
    best_w = None
    best_holdout_accuracy = float("-inf")

    # SGD
    for epoch in range(num_epochs):
        # np.random.shuffle works in place and returns None, so assigning its
        # result disables shuffling; permutation returns the shuffled indices.
        visit_order = np.random.permutation(len(train_d))

        # Iterate over each example, updating the weights after every one
        for i in visit_order:
            cur_ex = train_d[i][np.newaxis, :]
            cur_label = y_true[i][np.newaxis, :]
            y_hat = softmax_model.model(cur_ex)

            # Update Weights
            softmax_model.update_weights(cur_ex, cur_label, y_hat)

        # Training Loss (measured after the epoch's updates)
        y_hat = softmax_model.model(train_d)

        raw_train_loss = softmax_model.cross_entropy(y_true, y_hat)
        # train loss normalized by # examples and classes
        train_loss_norm = raw_train_loss / len(train_d) / num_classes
        train_loss_record.append(train_loss_norm)

        train_accuracy = softmax_model.accuracy(y_true, y_hat)
        train_accuracy_record.append(train_accuracy)

        # Validation Loss
        holdout_y = softmax_model.model(valid_d)

        # Arguments are passed as (targets, predictions) to match the
        # training call above and update_weights.
        # NOTE(review): confirm against SoftmaxRegression.cross_entropy.
        holdout_loss = softmax_model.cross_entropy(valid_label_onehot, holdout_y)
        # holdout loss normalized by # examples and classes
        holdout_loss_norm = holdout_loss / len(valid_d) / num_classes
        holdout_loss_record.append(holdout_loss_norm)

        holdout_accuracy = softmax_model.accuracy(valid_label_onehot, holdout_y)
        holdout_accuracy_record.append(holdout_accuracy)

        # ">=" lets the latest epoch win a tie.
        if best_w is None or holdout_accuracy >= best_holdout_accuracy:
            best_holdout_accuracy = holdout_accuracy
            # Snapshot the weights; a bare reference would follow any
            # in-place change made by update_weights.
            best_w = np.copy(softmax_model.W)

        # Progress is reported every epoch; the epoch number is part of the line.
        print(f' epoch: {epoch + 1}, train accuracy: {train_accuracy:.4f}, train_loss_norm:{train_loss_norm:.4f}, '\
            f'valid_acc: {holdout_accuracy:.4f}, valid_loss_norm: {holdout_loss_norm:.4f}')

    # Run on Test Dataset with this fold's best hold-out weights
    test_y = softmax_model.model_w(test_d, best_w)

    test_accuracy = softmax_model.accuracy(test_label_onehot, test_y)
    test_accuracy_record.append(test_accuracy)

    print(f'Test accuracy: {test_accuracy:.4f}')

    total_test_accuracy += test_accuracy

    raw_test_loss = softmax_model.cross_entropy(test_label_onehot, test_y)
    total_test_loss += raw_test_loss / len(test_d) / num_classes

print(f'Average test accuracy over {k} folds: {total_test_accuracy / k}')
print(f'Average test loss per example and class over {k} folds: {total_test_loss / k}')

In [None]:
### (c) Visualize the weights

def weights2range(w, min=0, max=256):
    """Linearly rescale ``w`` so its values span ``[min, max]``.

    The smallest entry of ``w`` maps to ``min`` and the largest to ``max``.
    The input is never modified; a new float array of the same shape is
    returned. If every entry of ``w`` is equal, all outputs are ``min``.
    An empty input yields an empty array.

    Args:
        w: array-like of weights (any shape, integer or float).
        min: lower bound of the output range.
        max: upper bound of the output range.

    Returns:
        A new ``numpy.ndarray`` of floats with values in ``[min, max]``.
    """
    # NOTE: the parameter names shadow the builtins; they are kept so that
    # existing keyword callers continue to work.
    scaled = np.array(w, dtype=float)  # always a copy, never a view of w
    if scaled.size == 0:
        return scaled

    lowest = scaled.min()
    spread = scaled.max() - lowest

    scaled -= lowest
    if spread > 0:  # avoid dividing by zero for constant input
        scaled /= spread
    scaled *= (max - min)
    scaled += min
    return scaled

# Plot image