In [None]:
import sys
import numpy as np
import matplotlib.pyplot as plt
sys.path.append("..")
from utils import *
from linear_regression import *
from softmax import *
from features import *
from kernel import *



#######################################################################
# 1. Introduction
#######################################################################


In [None]:
# Load the MNIST train/test splits once at module level; later cells in this
# notebook read train_x, train_y, test_x and test_y directly.
train_x, train_y, test_x, test_y = get_MNIST_data()
# Print the shapes of the training inputs and labels as a quick sanity check.
print(train_x.shape, train_y.shape)

To solve the linear regression problem, recall that linear regression has a closed-form solution:

$$
\theta = \left(X^{T} X + \lambda I\right)^{-1} X^{T} Y
$$

where I = identity matrix

lambda = regularization parameter

X = input feature

Y = output label


In [None]:
def IdentityMatrix(n):
    """
    Builds the n x n identity matrix as a nested Python list.

    Args:
        n - number of rows and columns
    Returns:
        A list of n rows, each a list of n ints, holding 1 on the main
        diagonal and 0 everywhere else
    """
    return [[1 if col == row else 0 for col in range(n)] for row in range(n)]
    
def DiagonalMatrix(n,l):
    """
    Builds an n x n matrix (nested Python list) with l on the main diagonal.

    Args:
        n - number of rows and columns
        l - value placed in every diagonal entry
    Returns:
        A list of n rows, each a list of n entries: l where the row and column
        indices match, 0 elsewhere
    """
    return [[l if col == row else 0 for col in range(n)] for row in range(n)]
def H_Func(n):
    """
    Builds the n x n centering matrix H = I - (1/n) * 1 1^T.

    Args:
        n - number of rows and columns (positive int)
    Returns:
        H - (n, n) NumPy array; every row and every column of H sums to zero
    """
    # np.ones(n) is 1-D, so matmul(one, transpose(one)) is an inner product that
    # collapses to the scalar n; broadcasting would then subtract 1 from every
    # entry of I. Build the (n, n) all-ones outer product explicitly instead.
    ones_outer = np.ones((n, n))
    return np.eye(n) - ones_outer / n

def gramMatrix(vectorList):
    """
    Computes the Gram matrix of a collection of vectors.

    Args:
        vectorList - sequence of equal-length vectors, one per row
    Returns:
        NumPy array holding the product of the stacked vectors with their
        own transpose (entry [i][j] is the dot product of rows i and j)
    """
    stacked = np.array(vectorList)
    return np.dot(stacked, stacked.T)

def eigenValues(vector):
    """
    Returns the eigenvalues of a matrix, rounded to three decimal places.

    Args:
        vector - array-like accepted by np.linalg.eig (a square matrix,
                 despite the parameter name)
    Returns:
        NumPy array of eigenvalues rounded to 3 decimals
    """
    eigenvalues = np.linalg.eig(vector)[0]
    return np.round(eigenvalues, 3)

def eigenVector(vector):
    """
    Returns the eigenvectors of a matrix, rounded to three decimal places.

    Args:
        vector - array-like accepted by np.linalg.eig (a square matrix,
                 despite the parameter name)
    Returns:
        NumPy array whose columns are the eigenvectors reported by
        np.linalg.eig, rounded to 3 decimals
    """
    eigenvectors = np.linalg.eig(vector)[1]
    return np.round(eigenvectors, 3)

In [None]:
def closed_form(X, Y, lambda_factor):
    """
    Computes the closed form solution of linear regression with L2 regularization

    Args:
        X - (n, d + 1) NumPy array (n datapoints each with d features plus the bias feature in the first dimension)
        Y - (n, ) NumPy array containing the labels (a number from 0-9) for each
            data point
        lambda_factor - the regularization constant (scalar)
    Returns:
        theta - (d + 1, ) NumPy array containing the weights of linear regression. Note that theta[0]
        represents the y-axis intercept of the model, so the first column of X is expected to be all ones
    Raises:
        numpy.linalg.LinAlgError - if (X^T X + lambda_factor * I) is singular
    """
    # The identity is sized by the number of feature columns (d + 1), not by the
    # number of data points, because it is added to the (d + 1, d + 1) matrix X^T X.
    num_features = X.shape[1]
    regularized_gram = np.dot(X.T, X) + lambda_factor * np.eye(num_features)
    # Solve (X^T X + lambda * I) theta = X^T Y directly instead of forming the
    # explicit inverse: same solution, cheaper and numerically better behaved.
    return np.linalg.solve(regularized_gram, np.dot(X.T, Y))


In [None]:
# Smoke-test closed_form on the first n training examples.
n = 10
X = train_x[:n]
Y = train_y[:n]
# Column of ones that acts as the bias feature.
theta_0 = np.ones((n,1))
print(X.shape, theta_0.shape)
# Put the bias column first: closed_form documents the bias feature as the
# first dimension of X, and run_linear_regression_on_MNIST stacks it the same way.
X = np.hstack([theta_0, X])
theta = closed_form(X, Y, 0.1)
print(theta.shape)

#######################################################################
# 2. Linear Regression with Closed Form Solution
#######################################################################

## Test Error on Linear Regression

Apply the linear regression model to the test set. For classification purposes, you decide to round each predicted label to an integer in the range 0-9.

**Note:** For this project we will be looking at the error rate, defined as the fraction of predicted labels that don't match the target labels (also known as the "gold labels" or ground truth). In other contexts, you might want to consider other performance measures such as precision and recall, which we have not discussed in this course.

In [None]:
def run_linear_regression_on_MNIST(lambda_factor=1):
    """
    Fits regularized linear regression on MNIST and scores it on the test split.

    Args:
        lambda_factor - regularization constant handed to closed_form
    Returns:
        Final test error, as reported by compute_test_error_linear
    """
    train_x, train_y, test_x, test_y = get_MNIST_data()

    def with_bias(features):
        # Prepend a column of ones so the first weight acts as the intercept.
        ones_column = np.ones([features.shape[0], 1])
        return np.hstack([ones_column, features])

    theta = closed_form(with_bias(train_x), train_y, lambda_factor)
    return compute_test_error_linear(with_bias(test_x), test_y, theta)


# Don't run this until the relevant functions in linear_regression.py have been fully implemented.
# Report the test error for each regularization strength, largest first.
for lambda_factor in (1, 0.1, 0.01):
    print('Linear Regression test_error =', run_linear_regression_on_MNIST(lambda_factor=lambda_factor))

From the above, we see that no matter which lambda factor is used, the test error is large. What could be wrong with this approach?

Which of the following can be true in this approach:

- Gradient descent should be used instead of the closed form solution. - TRUE

- The loss function related to the closed-form solution is inadequate for this problem.

- Regularization should not be used here.

#######################################################################
# 3. Support Vector Machine
#######################################################################

In [None]:

from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier

def one_vs_rest_svm(train_x, train_y, test_x):
    """
    Trains a linear SVM for binary classification and labels the test points

    Args:
        train_x - (n, d) NumPy array (n datapoints each with d features)
        train_y - (n, ) NumPy array containing the labels (0 or 1) for each training data point
        test_x - (m, d) NumPy array (m datapoints each with d features)
    Returns:
        pred_test_y - (m,) NumPy array containing the labels (0 or 1) for each test data point
    """
    # fit() hands back the fitted estimator, so training and prediction chain.
    return LinearSVC(random_state=0, C=0.1).fit(train_x, train_y).predict(test_x)



def compute_test_error_svm(test_y, pred_test_y):
    """
    Computes the fraction of predictions that differ from the true labels

    Args:
        test_y - NumPy array of true labels
        pred_test_y - NumPy array of predicted labels, aligned with test_y
    Returns:
        The error rate: one minus the mean of the element-wise matches
    """
    matches = pred_test_y == test_y
    accuracy = np.mean(matches)
    return 1 - accuracy
    
def run_svm_one_vs_rest_on_MNIST():
    """
    Trains the binary SVM on MNIST (digit 0 versus every other digit),
    classifies the test data and computes the test error

    Returns:
        Test error for the binary svm
    """
    train_x, train_y, test_x, test_y = get_MNIST_data()
    # Collapse the digit labels to 0 ("zero") versus 1 ("any other digit"),
    # overwriting both label arrays in place.
    for labels in (train_y, test_y):
        labels[labels != 0] = 1
    predictions = one_vs_rest_svm(train_x, train_y, test_x)
    return compute_test_error_svm(test_y, predictions)


print('SVM one vs. rest test_error:', run_svm_one_vs_rest_on_MNIST())

In [None]:
def multi_class_svm(train_x, train_y, test_x):
    """
    Trains a linear SVM for multiclass classification using a one-vs-rest strategy

    Args:
        train_x - (n, d) NumPy array (n datapoints each with d features)
        train_y - (n, ) NumPy array containing the labels (int) for each training data point
        test_x - (m, d) NumPy array (m datapoints each with d features)
    Returns:
        pred_test_y - (m,) NumPy array containing the labels (int) for each test data point
    """
    model = LinearSVC(C=0.1, random_state=0)
    model.fit(train_x, train_y)
    predicted_labels = model.predict(test_x)
    return predicted_labels

def run_multiclass_svm_on_MNIST():
    """
    Trains the multiclass SVM on MNIST, classifies the test data and
    computes the test error

    Returns:
        Test error for the multiclass svm
    """
    train_x, train_y, test_x, test_y = get_MNIST_data()
    predictions = multi_class_svm(train_x, train_y, test_x)
    # Show the first ten predicted labels before scoring.
    print(predictions[:10])
    return compute_test_error_svm(test_y, predictions)


print('Multiclass SVM test_error:', run_multiclass_svm_on_MNIST())

#######################################################################
# 4. Multinomial (Softmax) Regression and Gradient Descent
#######################################################################

In [None]:
from test import *
import softmax
# Toy inputs for compute_probabilities: theta (7 x 5) and X (3 x 5) are filled
# with consecutive integers, row by row.
theta = np.arange(35).reshape(7, 5)
X = np.arange(15).reshape(3, 5)
t = 0.2
# Reference values written down for this input (all zeros except a final row of
# ones); nothing below compares them with the computed result.
Expected = [[0.0] * 3 for _ in range(6)] + [[1.0] * 3]
softmax.compute_probabilities(X, theta, t)
# check_compute_probabilities()


In [None]:
from test import *
# Run the gradient-descent iteration check. The function is not defined in this
# notebook, so it presumably comes from the wildcard import of `test` above.
check_run_gradient_descent_iteration()

In [None]:
import sys
import numpy as np
import matplotlib.pyplot as plt
sys.path.append("..")
from utils import *
from softmax import *

def run_softmax_on_MNIST(temp_parameter=1):
    """
    Trains softmax, classifies test data, computes test error, and plots cost function

    Runs softmax_regression on the MNIST training set and computes the test error using
    the test set. It uses the following values for parameters:
    alpha = 0.3
    lambda = 1e-4
    num_iterations = 150

    Saves the final theta to ./theta.pkl.gz and prints the error of the same
    model when its predictions are scored against the mod-3 labels.

    Args:
        temp_parameter - temperature passed to softmax_regression and to the
                         error computations
    Returns:
        Final test error (on the original digit labels)
    """
    train_x, train_y, test_x, test_y = get_MNIST_data()
    theta, cost_function_history = softmax_regression(train_x, train_y, temp_parameter, alpha=0.3, lambda_factor=1.0e-4, k=10, num_iterations=150)
    plot_cost_function_over_time(cost_function_history)
    test_error = compute_test_error(test_x, test_y, theta, temp_parameter)
    # Save the model parameters theta obtained from calling softmax_regression to disk.
    write_pickle_data(theta, "./theta.pkl.gz")

    # "Using the Current Model" (tab 6): score the already-trained model against
    # the mod-3 labels. Keep this in its own variable so it does not overwrite
    # the regular test error that the function returns.
    _, test_y_mod3 = update_y(train_y, test_y)
    test_error_mod3 = compute_test_error_mod3(test_x, test_y_mod3, theta, temp_parameter)
    print('softmax test_error_mod3=', test_error_mod3)
    return test_error


print('softmax test_error=', run_softmax_on_MNIST(temp_parameter=2))


#######################################################################
# 6. Changing Labels
#######################################################################


In [None]:
import sys
import numpy as np
import matplotlib.pyplot as plt
sys.path.append("..")
from utils import *
from linear_regression import *
from softmax import *
from features import *
from kernel import *


def run_softmax_on_MNIST_mod3(temp_parameter=1):
    """
    Trains Softmax regression on digit (mod 3) classifications.

    See run_softmax_on_MNIST for more info.

    Args:
        temp_parameter - temperature passed to softmax_regression and compute_test_error
    Returns:
        Test error of the retrained model on the mod-3 test labels
    """
    train_x, train_y, test_x, test_y = get_MNIST_data()
    train_y_mod3, test_y_mod3 = update_y(train_y, test_y)
    # NOTE(review): k stays at 10 although mod-3 labels presumably take only
    # three values - confirm whether k=3 was intended.
    hyperparameters = dict(alpha=0.3, lambda_factor=1.0e-4, k=10, num_iterations=150)
    theta, cost_function_history = softmax_regression(train_x, train_y_mod3, temp_parameter, **hyperparameters)
    plot_cost_function_over_time(cost_function_history)
    return compute_test_error(test_x, test_y_mod3, theta, temp_parameter)


# Report the error rate of the model retrained on the mod-3 labels.
print('softmax test_error(t=1)=', run_softmax_on_MNIST_mod3(temp_parameter=1))

#######################################################################
# 7. Classification Using Manually Crafted Features
#######################################################################

## Dimensionality reduction via PCA ##

**TODO:** First fill out the PCA functions in features.py as the below code depends on them.


In [None]:

# Number of principal components kept for this experiment.
n_components = 18

# Correction note: the following 4 lines have been modified since release.
# Center the training data, derive the principal components from the centered
# data, then project both splits using the training feature means.
train_x_centered, feature_means = center_data(train_x)
pcs = principal_components(train_x_centered)
train_pca = project_onto_PC(train_x, pcs, n_components, feature_means)
test_pca = project_onto_PC(test_x, pcs, n_components, feature_means)

# train_pca (and test_pca) is a representation of our training (and test) data
# after projecting each example onto the first 18 principal components.
# Train softmax regression on (train_pca, train_y) and evaluate it on
# (test_pca, test_y).
theta, cost_function_history = softmax_regression(train_pca, train_y, temp_parameter=1, alpha= 0.3, lambda_factor = 1.0e-4, k = 10, num_iterations = 150)
# Cost-history plot left disabled for this run:
# plot_cost_function_over_time(cost_function_history)
test_error = compute_test_error(test_pca, test_y, theta, temp_parameter=1)
print("Test error with 18-dim PCA representation:", test_error)

In [None]:
# Scatterplot of the first 100 MNIST training images via plot_PC from
# features.py, intended to show them in the space spanned by the first 2
# principal components found above.
# The same 100 rows are selected from train_x and train_y so that each point
# keeps its label; feature_means was added to the call after release.
plot_PC(train_x[range(0, 100), ], pcs, train_y[range(0, 100)], feature_means)#feature_means added since release


In [None]:
# Use reconstruct_PC from features.py to rebuild the first and second MNIST
# training images from their principal-component representation (rows of
# train_pca, using n_components components), and plot each reconstruction
# next to its original for comparison.
# NOTE(review): this relies on train_pca and n_components still holding the
# values set by the 18-component cell - confirm the cells run in order.
firstimage_reconstructed = reconstruct_PC(train_pca[0, ], pcs, n_components, train_x, feature_means)#feature_means added since release
plot_images(firstimage_reconstructed)
plot_images(train_x[0, ])

secondimage_reconstructed = reconstruct_PC(train_pca[1, ], pcs, n_components, train_x, feature_means)#feature_means added since release
plot_images(secondimage_reconstructed)
plot_images(train_x[1, ])

In [None]:

# Number of principal components kept for this experiment.
n_components = 10

# Correction note: the following 4 lines have been modified since release.
# Center the training data, derive the principal components from the centered
# data, then project both splits using the training feature means.
train_x_centered, feature_means = center_data(train_x)
pcs = principal_components(train_x_centered)
train_pca10 = project_onto_PC(train_x, pcs, n_components, feature_means)
test_pca10 = project_onto_PC(test_x, pcs, n_components, feature_means)

# train_pca10 (and test_pca10) is a representation of our training (and test)
# data after projecting each example onto the first 10 principal components.
# Train softmax regression on (train_pca10, train_y) and evaluate it on
# (test_pca10, test_y).
theta, cost_function_history = softmax_regression(train_pca10, train_y, temp_parameter=1, alpha=0.3, lambda_factor=1.0e-4, k=10, num_iterations=150)
# Cost-history plot left disabled for this run:
# plot_cost_function_over_time(cost_function_history)
# Evaluate on the 10-component test projection: theta was trained on
# 10-dimensional inputs, so the 18-component test_pca does not match it.
test_error10 = compute_test_error(test_pca10, test_y, theta, temp_parameter=1)
print("Test error with 10-dim PCA representation:", test_error10)

# ## Cubic Kernel ##
# # TODO: Find the 10-dimensional PCA representation of the training and test set

In [None]:
# 10-dimensional PCA representation of the training and test sets.
n_components = 10
# Center the data in this cell and derive the components from the centered
# copy, matching the other PCA cells; this also keeps the cell from depending
# on a feature_means value left behind by an earlier cell.
train_x_centered, feature_means = center_data(train_x)
pcs = principal_components(train_x_centered)
train_pca10 = project_onto_PC(train_x, pcs, n_components, feature_means)
test_pca10 = project_onto_PC(test_x, pcs, n_components, feature_means)


theta, cost_function_history = softmax_regression(train_pca10, train_y, temp_parameter=1, alpha=0.3, lambda_factor=1.0e-4, k=10, num_iterations=150)
# Cost-history plot left disabled for this run:
# plot_cost_function_over_time(cost_function_history)
test_error = compute_test_error(test_pca10, test_y, theta, temp_parameter=1)
# The label reports the dimensionality actually used in this cell (10).
print("Test error with 10-dim PCA representation:", test_error)

In [None]:

# Prerequisite: cubic_features() in features.py must be implemented, as the
# code below calls it.

train_cube = cubic_features(train_pca10)
test_cube = cubic_features(test_pca10)

# train_cube (and test_cube) is a representation of our training (and test) data
# after applying the cubic kernel feature mapping to the 10-dimensional PCA representations.


# Train softmax regression on (train_cube, train_y), plot the cost history,
# and evaluate the model on (test_cube, test_y).
theta, cost_function_history = softmax_regression(train_cube, train_y, temp_parameter=1, alpha= 0.3, lambda_factor = 1.0e-4, k = 10, num_iterations = 150)
plot_cost_function_over_time(cost_function_history)
test_error = compute_test_error(test_cube, test_y, theta, temp_parameter=1)
print("Test error with 10-dim PCA with cubic features:", test_error)