In [42]:

import pickle, gzip, numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import math


In [57]:
def plot_images(X):
    if X.ndim == 1:
        X = np.array([X])
    num_images = X.shape[0]
    num_rows = math.floor(math.sqrt(num_images))
    num_cols = math.ceil(num_images/num_rows)
    for i in range(num_images):
        reshaped_image = X[i,:].reshape(28,28)
        plt.subplot(num_rows, num_cols, i+1)
        plt.imshow(reshaped_image, cmap = cm.Greys_r)
        plt.axis('off')
    plt.show()


def pick_examples_of(X, Y, labels, total_count):
    bool_arr = None
    for label in labels:
        bool_arr_for_label = (Y == label)
        if bool_arr is None:
            bool_arr = bool_arr_for_label
        else:
            bool_arr |= bool_arr_for_label
    filtered_x = X[bool_arr]
    filtered_y = Y[bool_arr]
    return (filtered_x[:total_count], filtered_y[:total_count])


def extract_training_and_test_examples_with_labels(train_x, train_y, test_x, test_y, labels, training_count, test_count):
    filtered_train_x, filtered_train_y = pick_examples_of(train_x, train_y, labels, training_count)
    filtered_test_x, filtered_test_y = pick_examples_of(test_x, test_y, labels, test_count)
    return (filtered_train_x, filtered_train_y, filtered_test_x, filtered_test_y)

def write_pickle_data(data, file_name):
    f = gzip.open(file_name, 'wb')
    pickle.dump(data, f)
    f.close()

def read_pickle_data(file_name):
    f = gzip.open(file_name, 'rb')
    data = pickle.load(f, encoding='latin1')
    f.close()
    return data

def get_MNIST_data():
    """
    Reads mnist dataset from file

    Returns:
        train_x - 2D Numpy array (n, d) where each row is an image
        train_y - 1D Numpy array (n, ) where each row is a label
        test_x  - 2D Numpy array (n, d) where each row is an image
        test_y  - 1D Numpy array (n, ) where each row is a label

    """
    train_set, valid_set, test_set = read_pickle_data('../Datasets/mnist.pkl.gz')
    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    train_x = np.vstack((train_x, valid_x))
    train_y = np.append(train_y, valid_y)
    test_x, test_y = test_set
    return (train_x, train_y, test_x, test_y)

def load_train_and_test_pickle(file_name):
    train_x, train_y, test_x, test_y = read_pickle_data(file_name)
    return train_x, train_y, test_x, test_y

# returns the feature set in a numpy ndarray
def load_CSV(filename):
    stuff = np.asarray(np.loadtxt(open(filename, 'rb'), delimiter=','))
    return stuff


In [13]:
X= np.matrix([[0.3611579,  0.96226716, 0.98064663, 0.04055572],
 [0.94149945, 0.72212435, 0.37570675, 0.68049934],
 [0.19970288, 0.65725451, 0.57015567, 0.7615745 ],
 [0.77121656, 0.78703874, 0.3070905,  0.77986302],
 [0.39350407, 0.51370065, 0.11471555, 0.23838116],
 [0.01483955, 0.41598833, 0.25161566, 0.59752867],
 [0.02430031, 0.42870227, 0.94195133, 0.88919031],
 [0.13034396, 0.19332577, 0.51945432, 0.67423343],
 [0.86119595, 0.94570355, 0.43848299, 0.54881639],
 [0.44029438, 0.17558378, 0.66478925, 0.02588764],
 [0.64276425, 0.65239233, 0.45950711, 0.48295596],
 [0.68698576, 0.06876205, 0.24937567, 0.07892581],
 [0.3391765,  0.86031843, 0.13269297, 0.60476224],
 [0.31696672, 0.45987932, 0.34364184, 0.5313443 ],
 [0.46187926, 0.61927595, 0.06312921, 0.55335085],
 [0.14107315, 0.42795399, 0.51626323, 0.72871875],
 [0.51085382, 0.31483097, 0.54478224, 0.14754355],
 [0.07806636, 0.40740571, 0.50336713, 0.99114164],
 [0.72788821, 0.80005046, 0.45626062, 0.93235056]])
Y= [0.54078807,0.44839133, 0.37717679, 0.22286258, 0.00623512, 0.3943331,
 0.36932991, 0.72361028, 0.15765059, 0.3836109,  0.62684008, 0.35196231,
 0.20906979, 0.79155847, 0.46009281, 0.94344581, 0.14387682, 0.69018976,
 0.52659895]
lambda_factor= 0.9669267576737237

In [29]:
X.shape[1]

4

# Closed Form Solution of Linear Regression


In [59]:
### Functions for you to fill in ###

def closed_form(X, Y, lambda_factor):
    
    """
    Computes the closed form solution of linear regression with L2 regularization

    Args:
        X - (n, d + 1) NumPy array (n datapoints each with d features plus the bias feature in the first dimension)
        Y - (n, ) NumPy array containing the labels (a number from 0-9) for each
            data point
        lambda_factor - the regularization constant (scalar)
    Returns:
   
        theta - (d + 1, ) NumPy array containing the weights of linear regression. Note that theta[0]
        represents the y-axis intercept of the model and therefore X[0] = 1
    """
    # YOUR CODE HERE
#     theta = (np.transpose(X)*X+lambda_factor)
#     theta = np.linalg.inv(X.T.dot(X) + lambda_factor*np.eye(X.shape[1])).dot(X.T).dot(Y)
    theta = np.linalg.inv(X.T.dot(X) + lambda_factor*np.eye(X.shape[1], dtype=int)).dot(X.T).dot(Y)
    return theta
#     theta = np.dot(np.dot(np.linalg.inv(np.dot(X, np.transpose(X)) + lambda_factor*np.eye(X.shape[1], dtype=int)), np.transpose(X)),Y)
    
 
    
#     raise NotImplementedError

### Functions which are already complete, for you to use ###

In [28]:
A=closed_form(X, Y, lambda_factor)
A

matrix([[0.05545423, 0.04987305, 0.3426774 , 0.35276135]])

# Test Error on Linear Regression

In [60]:
get_MNIST_data()

FileNotFoundError: [Errno 2] No such file or directory: '../Datasets/mnist.pkl.gz'

In [48]:
test_x

NameError: name 'test_x' is not defined

In [64]:
read_pickle_data('mnist.pkl.gz')

FileNotFoundError: [Errno 2] No such file or directory: 'mnist.pkl.gz'

In [45]:
test_x

NameError: name 'test_x' is not defined

In [31]:
def compute_test_error_linear(test_x, Y, theta):
    test_y_predict = np.round(np.dot(test_x, theta))
    test_y_predict[test_y_predict < 0] = 0
    test_y_predict[test_y_predict > 9] = 9
    return 1 - np.mean(test_y_predict == Y)

In [32]:
B=compute_test_error_linear(test_x, Y, theta)
B

NameError: name 'test_x' is not defined

In [None]:
import numpy as np

### Functions for you to fill in ###

def closed_form(X, Y, lambda_factor):
    """
    Computes the closed form solution of linear regression with L2 regularization

    Args:
        X - (n, d + 1) NumPy array (n datapoints each with d features plus the bias feature in the first dimension)
        Y - (n, ) NumPy array containing the labels (a number from 0-9) for each
            data point
        lambda_factor - the regularization constant (scalar)
    Returns:
   
        theta - (d + 1, ) NumPy array containing the weights of linear regression. Note that theta[0]
        represents the y-axis intercept of the model and therefore X[0] = 1
    """
    # YOUR CODE HERE
    raise NotImplementedError

### Functions which are already complete, for you to use ###

def compute_test_error_linear(test_x, Y, theta):
    test_y_predict = np.round(np.dot(test_x, theta))
    test_y_predict[test_y_predict < 0] = 0
    test_y_predict[test_y_predict > 9] = 9
    return 1 - np.mean(test_y_predict == Y)

In [66]:
 import utils.py as ut

ModuleNotFoundError: No module named 'utils'