# Question 2
**Ridge Regression and LASSO**

In [1]:
import numpy as np

np.random.seed(23)
# Generate the dataset randomly based on the question
X = np.random.rand(150, 75)
theta = np.zeros(75)
for i in xrange(10):
    r = np.random.randint(0, 2)
    if r == 0:
        theta[i] = -10
    else:
        theta[i] = 10
epsilon = 0.1 * np.random.randn(150)
# Generate target labels as y = X*theta + epsilon
y = X.dot(theta) + epsilon

# Split the dataset into train, validation and test sets
train_X = X[:80]
train_y = y[:80]
valid_X = X[80:100]
valid_y = y[80:100]
test_X = X[100:150]
test_y = y[100:150]

In [2]:
# Calculates mean square error given true values and predicted values as MSE = ||y - y'||^2/no_samples
def mse(true_y, pred_y):
    return np.linalg.norm(true_y - pred_y)**2/len(true_y)

# Train a given classifier on train set and check MSE on validation set
def train_clf(clf):
    clf.fit(train_X, train_y)
    predict_y = clf.predict(valid_X)
    print "Lambda :",clf.alpha,"\tValidation set MSE :",mse(valid_y, predict_y)

# Predict on test set after training on whole train and validation sets and
# report MSE, number of weights modified as required by question
def test_clf(clf):
    clf.fit(np.concatenate((train_X,valid_X)), np.concatenate((train_y,valid_y)))
    predict_y = clf.predict(test_X)
    print "Lambda :",clf.alpha,"\tTest set MSE :",mse(test_y, predict_y)

    # Calculate the number of weights that changed from zero to non-zero and vice versa based
    # on both using absoluate value and thresold value as asked by the question
    th = 0.01 # Thresold
    # Differnet counters
    count_0_to_non0 = 0
    count_non0_to_0 = 0
    count_th_0_to_non0 = 0
    count_th_non0_to_0 = 0

    # Calculate the counters and print them
    for i in xrange(75):
        if i < 10:
            if clf.coef_[i] == 0:
                count_non0_to_0 += 1
            if clf.coef_[i] < th and clf.coef_[i] > -th:
                count_th_non0_to_0 += 1
        else:
            if clf.coef_[i] != 0:
                count_0_to_non0 += 1
            if not(clf.coef_[i] < th and clf.coef_[i] > -th):
                count_th_0_to_non0 += 1

    #print "Learned weights :",clf.coef_           
    print "Weight components that became zero from non zero :",count_non0_to_0
    print "Weight components that became non zero from zero :",count_0_to_non0
    print "Weight components that became zero (below thresold) from non zero :",count_th_non0_to_0
    print "Weight components that became non zero (above thresold) from zero :",count_th_0_to_non0

# My implementation of Ridge

In [3]:
from scipy.optimize import minimize

# My implemenation for Ridge Regressor
class MyRidge:
    # Set lambda parameter
    def __init__(self, alpha=0.1):
        self.alpha = alpha
    
    # Optimization function L = lambda * ||W||^2 + ||y - XW||^2
    def opt_fun(self, w):
        return self.alpha*np.linalg.norm(w)**2 + np.linalg.norm(self.y - self.X.dot(w))**2
    
    # Calcualte the weights minimizing opt func using given train set
    def fit(self, X, y):
        self.X = X
        self.y = y
        self.coef_ = minimize(self.opt_fun, np.ones(len(X[0]))).x
    
    # Predict the values on test set using weights learnt from fit
    def predict(self, X):
        return X.dot(self.coef_)

In [4]:
# Run MyRidge for different lambdas and calculate lambda which gives min MSE
train_clf(MyRidge(alpha=1))
train_clf(MyRidge(alpha=0.1))
train_clf(MyRidge(alpha=0.01))
train_clf(MyRidge(alpha=0.008))
train_clf(MyRidge(alpha=0.005))
train_clf(MyRidge(alpha=0.002))
train_clf(MyRidge(alpha=0.001))

Lambda : 1 	Validation set MSE : 9.18207618636
Lambda : 0.1 	Validation set MSE : 1.29762852376
Lambda : 0.01 	Validation set MSE : 0.0714807303795
Lambda : 0.008 	Validation set MSE : 0.0552144120455
Lambda : 0.005 	Validation set MSE : 0.0436139706537
Lambda : 0.002 	Validation set MSE : 0.0607530968025
Lambda : 0.001 	Validation set MSE : 0.0777612683257


In [5]:
# Train on train + validation set, report MSE on test set and the number of changed weights
test_clf(MyRidge(alpha=0.005))

Lambda : 0.005 	Test set MSE : 0.0619188660745
Weight components that became zero from non zero : 0
Weight components that became non zero from zero : 65
Weight components that became zero (below thresold) from non zero : 0
Weight components that became non zero (above thresold) from zero : 59


# Ridge from sklearn

In [6]:
from sklearn.linear_model import Ridge
# Run sklearn Ridge for lambda obtained from above my Ridge implementation (for verification)
train_clf(Ridge(alpha=0.005))

Lambda : 0.005 	Validation set MSE : 0.147104467893


In [7]:
# Train on train + validation set, report MSE on test set and the number of changed weights
test_clf(Ridge(alpha=0.005))

Lambda : 0.005 	Test set MSE : 0.0620698315891
Weight components that became zero from non zero : 0
Weight components that became non zero from zero : 65
Weight components that became zero (below thresold) from non zero : 0
Weight components that became non zero (above thresold) from zero : 58


# Lasso from sklearn

In [8]:
from sklearn.linear_model import Lasso

# Run Lasso for different lambdas and calculate lambda which gives min MSE
train_clf(Lasso(alpha=1))
train_clf(Lasso(alpha=0.1))
train_clf(Lasso(alpha=0.05))
train_clf(Lasso(alpha=0.02))
train_clf(Lasso(alpha=0.01))
train_clf(Lasso(alpha=0.005))
train_clf(Lasso(alpha=0.002))
train_clf(Lasso(alpha=0.001))

Lambda : 1 	Validation set MSE : 94.9262697012
Lambda : 0.1 	Validation set MSE : 1.2068891066
Lambda : 0.05 	Validation set MSE : 0.298884619009
Lambda : 0.02 	Validation set MSE : 0.0520786059261
Lambda : 0.01 	Validation set MSE : 0.0198349808111
Lambda : 0.005 	Validation set MSE : 0.0120266106574
Lambda : 0.002 	Validation set MSE : 0.0106285352288
Lambda : 0.001 	Validation set MSE : 0.0139540096442


In [9]:
# Train on train + validation set, report MSE on test set and the number of changed weights
test_clf(Lasso(alpha=0.002))

Lambda : 0.002 	Test set MSE : 0.015108694972
Weight components that became zero from non zero : 0
Weight components that became non zero from zero : 24
Weight components that became zero (below thresold) from non zero : 0
Weight components that became non zero (above thresold) from zero : 19


# Report

| Weights components | My Ridge | Sklearn Ridge | Sklearn LASSO |
|---|---|---|---|
| zero from non zero | 0 | 0 | 0 |
| non zero from zero | 65 | 65 | 24 |
| zero(below thresold) from non zero | 0 | 0 | 0 |
| non zero(above thresold) from zero | 59 | 58 | 19 |


1. The LASSO from sklearn gave least MSE when comapred to that of both my Ridge and sklearn Ridge Regressor.
1. The MSE obtained on test set using my Ridge and sklearn Ridge was close with sklearn giving less MSE.
1. The deviation in number of zero and non-zero weights was more for Ridge and that of LASSO.
1. Some of the weights learned by LASSO were exact zero(without thresold) where as there was none for Ridge Regressor.