## Programming Exercise 5: Regularized Linear Regression and Bias vs. Variance

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
#import pandas as pd
import scipy.io #Used to load the OCTAVE *.mat files
#import scipy.misc #Used to show matrix as an image
#import matplotlib.cm as cm #Used to display images in a specific colormap
#import random #To pick random images to display
import scipy.optimize #fmin_cg to train the linear regression
#import itertools
#from scipy.special import expit #Vectorized sigmoid function

### 1 Regularized Linear Regression

#### 1.1 Visualizing the dataset

In [None]:
# Load the exercise data set (an OCTAVE/MATLAB *.mat file) into a dict of arrays.
datafile = 'data/ex5data1.mat'
mat = scipy.io.loadmat(datafile)

# Pull out the three splits stored in the file:
# training, cross validation and test.
X, y = mat['X'], mat['y']
Xval, yval = mat['Xval'], mat['yval']
Xtest, ytest = mat['Xtest'], mat['ytest']

# Prepend a column of 1's (the bias/intercept feature) to every design matrix.
X, Xval, Xtest = (
    np.insert(X, 0, 1, axis=1),
    np.insert(Xval, 0, 1, axis=1),
    np.insert(Xtest, 0, 1, axis=1),
)
# After this cell, column 1 of each X is the feature plotted below as
# "change in water level" and y is the "water flowing out of the dam".

In [None]:
#print "X.shape = ",X.shape
#print "y.shape = ",y.shape
#print "Xval.shape = ",Xval.shape
#print "Xtest.shape = ",Xtest.shape

In [None]:
def plotData():
    """Scatter-plot the training set on a new 8x5 figure.

    Reads the notebook-level ``X`` and ``y``: column 1 of ``X`` goes on the
    horizontal axis and ``y`` on the vertical axis, drawn as red crosses.
    """
    plt.figure(figsize=(8, 5))
    plt.plot(X[:, 1], y, 'rx')
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.grid(True)

plotData()

#### 1.2 Regularized linear regression cost function

In [None]:
def h(theta, X):
    """Linear hypothesis: return the predictions ``X . theta``."""
    return np.dot(X, theta)


def computeCost(mytheta, myX, myy, mylambda=0.):
    """Return the regularized linear regression cost as a Python float.

    J = 1/(2m) * sum((h - y)^2) + lambda/(2m) * sum(theta[1:]^2)

    mytheta  -- parameter vector with n entries, shaped (n,) or (n, 1)
    myX      -- design matrix with m rows and n columns
    myy      -- target values with m entries, shaped (m,) or (m, 1)
    mylambda -- regularization strength; the first (bias) parameter is
                never penalized
    """
    m = myX.shape[0]
    # Flatten both predictions and targets to length m. Mixing a (m, 1)
    # prediction with a (m,) target would otherwise broadcast to (m, m).
    residuals = np.asarray(h(mytheta, myX)).reshape(m) - np.asarray(myy).reshape(m)
    # 1-D dot products yield true scalars, so float() never has to convert a
    # 1x1 array (a conversion NumPy has deprecated).
    mycost = float(np.dot(residuals, residuals)) / (2 * m)
    penalized = np.asarray(mytheta).reshape(-1)[1:]  # skip the bias term
    regterm = (float(mylambda) / (2 * m)) * float(np.dot(penalized, penalized))
    return mycost + regterm

In [None]:
#"Using theta initialized at [1; 1], you should expect to see an output of 303.993."
# Sanity-check the cost function at theta = [1; 1] with no regularization.
mytheta = np.array([[1.],[1.]])
# Parenthesised form: prints the same on Python 2 and is valid on Python 3.
print(computeCost(mytheta, X, y, mylambda=0.))

#### 1.3 Regularized linear regression gradient

In [None]:
def computeGradient(mytheta, myX, myy, mylambda=0.):
    """Return the gradient of the regularized linear regression cost.

    mytheta  -- parameter vector with n entries, shaped (n,) or (n, 1)
    myX      -- design matrix with m rows and n columns
    myy      -- target values with m entries, shaped (m,) or (m, 1)
    mylambda -- regularization strength

    The result is an (n, 1) column vector, the same layout as a column theta.
    """
    m = myX.shape[0]
    theta = np.asarray(mytheta).reshape((-1, 1))
    # Force the targets into a column so a 1-D y cannot broadcast the
    # residual into an (m, m) matrix.
    targets = np.asarray(myy).reshape((m, 1))
    myh = h(theta, myX).reshape((m, 1))
    grad = (1. / m) * myX.T.dot(myh - targets)
    # The multiplication allocates a new array, so zeroing its first entry
    # does not modify the caller's theta.
    regterm = (float(mylambda) / m) * theta
    regterm[0] = 0  # don't regularize the bias term
    return grad + regterm

#Here's a wrapper for computeGradient that flattens the output
#This is for the minimization routine that wants everything flattened
def computeGradientFlattened(mytheta, myX, myy, mylambda=0.):
    """Return computeGradient(...) flattened to a 1-D array of n entries."""
    # Forward the caller's lambda. A hard-coded 0 here would make the
    # optimizer follow the unregularized gradient while minimizing the
    # regularized cost.
    return computeGradient(mytheta, myX, myy, mylambda=mylambda).flatten()

In [None]:
#"Using theta initialized at [1; 1] you should expect to see a
#gradient of [-15.30; 598.250]"
# Sanity-check the gradient at theta = [1; 1] with the default lambda of 0.
mytheta = np.array([[1.],[1.]])
# Parenthesised form: prints the same on Python 2 and is valid on Python 3.
print(computeGradient(mytheta, X, y))

#### 1.4 Fitting linear regression

In [None]:
def optimizeTheta(myTheta_initial, myX, myy, mylambda=0.):
    """Fit theta by minimizing computeCost with scipy's fmin_cg.

    myTheta_initial -- starting guess for theta; its first dimension sets
                       the number of rows of the returned column vector
    myX, myy        -- design matrix and targets, passed through to
                       computeCost and computeGradientFlattened
    mylambda        -- regularization strength, passed through likewise

    Returns the fitted theta reshaped to (myTheta_initial.shape[0], 1).
    """
    flat_solution = scipy.optimize.fmin_cg(
        computeCost,
        x0=myTheta_initial,
        fprime=computeGradientFlattened,
        args=(myX, myy, mylambda),
    )
    n_rows = myTheta_initial.shape[0]
    return flat_solution.reshape((n_rows, 1))

In [None]:
# Start from theta = [1; 1] and fit the training set without regularization.
mytheta = np.ones((2, 1))
fit_theta = optimizeTheta(mytheta, X, y, mylambda=0.)

In [None]:
# Redraw the training scatter plot, then overlay the fitted line on it.
plotData()
plt.plot(X[:, 1], h(fit_theta, X).ravel())

### 2 Bias-variance

#### 2.1 Learning curves

### 3 Polynomial regression

#### 3.1 Learning Polynomial Regression

#### 3.2 Optional: Adjusting the regularization parameter

#### 3.3 Selecting $\lambda$ using a cross validation set

#### 3.4 Optional: Computing test set error

#### 3.5 Optional: Plotting learning curves with randomly selected examples