In [1]:
import numpy as np
import scipy.optimize as opt
from scipy.io import loadmat
from sklearn.preprocessing import PolynomialFeatures

from matplotlib import cm
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import LinearLocator, FormatStrFormatter

In [2]:
def load_data(filename):
    data = loadmat(filename)
    return data['X'], data['y'], data['Xval'], data['yval'], data['Xtest'], data['ytest']

In [3]:
def linear_reg_cost(theta, X, y, Lambda):
    m = X.shape[0]
    theta = theta.reshape(-1, y.shape[1])
    unreg_term = (1 / (2 * m)) * np.sum(np.square(np.dot(X, theta) - y))
    reg_term = (Lambda / (2 * m)) * np.sum(np.square(theta[1:len(theta)]))
    cost = unreg_term + reg_term
    grad = np.zeros(theta.shape)
    grad = (1 / m) * np.dot(X.T, np.dot(X, theta) - y) + (Lambda / m ) * theta
    unreg_grad = (1 / m) * np.dot(X.T, np.dot(X, theta) - y)
    grad[0] = unreg_grad[0]
    return (cost, grad.flatten())

def linear_reg_train(X, y, Lambda):
    theta = np.zeros([X.shape[1], 1])
    def cost_func(Theta):
        return linear_reg_cost(Theta, X, y, Lambda)
    result = opt.minimize(fun=cost_func, x0=theta, method='CG', jac=True, options={'maxiter':200})
    return result.x

def learning_curve(X, y, Xval, yval, Lambda):
    m = X.shape[0]
    error_train = np.zeros((m, 1))
    error_val = np.zeros((m, 1))
    for i in range(1, m + 1):
        theta = linear_reg_train(X[:i], y[:i], Lambda)
        error_train[i - 1] = linear_reg_cost(X[:i], y[:i], theta, 0)[0]
        error_val[i - 1] = linear_reg_cost(Xval, yval, theta, 0)[0]
    return error_train, error_val

In [4]:
def test_one():
    X, y, Xval, yval, Xtest, ytest = load_data('ex5data1.mat')
    print("Shapes of X{} and y{}".format(X.shape, y.shape))
    print("Shapes of Xval{} and yval{}".format(Xval.shape, yval.shape))
    print("Shapes of Xtest{} and ytest{}".format(Xtest.shape, ytest.shape))
    plt.figure(figsize=(8, 6))
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.title('Figure 1: Data')
    plt.plot(X, y, 'rx')
    plt.show()

def test_two():
    Lambda = 1.0
    theta = np.array([[1], [1]])
    X, y, Xval, yval, Xtest, ytest = load_data('ex5data1.mat')
    X_ones = np.insert(X, 0, 1, axis=1)
    cost, grad = linear_reg_cost(theta, X_ones, y, Lambda)
    print('Cost and Gradient at theta = [1, 1]:', cost, grad)

def test_three():
    Lambda = 0.0
    X, y, Xval, yval, Xtest, ytest = load_data('ex5data1.mat')
    X_ones = np.insert(X, 0, 1, axis=1)
    theta = linear_reg_train(X_ones, y, Lambda)
    print('Theta: ', theta)
    plt.figure(figsize=(8, 6))
    plt.xlabel('Change in water level (x)')
    plt.ylabel('Water flowing out of the dam (y)')
    plt.title('Figure 2: Linear Fit')
    plt.plot(X, y, 'rx')
    plt.plot(X, np.dot(np.insert(X, 0, 1, axis=1), theta), '--')
    plt.show()

def test_four():
    Lambda = 0.0
    X, y, Xval, yval, Xtest, ytest = load_data('ex5data1.mat')
    X_ones = np.insert(X, 0, 1, axis=1)
    Xval_ones = np.insert(Xval, 0, 1, axis=1)
    error_train, error_val = learning_curve(X_ones, y, Xval_ones, yval, Lambda)

    print('# Training Examples\tTrain Error\tCross Validation Error\n')
    for i in range(X.shape[0]):
        print('{}\t\t\t{:f}\t{:f}\n'.format(i+1, float(error_train[i]), float(error_val[i])))

    plt.figure(figsize=(8, 6))
    plt.xlabel('Number of training examples')
    plt.ylabel('Error')
    plt.title('Figure 3: Linear regression learning curve')
    plt.plot(range(1,m+1), error_train, 'b', label='Train')
    plt.plot(range(1,m+1), error_val, 'g', label='Cross Validation')
    plt.legend()
    plt.show()

In [5]:
test_four()

IndexError: tuple index out of range