# Visualizing Linear Regression and Gradient Descent

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Generate a Dataset

In [None]:
# number of samples; the fixed seed makes the dataset reproducible
smsize = 10
np.random.seed(111)
# x is drawn uniformly from [0, 1); y is 0.3 + 0.5 * x plus uniform [0, 1) noise
x = np.random.rand(smsize)
y = 0.3 + 0.5 * x + np.random.rand(smsize)
# print the samples as a tab-separated table
print('x\ty')
for x_i, y_i in zip(x, y):
    print('%.4f\t%.4f' % (x_i, y_i))

## Visualizing Simple Linear Regression

In [None]:
# a scatter plot of the raw data
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(1, 1, 1)
ax.plot(x, y, 'o')
# simple linear regression; np.polyfit returns the highest power first,
# so LR[0] is the slope and LR[1] is the intercept
LR = np.polyfit(x, y, 1)
# build the fitted polynomial once and reuse it below
fit_line = np.poly1d(LR)
# a least-squares regression line
x2 = np.linspace(0, 1, 11)
y2 = fit_line(x2)
ax.plot(x2, y2, '-', lw=3)
# visualizing the residuals
y_pred = fit_line(x)
res = y - y_pred
# errorbar takes non-negative lengths: the lower arm is used for points
# below the line (negative residual), the upper arm for points above it
yerr_lm = np.maximum(-res, 0)
yerr_um = np.maximum(res, 0)
# fmt='s' draws square markers on the fitted values with no connecting line
ax.errorbar(x, y_pred, yerr=[yerr_lm, yerr_um], fmt='s', elinewidth=2)
# configuring the plot
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Simple Linear Regression')
ax.set_ylim(0, 2)
# '%+.2f' always prints the sign, so a negative slope reads 'y=a-bx' rather than 'y=a+-bx'
ax.text(0.82, 1.35, 'y=%.2f%+.2fx' % (LR[1], LR[0]))
plt.show()

## Bias-Variance Tradeoff
- Error due to bias is the difference between the average prediction of our model and the actual (correct) value we are trying to predict. A model with high bias pays little attention to training data and is oversimplified. It leads to high errors on both training and test data. 
- Error due to variance is the variation in model prediction for a given data point. A model with high variance pays a lot of attention to training data and does not generalize to data it has not seen before. It performs well on training data but has high errors on test data.
- An underfitting model is too simple and has too few parameters. It may have high bias and low variance.
- An overfitting model is too complex and has too many parameters. It may have low bias and high variance.

In [None]:
# polynomial regression equations of different degrees
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(1, 1, 1)
ax.plot(x, y, 'o', label='data')
# dense grid so the high-degree curve is drawn smoothly
x2 = np.linspace(0, 1, 101)
# baseline; degree = 0 (a constant equal to the mean of y)
ax.axhline(np.mean(y), xmin=0, xmax=1, linestyle='-', label='degree = 0 (mean)')
# degree = 1
poly_1 = np.poly1d(np.polyfit(x, y, 1))
ax.plot(x2, poly_1(x2), '-.', label='degree = 1')
# degree = smsize-1: as many coefficients as there are data points
poly_s = np.poly1d(np.polyfit(x, y, smsize - 1))
ax.plot(x2, poly_s(x2), '--', label='degree = %d' % (smsize - 1))
# configuring the figure
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Polynomial Regression of Different Degrees')
ax.set_ylim(-3, 8)
# the legend tells the three fitted curves apart
ax.legend()
plt.show()

## Visualizing Gradient Descent

In [None]:
# linear regression fitted by gradient descent on the mean squared error
para_history = []
loss_history = []
# learning rate
lr = 0.2
# number of gradient steps; shared by the training loop and the report below
n_iter = 400
# starting values for the intercept (b0) and the slope (b1)
b0 = -5
b1 = -5
for t in range(n_iter):
    y_pred = b0 + b1 * x
    res = y - y_pred
    loss = np.mean(np.square(res))
    # record the parameters and loss as they are before this step's update
    para_history.append((b0, b1))
    loss_history.append(loss)
    # gradient descent: d(loss)/d(b0) = -2 * mean(res), d(loss)/d(b1) = -2 * mean(res * x)
    b0 += 2 * lr * np.mean(res)
    b1 += 2 * lr * np.mean(res * x)
# evaluate the loss at the final parameters so it matches the printed b0 and b1
final_loss = np.mean(np.square(y - (b0 + b1 * x)))
print('Loss = %.4f' % final_loss)
print("b0=%.4f" % b0)
print("b1=%.4f" % b1)
# report every 10th iteration; loss_history[i] is the loss before update i + 1
for i in range(9, n_iter, 10):
    print("Iteration %3d: Loss = %.4f" % (i + 1, loss_history[i]))

## Visualizing Gradient Descent on a Contour Plot

In [None]:
# a contour plot to demonstrate gradient descent
levels = [0, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0, 40.0, 60.0, 80.0, 100.0]
# grid of candidate intercepts (w0) and slopes (w1)
w0 = np.linspace(-10, 10, 101)
w1 = np.linspace(-10, 10, 101)
B0, B1 = np.meshgrid(w0, w1)
# mean squared error at every grid point: the added trailing axis broadcasts
# each (intercept, slope) pair against all samples, then it is averaged away
f = np.mean(np.square(y - (B0[:, :, np.newaxis] + B1[:, :, np.newaxis] * x)), axis=2)
cp = plt.contour(B0, B1, f, levels)
plt.clabel(cp, inline=True, fontsize=8)
# dashed reference lines through the origin
plt.axhline(0, color='black', alpha=.5, dashes=[2, 4], linewidth=1)
plt.axvline(0, color='black', alpha=0.5, dashes=[2, 4], linewidth=1)
# one arrow per step for the first five gradient descent updates;
# zip stops early instead of raising if fewer steps were recorded
for start, end in zip(para_history[:5], para_history[1:6]):
    plt.annotate('', xy=end, xytext=start,
                 arrowprops={'arrowstyle': '->', 'color': 'r', 'lw': 1},
                 va='center', ha='center')
plt.xlabel('w0')
plt.ylabel('w1')
plt.show()