# Simple linear regression

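We assume a linear model of the following form:

$$y_i = \beta x_i + \alpha + \varepsilon_i$$

where $\alpha$ is the intercept, $\beta$ is the slope, and $\varepsilon_i$ is the error term.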

In [2]:
def predict(alpha, beta, x_i):
    """Return the linear model's estimate for x_i, i.e. beta * x_i + alpha."""
    slope_term = beta * x_i
    return slope_term + alpha

In [3]:
def error(alpha, beta, x_i, y_i):
    """Return the residual for a single observation.

    The residual is the true output minus the model's estimate:
    y_i - predict(alpha, beta, x_i).
    """
    estimate = predict(alpha, beta, x_i)
    return y_i - estimate

In [4]:
def sum_of_squared_errors(alpha, beta, x, y):
    """Return the sum of squared residuals over the paired values of x and y."""
    total = 0
    for x_i, y_i in zip(x, y):
        residual = error(alpha, beta, x_i, y_i)
        total += residual ** 2
    return total

The following section uses the least squares method to fit the model.

In [5]:
def mean(x):
    """Return the arithmetic mean of the values in x.

    x may be any iterable of numbers; it is materialized once so that it
    can be both summed and counted.

    Raises:
        ValueError: if x is empty, since the mean is undefined.
    """
    values = list(x)
    if not values:
        raise ValueError("mean requires at least one data point")
    # float() keeps the division exact even where "/" on two ints truncates.
    return sum(values) / float(len(values))

In [11]:
def de_mean(x):
    """Return the list of deviations x_i - mean(x).

    An empty x yields an empty list.
    """
    if not x:
        return []
    # Call mean once; the original subtracted the function object itself.
    x_bar = mean(x)
    return [x_i - x_bar for x_i in x]

In [12]:
def de_mean_square(x):
    """Return the list of squared deviations (x_i - mean(x)) ** 2.

    An empty x yields an empty list.
    """
    if not x:
        return []
    # The mean does not change per element, so compute it once rather than
    # once for every x_i.
    x_bar = mean(x)
    return [(x_i - x_bar) ** 2 for x_i in x]

In [13]:
import math

def standard_deviation(x):
    """Return the standard deviation of x, dividing by len(x) - 1.

    Computed as the square root of the summed squared deviations from
    de_mean_square(x) over (len(x) - 1).
    """
    degrees_of_freedom = len(x) - 1
    variance = sum(de_mean_square(x)) / degrees_of_freedom
    return math.sqrt(variance)

In [14]:
def covariance(x, y):
    """Return the covariance of x and y, dividing by len(x) - 1.

    Relies on a `dot` helper that is not defined in this part of the
    notebook; presumably it returns the sum of element-wise products of
    two lists (TODO confirm).
    """
    degrees_of_freedom = len(x) - 1
    x_deviations = de_mean(x)
    y_deviations = de_mean(y)
    return dot(x_deviations, y_deviations) / degrees_of_freedom

In [15]:
def correlation(x, y):
    """Return the correlation of x and y.

    This is the covariance divided by both standard deviations. If either
    standard deviation is not positive, 0 is returned instead.
    """
    stdev_x = standard_deviation(x)
    stdev_y = standard_deviation(y)
    # Guard: without variation in both inputs the ratio is undefined.
    if not (stdev_x > 0 and stdev_y > 0):
        return 0
    return covariance(x, y) / stdev_x / stdev_y

In [16]:
def least_squares_fit(x, y):
    """Fit the linear model to training values x and y.

    Returns the pair (alpha, beta), where beta is
    correlation(x, y) * standard_deviation(y) / standard_deviation(x)
    and alpha is mean(y) - beta * mean(x).
    """
    scaled_correlation = correlation(x, y) * standard_deviation(y)
    beta = scaled_correlation / standard_deviation(x)
    alpha = mean(y) - beta * mean(x)
    return alpha, beta

In [17]:
def r_squared(alpha, beta, x, y):
    """Return the fraction of variation in y captured by the model.

    Computed as 1 minus the ratio of the model's sum of squared errors to
    the sum of squared deviations of y from its mean.
    """
    unexplained = sum_of_squared_errors(alpha, beta, x, y)
    total_variation = sum(de_mean_square(y))
    return 1.0 - unexplained / total_variation
    

## Using gradient descent

We write theta = [alpha, beta]

In [6]:
def squared_error(x_i, y_i, theta):
    """Return the squared residual for one observation, with theta = [alpha, beta]."""
    alpha, beta = theta
    residual = error(alpha, beta, x_i, y_i)
    return residual ** 2

In [7]:
def squared_error_gradient(x_i, y_i, theta):
    """Return the gradient of the squared error for one observation.

    theta is [alpha, beta]; the result is a two-element list holding the
    partial derivatives with respect to alpha and beta, in that order.
    """
    alpha, beta = theta
    residual = error(alpha, beta, x_i, y_i)
    d_alpha = -2 * residual        # derivative w.r.t. alpha
    d_beta = -2 * residual * x_i   # derivative w.r.t. beta
    return [d_alpha, d_beta]

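Note that `minimize_stochastic` and the data (`num_friends_good`, `daily_minutes_good`) have not been defined at this point, so the cell below fails with a `NameError`.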

In [8]:
import random

# Seed the generator so the random starting point is reproducible.
random.seed(0)
theta = [random.random(), random.random()]

# Fit alpha and beta by stochastic gradient descent, starting from theta
# with an initial step size of 0.0001.
alpha, beta = minimize_stochastic(
    squared_error,
    squared_error_gradient,
    num_friends_good,
    daily_minutes_good,
    theta,
    0.0001,
)

NameError: name 'num_friends_good' is not defined

The following is an implementation of the stochastic gradient descent method.

In [1]:
def in_random_order(data):
    """Generate the elements of data in a shuffled order.

    The positions of data are collected and shuffled with random.shuffle,
    then each element is yielded by index, so data must support indexing.
    """
    positions = []
    for position, _ in enumerate(data):
        positions.append(position)
    random.shuffle(positions)
    for position in positions:
        yield data[position]

In [2]:
def vector_subtract(v, w):
    """Return the element-wise difference v - w as a list.

    Elements are paired with zip, so the result is as long as the shorter
    of the two inputs.
    """
    difference = []
    for left, right in zip(v, w):
        difference.append(left - right)
    return difference

In [3]:
def scalar_multiply(alpha, v):
    """Return a list with every element of v multiplied by the scalar alpha."""
    scaled = []
    for component in v:
        scaled.append(alpha * component)
    return scaled

In [4]:
def minimize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01):
    """Minimize the summed target_fn over (x, y) by stochastic gradient descent.

    Args:
        target_fn: called as target_fn(x_i, y_i, theta); returns the value
            to minimize for one data point.
        gradient_fn: called as gradient_fn(x_i, y_i, theta); returns the
            gradient of target_fn for one data point as a list.
        x, y: paired input and output values.
        theta_0: starting parameter vector.
        alpha_0: initial step size.

    Returns:
        The parameter vector with the smallest total target value seen.
        The search stops after 100 consecutive passes without improvement.
    """
    # Materialize the pairs: on Python 3 zip() is a one-shot iterator, which
    # would be exhausted by the first pass and leave nothing to train on.
    data = list(zip(x, y))
    theta = theta_0
    alpha = alpha_0
    min_theta, min_value = None, float("inf")
    iterations_with_no_improvement = 0

    while iterations_with_no_improvement < 100:
        value = sum(target_fn(x_i, y_i, theta) for x_i, y_i in data)

        if value < min_value:
            # New best: remember it and restore the original step size.
            min_theta, min_value = theta, value
            iterations_with_no_improvement = 0
            alpha = alpha_0
        else:
            # No improvement: shrink the step size and try again.
            iterations_with_no_improvement += 1
            alpha *= 0.9

        # Take one gradient step per data point, in shuffled order.
        for x_i, y_i in in_random_order(data):
            gradient_i = gradient_fn(x_i, y_i, theta)
            theta = vector_subtract(theta, scalar_multiply(alpha, gradient_i))

    return min_theta
    