# Machine Learning stuff (computations, plotting, data loading...)!

In [2]:
import numpy as np

# LINEAR REGRESSION

## Cost function

$$J(\mathbf{w},b)=\frac{1}{2m}\sum_{i=0}^{m-1}\left(f_{\mathbf{w},b}(\mathbf{x}^{(i)})-y^{(i)}\right)^2,$$
where $\mathbf{w}$ and $\mathbf{y}$ are (row) vectors of sizes $n$ (number of features) and $m$ (number of training examples), respectively. The $m\times n$ matrix $\mathbf{x}$ stores the training data set, with the $i$-th row $\mathbf{x}^{(i)}$ holding the features of the $i$-th example. The linear model is
$$f_{\mathbf{w},b}(\mathbf{x}^{(i)})=\mathbf{w}\cdot\mathbf{x}^{(i)}+b.$$

In [1]:
def cost_linear_regression(x, y, w, b):
    """Return the squared-error cost J(w, b) of a linear model.

    For each sample ``i`` the prediction is ``np.dot(w, x[i]) + b``. The
    cost is the sum of the squared residuals ``prediction - y[i]`` over all
    ``m = x.shape[0]`` samples, divided by ``2 * m``.

    Parameters
    ----------
    x : array whose first axis indexes the training samples.
    y : targets, indexed by sample.
    w : model weights, dotted with each ``x[i]``.
    b : model bias, added to every prediction.

    Returns
    -------
    The accumulated cost divided by ``2 * m``.
    """
    n_samples = x.shape[0]

    # Accumulate squared residuals sample by sample, starting from 0.
    total = sum(
        (np.power(np.dot(w, x[i]) + b - y[i], 2) for i in range(n_samples)),
        0,
    )

    total /= 2 * n_samples
    return total

## Gradient of cost function
$$\frac{\partial J(\mathbf{w},b)}{\partial w_j}=\frac{1}{m}\sum_{i=0}^{m-1}\left(f_{\mathbf{w},b}(\mathbf{x}^{(i)})-y^{(i)}\right)x^{(i)}_j,\quad \frac{\partial J(\mathbf{w},b)}{\partial b}=\frac{1}{m}\sum_{i=0}^{m-1}\left(f_{\mathbf{w},b}(\mathbf{x}^{(i)})-y^{(i)}\right)$$

In [None]:
def gradient_linear_regression(x, y, w, b):
    """Return the gradient of the linear-regression cost J(w, b).

    Implements
        dJ/dw_j = (1/m) * sum_i (w . x[i] + b - y[i]) * x[i, j]
        dJ/db   = (1/m) * sum_i (w . x[i] + b - y[i])

    Parameters
    ----------
    x : (m, n) array of training inputs, one sample per row.
    y : length-m array of targets.
    w : length-n array of weights.
    b : scalar bias.

    Returns
    -------
    (dJ_dw, dJ_db) : a length-n float array with the partial derivatives
        with respect to each weight, and the scalar partial derivative
        with respect to the bias.

    Raises
    ------
    ValueError
        If ``x`` contains no samples (the mean over samples is undefined).
    """
    m = x.shape[0]
    if m == 0:
        raise ValueError("x must contain at least one training sample")

    # Residual of every sample at once: f_wb(x[i]) - y[i], shape (m,).
    residuals = np.dot(x, w) + b - y

    # sum_i residuals[i] * x[i, j] for every feature j is residuals @ x.
    dJ_dw = np.dot(residuals, x) / m
    # The bias gradient is a scalar (the mean residual), not a per-sample
    # array, so it is reduced rather than indexed by sample.
    dJ_db = np.sum(residuals) / m

    return dJ_dw, dJ_db