# Linear Regression with Ridge Regularization
##### <div style='text-align:right'>made by Wonbin Kim</div>

In [None]:
import numpy as np
from example import plotting, example3, example4
%matplotlib inline

# Dataset 3. More Complex Toy data

# 2. Ridge Regression

### <center>\\(\mathcal{J}_{Ridge}(\mathbf{w})= \frac{1}{2N}||y-\phi^T\mathbf{w}||^2_2+\frac{\lambda}{2}||\mathbf{w}||^2_2\\)</center>
<img src='image/pic10.png'>

### Find <center> \\( \hat{\mathbf{w}}_{Ridge} = \arg \min\limits_{\mathbf{w}} \frac{1}{2}||y-\phi^T\mathbf{w}||^2_2+\frac{\lambda_N}{2}||\mathbf{w}||^2_2\\) </center>

where $\lambda$ is the trade-off parameter (between the least-squares term and the regularizer), $\lambda_N = N\lambda$ (multiplying $\mathcal{J}_{Ridge}$ by $N$ does not change its minimizer), $\phi(x) \in \mathbb{R}^{d\times N}$, $y \in \mathbb{R}^{k \times N}$, and $\mathbf{w} \in \mathbb{R}^{k\times d}$. $N$, $d$, and $k$ denote the number of instances, the dimensionality of the covariate X, and the dimensionality of the response Y, respectively.

### Caution!

For simplicity of implementation, X is transposed in practice, i.e. $x \in \mathbb{R}^{N\times d}$: each row represents one instance.

In [None]:
# Objective function
def ls(x, y, w, b = 0., l=1.):
    """Evaluate the ridge-regression objective.

    Computes ``1/(2N) * ||y - (x @ w + b)||^2 + l/2 * ||w||^2``, where N is
    the number of rows of ``x``. The bias ``b`` is not penalized.

    Parameters
    ----------
    x : array_like, shape (N, d)
        Design matrix; one instance per row.
    y : array_like, shape (N,) or (N, k)
        Targets.
    w : array_like, shape (d,) or (d, k)
        Weights.
    b : float or array_like, optional
        Bias added to every prediction (default 0.).
    l : float, optional
        Regularization strength (default 1.).

    Returns
    -------
    loss : float
        Value of the objective.

    Raises
    ------
    ValueError
        If ``x`` has no rows (the 1/N normalization is undefined).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    w = np.asarray(w, dtype=float)

    n = x.shape[0] if x.ndim > 0 else 0
    if n == 0:
        raise ValueError("ls() needs at least one instance in x")

    pred = np.dot(x, w) + b
    # Align (N,) with (N, 1) so the subtraction below cannot silently
    # broadcast into an (N, N) matrix.
    if pred.shape != y.shape and pred.size == y.size:
        pred = pred.reshape(y.shape)

    residual = y - pred
    loss = np.sum(residual ** 2) / (2. * n) + 0.5 * l * np.sum(w ** 2)
    return loss

### 구현 2. w를 구하는 numpy code 구성하고, y의 prediction 값 구하기

## <center>\\(\hat{\mathbf{w}}_{Ridge} = \left(\Phi^T\Phi+\lambda I\right)^{-1}\Phi^T\mathbf{t}\\)</center>

> 이 때 $\Phi$는 Design matrix로 다음과 같다. \begin{equation} 
    \Phi =
        \left(
            \begin{matrix}
                \phi_0(x_1) & \phi_1(x_1) & \cdots & \phi_{M-1}(x_1)\\
                \phi_0(x_2) & \phi_1(x_2) & \cdots & \phi_{M-1}(x_2)\\
                \vdots & \vdots & \ddots & \vdots \\
                \phi_0(x_N) & \phi_1(x_N) & \cdots & \phi_{M-1}(x_N)
            \end{matrix}
        \right)
\end{equation}

Hint : np.dot, np.linalg.pinv, np.transpose, np.eye

In [None]:
def find_w(x, y, l = 1.):
    """Closed-form ridge-regression weights.

    Solves ``w = (x^T x + l * I)^{-1} x^T y``. The inverse is taken with
    ``np.linalg.pinv`` so that ``l = 0`` with a rank-deficient ``x`` still
    returns a (minimum-norm) solution instead of raising.

    Parameters
    ----------
    x : array_like, shape (N, d)
        Design matrix; one instance per row.
    y : array_like, shape (N,) or (N, k)
        Targets.
    l : float, optional
        Regularization strength added to the diagonal (default 1.).

    Returns
    -------
    w_ls : numpy.ndarray, shape (d,) or (d, k)
        Weights; the trailing shape follows ``y``.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    x_t = np.transpose(x)
    gram = np.dot(x_t, x)                    # (d, d)
    ridge = l * np.eye(gram.shape[0])        # l * I
    w_ls = np.dot(np.linalg.pinv(gram + ridge), np.dot(x_t, y))
    return w_ls

In [None]:
# Regularization strength; a later cell passes it to sklearn's Ridge as `alpha`.
l = 5. # hyper-parameter

## (2) Polynomial basis function with Numpy

### <center> \\( \phi_j(x) = x^j\\) </center>

### <center>\\(\phi(x) = [x^m, x^{m-1}, ... , x^1, 1]\\)</center>

In [None]:
def powers(X, degree=1):
    """Expand inputs with the polynomial basis ``[x^m, x^(m-1), ..., x, 1]``.

    Parameters
    ----------
    X : array_like, shape (N,) or (N, d)
        Inputs; a 1-D array is treated as N instances of one feature.
    degree : int, optional
        Highest power ``m`` (default 1). ``degree=0`` yields only the
        constant column.

    Returns
    -------
    new_X : numpy.ndarray, shape (N, d * degree + 1)
        Blocks ``X**degree, ..., X**1`` stacked left to right, followed by a
        column of ones.

    Raises
    ------
    ValueError
        If ``degree`` is negative.
    """
    if degree < 0:
        raise ValueError("degree must be non-negative")

    X = np.asarray(X, dtype=float)
    if X.ndim < 2:
        # Treat scalars / flat vectors as a single-feature column.
        X = X.reshape(-1, 1)

    # Highest power first, constant term last.
    columns = [X ** p for p in range(degree, 0, -1)]
    columns.append(np.ones((X.shape[0], 1)))
    new_X = np.hstack(columns)
    return new_X

In [None]:
# example3 returns six values, unpacked here as train / test / whole inputs and targets.
# NOTE(review): the meaning of the positional False flag is defined in example.py — confirm.
train_X, train_Y, test_X, test_Y, whole_X, whole_Y = example3(False)

## By sklearn

In [None]:
# Expand every split with powers(..., 4) before fitting.
new_train_X = powers(train_X, 4)
new_test_X = powers(test_X, 4)
new_whole_X = powers(whole_X, 4)
from sklearn import linear_model
# Ridge model whose alpha is the notebook-level hyper-parameter `l`.
rid = linear_model.Ridge(alpha=l)
rid.fit(new_train_X, train_Y)
pred_y = rid.predict(new_whole_X)
# Keep the fitted coefficients and intercept so ls() can score the model.
w_ls = rid.coef_
b_ls = rid.intercept_
# NOTE(review): ls() is called without `l`, so its default (l=1.) is used for the
# penalty term here rather than the alpha passed to Ridge — confirm this is intended.
print(ls(new_train_X, train_Y, w_ls.T, b_ls))
print(ls(new_test_X, test_Y, w_ls.T, b_ls))
# Plot train points, test points, and the model's prediction on whole_X.
plotting([train_X, test_X, whole_X],
         [train_Y, test_Y, pred_y], label=['train', 'test', '4-polynomial'])

### Comparing experiments

In [None]:
def compare(train_x, train_y, test_x, test_y, whole_x, lamb, degree=4):
    """Fit one model per polynomial degree 1..degree and report them together.

    For each degree the inputs are expanded with ``powers``, weights are
    obtained from ``find_w`` using ``lamb``, and ``ls`` is evaluated on the
    train and test splits (also with ``l=lamb``). After all fits, one summary
    line per degree is printed and every prediction on ``whole_x`` is handed
    to ``plotting`` next to the train and test points.

    Parameters
    ----------
    train_x, train_y : training inputs and targets.
    test_x, test_y : held-out inputs and targets.
    whole_x : inputs on which each fitted model is evaluated for plotting.
    lamb : regularization strength forwarded to ``find_w`` and ``ls``.
    degree : highest polynomial degree to fit (default 4).
    """
    results = []
    for deg in range(1, degree + 1):
        phi_train = powers(train_x, deg)
        phi_test = powers(test_x, deg)
        phi_whole = powers(whole_x, deg)
        weights = find_w(phi_train, train_y, l=lamb)
        results.append({
            'pred_y': np.dot(phi_whole, weights),
            'whole': phi_whole,
            'train_loss': ls(phi_train, train_y, weights, l=lamb),
            'test_loss': ls(phi_test, test_y, weights, l=lamb),
        })

    # Report only after every degree has been fitted.
    summary = "Degree : [{}], train loss : [{:.3f}], test_loss : [{:.3f}]"
    for deg, res in enumerate(results, start=1):
        print(summary.format(deg, res['train_loss'], res['test_loss']))

    curve_labels = ['linear'] + ['{}-polynomial'.format(d) for d in range(2, degree + 1)]
    xs = [train_x, test_x] + degree * [whole_x]
    ys = [train_y, test_y] + [res['pred_y'] for res in results]
    plotting(xs, ys, label=['train', 'test'] + curve_labels)

In [None]:
# Reload the example3 splits, then run compare() once per regularization strength:
# 10**-5 up to 10**6 (12 values), each time fitting polynomial degrees 1 through 7.
train_X, train_Y, test_X, test_Y, whole_X, whole_Y = example3(False)
for i in range(12):
    compare(train_X, train_Y, test_X, test_Y, whole_X, 10**(i-5),7)

In [None]:
# Same sweep on the example4 splits: regularization strengths 10**-5 up to 10**4
# (10 values), each time fitting polynomial degrees 1 through 6.
train_X, train_Y, test_X, test_Y, whole_X, whole_Y = example4(False)
for i in range(10):
    compare(train_X, train_Y, test_X, test_Y, whole_X, 10**(i-5),6)