# Exercises

There are three exercises in this notebook:

1. Use the cross-validation method to test the linear regression with different $\alpha$ values, at least three.
2. Implement a SGD method that will train the Lasso regression for 10 epochs.
3. Extend the Fisher's classifier to work with two features. Use the class as the $y$.

## 1. Cross-validation linear regression

You need to change the variable ``alpha`` to be a list of alphas. Next do a loop and finally compare the results.

In [3]:
import numpy as np
from sklearn.linear_model import Ridge, Lasso
from sklearn.model_selection import cross_val_score

In [1]:
# unecessary, but same results as scikit-learn sklearn.model_selection.cross_val_score
def divide_n_folds(x, y, n_folds):
    n_samples = x.shape[0]
    
    indices = np.arange(n_samples)
    
    fold_sizes = np.full(n_folds, n_samples // n_folds, dtype=int)
    fold_sizes[:n_samples % n_folds] += 1 # if n_samples is indivisible  by n_folds
    
    folds = [] # indices for each fold
    start = 0
    for fold_size in fold_sizes:
        stop = start + fold_size
        folds.append(indices[start:stop])
        start = stop
        
    new_x = []
    new_y = []
        
    for i, test_idx in enumerate(folds):
        train_idx = np.concatenate([fold for fold_idx, fold in enumerate(folds) if fold_idx != i])
        new_x.append((x[train_idx], x[test_idx]))
        new_y.append((y[train_idx], y[test_idx]))
        
    return new_x, new_y

In [24]:
x = np.array([188, 181, 197, 168, 167, 187, 178, 194, 140, 176, 168, 192, 173, 142, 176]).reshape(-1, 1) # [15, 1]
y = np.array([141, 106, 149, 59, 79, 136, 65, 136, 52, 87, 115, 140, 82, 69, 121]).reshape(-1, 1) # [15, 1]

x = np.append(x, np.ones((15,1)), axis = 1) # [15, 2], added bias column
I = np.identity(2) # [2, 2] -> [[1, 0], [0, 1]]
alphas = [0.0, 0.1, 0.5, 1, 10]

results = []
for alpha in alphas:
    w = np.dot(np.linalg.inv(np.dot(x.T, x) + alpha * I), np.dot(x.T,y))
    w=w.ravel() # it's like flattening
    results.append(w)

print(f'Alpha\tMy_Ridge_Coefs\tSklearn_Ridge_Coefs')
for i, alpha in enumerate(alphas):
    ridge = Ridge(alpha=alpha, fit_intercept=False)
    ridge.fit(x, y)
    print(f'{alpha}\t({results[i][0]},{results[i][1]})\t({ridge.coef_[0]},{ridge.coef_[1]})')
    
print("\nMSE Calculations")
for i, alpha in enumerate(alphas):
    predictions = np.dot(x, results[i])
    print(f'Alpha: {alpha}, MSE: {np.sum(np.pow(y.ravel() - predictions, 2))/ predictions.shape[0]}')

Alpha	My_Ridge_Coefs	Sklearn_Ridge_Coefs
0.0	(1.618142468878985,-180.92401771633934)	(1.618142468878913,-180.92401771632694)
0.1	(1.1697875748697704,-101.72397080681492)	(1.1697875748697717,-101.72397080681392)
0.5	(0.8032416872615298,-36.975220157163406)	(0.8032416872615283,-36.97522015716356)
1	(0.7104861640228338,-20.590447055498938)	(0.710486164022832,-20.590447055498714)
10	(0.6068810679043629,-2.291062615390463)	(0.6068810679043627,-2.29106261539043)

MSE Calculations
Alpha: 0.0, MSE: 372.33129215179673
Alpha: 0.1, MSE: 426.04507708317414
Alpha: 0.5, MSE: 549.7710602647388
Alpha: 1, MSE: 592.4636104956176
Alpha: 10, MSE: 645.5799592900984


## 2. Implement based on the Ridge regression example, the Lasso regression.

Please implement the SGD method and compare the results with the sklearn Lasso regression results. 

In [26]:
def sgd(x, y, n_steps, learning_rate, alpha):
    n, n_features = x.shape
    w =np.zeros((n_features, 1))
    for _ in range(n_steps):
        predictions = np.dot(x, w) # [n, n_features] x [n_features, 1] = [n, 1]
        # standard gradients for linear regression
        gradient = (-2/ n) * np.dot(x.T, (y - predictions)) # [n_features, n] x [n, 1] =  [n_features, 1]
        
        # L1 regularization gradients
        subgradient = alpha * np.sign(w)
        
        w -= learning_rate * (gradient + subgradient)

    return w[0][0], w[1][0]    

In [27]:
x = np.array([188, 181, 197, 168, 167, 187, 178, 194, 140, 176, 168, 192, 173, 142, 176])
y = np.array([141, 106, 149, 59, 79, 136, 65, 136, 52, 87, 115, 140, 82, 69, 121]).reshape(-1, 1)

x = np.c_[np.ones((15,1)), x] # [15, 2] -> bias column 0, weight column 1
alpha = 0.1

# x needs to be rescaled/standardized before sgd
x_mean = np.mean(x[:, 1:], axis=0)
x_std = np.std(x[:, 1:], axis=0)
x[:, 1:] = (x[:, 1:] - x_mean) / x_std

w = sgd(x, y, 10, 0.3, alpha)

lass_regression = Lasso(alpha=alpha, fit_intercept=False)
lass_regression.fit(X=x, y=y)

print(f'w = ({w[0]}, {w[1]}), sklearn Lasso -> ({lass_regression.coef_[0]}, {lass_regression.coef_[1]})')

w = (102.40593536511999, 26.28450510066113), sklearn Lasso -> (102.36666666666665, 26.237253654669942)


In [29]:
print("Final predictions: \nY_True \t\t Pred")
pred = np.dot(x, w)
y_flattened = y.ravel()
for i in range(len(pred)):
    print(f'{pred[i]:0.4f} \t {y_flattened[i]}')

print(f'\nMSE: {np.mean((y_flattened - pred)** 2)}')

Final predictions: 
Y_True 		 Pred
123.1843 	 141
111.8800 	 106
137.7185 	 149
90.8863 	 59
89.2714 	 79
121.5694 	 136
107.0353 	 65
132.8737 	 136
45.6691 	 52
103.8055 	 87
90.8863 	 115
129.6439 	 140
98.9608 	 82
48.8989 	 69
103.8055 	 121

MSE: 372.33776285273433


## 3. Extend the Fisher's classifier

Please extend the targets of the ``iris_data`` variable and use it as the $y$.

In [30]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

iris_data = load_iris()
iris_df = pd.DataFrame(iris_data.data,columns=iris_data.feature_names)
iris_df['target'] = iris_data.target
print(iris_df.head())
print('\n')

x = iris_df[['sepal width (cm)', 'sepal length (cm)']].values
y = iris_df['target'].values.reshape(-1, 1)
    
dataset_size = np.size(x)

mean_x, mean_y = np.mean(x), np.mean(y)

SS_xy = np.sum(y * x) - dataset_size * mean_y * mean_x
SS_xx = np.sum(x * x) - dataset_size * mean_x * mean_x

a = SS_xy / SS_xx
b = mean_y - a * mean_x

y_pred = a * x + b

# Results
print(y_pred)

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  


[[0.92478522 1.05141831]
 [0.88521238 1.03558917]
 [0.90104152 1.01976004]
 [0.89312695 1.01184547]
 [0.93269979 1.04350374]
 [0.95644349 1.07516201]
 [0.91687065 1.01184547]
 [0.91687065 1.04350374]
 [0.87729781 0.99601633]
 [0.89312695 1.03558917]
 [0.94061436 1.07516201]
 [0.91687065 1.02767461]
 [0.88521238 1.02767461]
 [0.88521238 0.98810177]
 [0.96435806 1.10682029]
 [0.99601633 1.09890572]
 [0.95644349 1.07516201]
 [0.92478522 1.05141831]
 