In this programming exercise, you will implement the validation set approach and K-fold cross-validation. Recall that leave-one-out cross-validation is K-fold cross-validation with K = N, where N is the number of training samples. Implement the study carried out in Example 6.3 from the lecture notes. Note that, due to randomization, you might get results other than the ones shown in the example.

In [38]:
## Importing

import numpy as np
import pandas as pd
import random
from sklearn.neighbors import KNeighborsRegressor

In [126]:
## loss function
def L2_loss(pred, actual):
    """Return the squared difference between a prediction and the actual value.

    Works element-wise when the operands support element-wise subtraction
    and exponentiation (e.g. NumPy arrays).
    """
    residual = pred - actual
    return residual ** 2
    
    
## Error estimation by validation set approach
def err_validation_set(x, y):
    """Estimate the KNN regression error with the validation set approach.

    The samples are randomly partitioned into a training part and a
    validation part. For every neighbour count k, from the size of the
    training part down to 1, a ``KNeighborsRegressor`` is fitted on the
    training part and its mean squared error on the validation part is
    recorded. Each k is printed as it is processed.

    Args:
        x: Sequence of scalar inputs, one per sample.
        y: Sequence of targets aligned with ``x``.

    Returns:
        A list with one validation error per k, ordered from
        k = len(training part) down to k = 1.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length, or if fewer than
            two samples are supplied (both parts must be non-empty).
    """
    # The regressor expects a 2-D feature matrix with one column per feature.
    features = np.asarray(x).reshape(-1, 1)
    targets = np.asarray(y)

    n_samples = len(features)
    if n_samples != len(targets):
        raise ValueError(
            "x and y must contain the same number of samples "
            f"(got {n_samples} and {len(targets)})"
        )
    if n_samples < 2:
        raise ValueError(
            "at least two samples are required to form a training "
            "and a validation set"
        )

    ## Splitting data into train and validate sets
    # The size of the training part is drawn at random; both parts keep at
    # least one sample.
    split_size = random.randint(1, n_samples - 1)
    # Shuffle the sample indices so that membership in each part is random
    # as well; slicing a prefix would give a biased split on ordered data.
    order = random.sample(range(n_samples), n_samples)
    train_idx = order[:split_size]
    validate_idx = order[split_size:]

    train_x, train_y = features[train_idx], targets[train_idx]
    validate_x, validate_y = features[validate_idx], targets[validate_idx]

    ## Using KNN regressor to predict for range of k
    Err = []

    for k in range(len(train_x), 0, -1):
        print(k)
        clf = KNeighborsRegressor(n_neighbors=k)
        clf.fit(train_x, train_y)
        prediction = clf.predict(validate_x)
        # Mean of the squared errors over the validation samples.
        Err.append((1 / len(validate_x)) * sum(L2_loss(prediction, validate_y)))

    return Err
    

In [127]:
## Generate random input sample
N = 51
mu, sigma = 0, 1.15  ## mean and standard deviation of the additive noise

# Inputs are integers drawn uniformly from [-4, 4]; each target is the
# squared input plus one Gaussian noise draw (a length-1 array).
x = [random.randint(-4, 4) for _ in range(N)]
y = [x_val**2 + np.random.normal(mu, sigma, 1) for x_val in x]

## Error Validation Set
err_validation_set(x, y)

25
24
23
22
21
20
19
18
17
16
15
14
13
12
11
10
9
8
7
6
5
4
3
2
1


[array([43.87020706]),
 array([42.16986882]),
 array([40.67577077]),
 array([39.48107206]),
 array([38.02041577]),
 array([36.53870265]),
 array([33.66742964]),
 array([31.87081598]),
 array([28.39372086]),
 array([26.72240645]),
 array([24.40359849]),
 array([22.07340571]),
 array([19.01019888]),
 array([15.6239639]),
 array([11.02316882]),
 array([9.21671641]),
 array([8.33107219]),
 array([5.80850185]),
 array([4.92178493]),
 array([3.25836485]),
 array([2.19548522]),
 array([1.51386487]),
 array([3.11622148]),
 array([2.74152554]),
 array([3.65419112])]