In [1]:
#Illustration for bias-variance decomposition
#The data-generating process is Y = f(X) = exp(-6 X^2), with X in R
#Each training set consists of N = 100 inputs x_i ~ U[-1, 1], i = 1, ..., 100
#Prediction model is 1-nearest neighbor
#New observation point is x0 = 0, where y0 = 1. 
#Denote a training set by T, and generate 100 independent training sets, each of size N = 100.
#compute Var_T(y0_hat) = E_T[(y0_hat - E_T[y0_hat])^2]
#compute bias(y0_hat) = E_T[y0_hat] - f(x0)
#Since there is no random error (the response is a deterministic function of x), the expected prediction error (EPE) equals the mean squared error: EPE(x0) = E_T[(f(x0) - y0_hat)^2] = Var_T(y0_hat) + bias(y0_hat)^2

In [214]:
import math
import numpy as np
from sklearn.neighbors import KNeighborsRegressor
import pandas as pd

#return a 2-d array of the form (x, y)

def data_generation(xc):
    """Evaluate f(x) = exp(-6 * x^2) on the inputs and pair each input with its response.

    Parameters
    ----------
    xc : array_like
        Input values x at which the function is evaluated.

    Returns
    -------
    numpy.ndarray
        Array built with ``np.column_stack``: the first column holds the
        inputs ``xc`` and the second column holds ``exp(-6 * xc**2)``.
    """
    exponent = -6 * np.power(xc, 2)
    response = np.exp(exponent)
    return np.column_stack((xc, response))

In [215]:
#repeat the process 100 times
# Monte Carlo settings, named so the experiment is easy to re-size.
N_TRAINING_SETS = 100   # number of independent training sets T
N_SAMPLES = 100         # size N of each training set
X0, Y0 = 0, 1           # query point x0 and its true response f(x0) = exp(0) = 1

# Collect one record per training set and build the frame once at the end.
# Growing the frame row by row with DataFrame.append re-copies it on every
# iteration, and DataFrame.append no longer exists in pandas >= 2.0.
records = []

for i in np.arange(N_TRAINING_SETS):
    #generation of input variable
    # NOTE: the draws are unseeded, so the stored predictions differ between runs.
    s = np.random.uniform(-1, 1, N_SAMPLES)
    d0 = data_generation(s)
    #run the prediction model using d0
    # column 0 of d0 holds the inputs, column 1 the responses
    X = d0[:, 0].reshape(-1, 1)   # reshape to a 2-d (n_samples, 1) feature matrix
    y = d0[:, 1]

    # 1-nearest-neighbour fit on this training set, then predict at x0
    neigh = KNeighborsRegressor(n_neighbors = 1)
    neigh.fit(X, y)
    y0_hat = neigh.predict([[X0]])
    #store the data (x0, y0, y0_hat)
    records.append({'x0' : X0, 'y0' : Y0, 'y0_hat' : y0_hat[0]})

# One row per training set, columns in the same order as before.
df = pd.DataFrame(records, columns = ['x0', 'y0', 'y0_hat'])

In [216]:
# Write the collected (x0, y0, y0_hat) rows to "bvar.csv"; the path is relative,
# so the file lands in the current working directory.
df.to_csv("bvar.csv")