## Linear Non-Linear Functions



In [None]:
import numpy as np
from numpy.linalg import norm
from scipy.stats import distributions as iid
import pandas as pd

def f0(x):
    """True regression function, f0(x) = x*sin(x)."""
    return x*np.sin(x)


def phi_factory(c,s=1):
    """Return a Gaussian radial basis function phi(x) = exp(-||x-c||^2/(2*s)).

    c is the center of the bump and s (default 1) controls its width.
    """
    def phi(x):
        dist_sq = norm(x-c)**2
        return np.exp(-(1/(2*s))*dist_sq)

    return phi

# Alternative polynomial basis, swap in for the RBF factory above:
# phi_factory = lambda c,s=1: lambda x: (x**c)/s

In [None]:
def dgp(N,sigma_u,random_state=None):
    """Simulate N observations from y = f0(x) + u.

    Parameters
    ----------
    N : int
        Number of observations to draw.
    sigma_u : float
        Standard deviation (scipy ``scale``) of the mean-zero normal noise u.
    random_state : None, int, numpy Generator or RandomState, optional
        Source of randomness handed to scipy's ``rvs``.  The default (None)
        keeps the previous behavior of using numpy's global random state.
        Pass an int (or a generator) to make the sample reproducible.

    Returns
    -------
    X : list of float
        Draws from a uniform distribution on [0, 2*pi], sorted ascending.
    y : pandas.Series
        f0(x) + u for each x in X, in the same order as X.
    """
    if isinstance(random_state,(int,np.integer)):
        # Turn a seed into ONE generator shared by both draws; seeding each
        # draw separately with the same int would tie x and u together.
        random_state = np.random.default_rng(random_state)

    X = sorted(iid.uniform(loc=0,scale=2*np.pi).rvs(N,random_state=random_state).tolist())

    # All N noise terms in a single call instead of one rvs call per point
    u = iid.norm(scale=sigma_u).rvs(N,random_state=random_state)

    # f0 is the module-level true function; it is evaluated point by point
    y = pd.Series(np.array([f0(x) for x in X],dtype=float) + u)

    return X,y

# Sample size, then one simulated sample with noise standard deviation 0.1
N = 20
X, y = dgp(N, sigma_u=0.1)

Consider a scatterplot of the simulated sample, together with the true function:



In [None]:
import matplotlib.pyplot as plt

# fig and ax are kept around: a later cell overlays the fitted curve on them
fig, ax = plt.subplots()

# The simulated sample
ax.scatter(X, y)

# The true function on an evenly spaced grid of N points over [0, 2*pi]
Domain = np.linspace(0, 2*np.pi, num=N).tolist()
f0_on_grid = [f0(x) for x in Domain]

ax.plot(Domain, f0_on_grid)

Now regress $y$ on the basis functions $\phi_k(x)$:



In [None]:
from cfe.df_utils import ols

# Basis functions phi_0, ..., phi_{K-1}, built by phi_factory from k = 0, ..., K-1
K=5
phis = {k:phi_factory(k) for k in range(K)}

phis[0] = lambda x: 1 # Constant function (replaces the k=0 basis function)

# Transformed regressors: column k holds phi_k evaluated at every point in X
TX = pd.DataFrame({k:[phik(x) for x in X] for k,phik in phis.items()})

# Make y a DataFrame if it isn't one already.  An explicit type check (rather
# than waiting for the constructor to raise) keeps this cell safe to re-run.
if not isinstance(y, pd.DataFrame):
    y = pd.DataFrame({'y':y})

alpha,se = ols(TX, y)

# Check fit: residuals and their variance.
# NOTE(review): TX@alpha.T assumes alpha has one column per column of TX
# (one row per column of y) -- confirm against cfe.df_utils.ols.
e = (y - TX@alpha.T)
e.var()

Note that the expected *within*-sample error variance is effectively zero!

Now construct $\hat{f}$ and plot predictions:



In [None]:
# Display alpha, the first of the two values returned by ols above
alpha

In [None]:
def fhat(x,alpha,basis=None):
    """Evaluate the fitted function at x: the sum over k of alpha[k]*phi_k(x).

    Parameters
    ----------
    x :
        Point at which to evaluate; passed unchanged to every basis function.
    alpha :
        Coefficients, looked up as ``alpha[k]`` for each key k of the basis.
    basis : mapping, optional
        Maps k to the callable phi_k.  When omitted, the notebook-level
        dictionary ``phis`` is used, which is what this function always did
        before the parameter existed.

    Returns
    -------
    The weighted sum of the basis functions at x (0 if the basis is empty).
    Its type is whatever ``alpha[k]*phi_k(x)`` produces.
    """
    if basis is None:
        basis = phis  # fall back to the notebook-level basis functions

    return sum(alpha[k]*phik(x) for k,phik in basis.items())

# Finer grid (100 points over [0, 2*pi]) for drawing the fitted curve
Domain = np.linspace(0, 2*np.pi, num=100).tolist()
fitted = [fhat(x, alpha) for x in Domain]

# Overlay the predictions on the existing axes, then show the figure again
_ = ax.plot(Domain, fitted)
fig

Compute the MSE:



In [None]:
# Squared error of fhat against the true function at each grid point
sq_err = [(f0(x) - fhat(x,alpha))**2 for x in Domain]

# x is drawn uniformly over the interval that Domain spans, so the mean squared
# error is the average of sq_err over the evenly spaced grid.  Summing
# sq_err*dx instead gives the *integrated* squared error, which is larger by
# the length of the interval and not comparable to a per-observation mean.
MSE = np.mean(sq_err)

MSE

In [None]:
# Squared gap between the true function and the fit at each sampled x,
# averaged over the N sample points
sq_err_in = [(f0(x) - fhat(x,alpha))**2 for x in X]
in_MSE = np.sum(sq_err_in)/N

in_MSE

In [None]:
# EMSE, computed here as the out-of-sample MSE plus the in-sample MSE.
# NOTE(review): the original comment ended in "??" -- whether this sum is the
# right definition of EMSE is an open question; confirm before relying on it.
EMSE = MSE + in_MSE

EMSE

### Questions



1.  What's the expected squared out-of-sample prediction error of this
    estimator (not just an estimate), using the same size sample as above?
    1.  In this case, what's the expected squared bias?  The variance?

