In [1]:
import numpy as np
from patsy import bs
import statsmodels.api as sm
from scipy.stats import chi2, kstest, uniform
import math

In [2]:
# Load the rpy2 IPython extension so the %R / %%R magics used in later cells are available.
%load_ext rpy2.ipython



### Exercise 1

#### (a)

In [3]:
%%R
# Simulate the Exercise 1 data in R; the seed is fixed before any draw so the
# sample is reproducible.
n <- 150
set.seed(1)
# Covariate: n draws from runif() with its default range.
x <- runif(n)
# Response: sin(2x) plus noise; runif(n, -0.1, 0.1)*10 rescales the draws to (-1, 1).
# x is drawn before the noise, so the statement order determines the data.
y <- sin(2*x) + runif(n, -0.1, 0.1)*10

In [4]:
# Copy the R objects n, x and y into the Python namespace via the %R line magic.
# NOTE(review): n presumably arrives as a length-1 array rather than a plain int
# (a later cell calls .item() on n*np.log(W)) — confirm against rpy2's conversion rules.
n = %R n
x = %R x
y = %R y

In [5]:
# Design for the straight-line fit: a constant column plus x.
lm_design_matrix = sm.add_constant(x)

# Design for the spline fit: degree-3 B-spline basis with one knot at 0.5 and
# bounds 0 and 1. The basis includes its own intercept, so no constant is added.
bx = bs(
    x=x,
    knots=[0.5],
    degree=3,
    lower_bound=0,
    upper_bound=1,
    include_intercept=True,
)

In [6]:
# Least-squares fits of y on the linear design and on the spline basis.
lm_model, fhat_model = sm.OLS(y, lm_design_matrix), sm.OLS(y, bx)
lm_results, fhat_results = lm_model.fit(), fhat_model.fit()

In [7]:
# W is the ratio of residual sums of squares (linear fit over spline fit);
# the value printed is n*log(W).
rss_linear = np.sum(lm_results.resid**2)
rss_spline = np.sum(fhat_results.resid**2)
W = rss_linear / rss_spline
print((n*np.log(W)).item())

9.038138740699818


In [8]:
# Upper-tail chi-square probability (3 degrees of freedom) of the statistic n*log(W).
# The statistic is recomputed from W and n instead of being pasted (and truncated)
# from printed output, so this cell stays correct if the data change. chi2.sf is the
# survival function, i.e. 1 - cdf computed directly.
chi2.sf(x=(n*np.log(W)).item(), df=3)

0.02878808536319888

#### (b)

In [9]:
def W_func(x, y, resid=False):
    """Return n*log(RSS_linear / RSS_spline) for a linear-versus-spline comparison.

    Two least-squares fits of ``y`` are made: one on a constant plus ``x``, the
    other on a degree-3 B-spline basis of ``x`` (one knot at 0.5, bounds 0 and 1,
    intercept included in the basis).

    Parameters
    ----------
    x : covariate values handed to ``bs`` and ``sm.add_constant``.
    y : response values; ``y.size`` is used as ``n``.
    resid : when true, the residuals of the spline fit are returned as well.

    Returns
    -------
    The statistic as a Python scalar, or the tuple
    ``(statistic, spline_residuals)`` when ``resid`` is true.
    """
    n_obs = y.size
    spline_basis = bs(
        x=x,
        knots=[0.5],
        degree=3,
        lower_bound=0,
        upper_bound=1,
        include_intercept=True,
    )
    spline_fit = sm.OLS(y, spline_basis).fit()
    linear_fit = sm.OLS(y, sm.add_constant(x)).fit()

    rss_linear = np.sum(linear_fit.resid**2)
    rss_spline = np.sum(spline_fit.resid**2)
    statistic = (n_obs*np.log(rss_linear/rss_spline)).item()

    return (statistic, spline_fit.resid) if resid else statistic

In [10]:
def pv_fun(x, y, m):
    """Bootstrap p-value for the statistic computed by ``W_func``.

    The observed statistic and the spline-fit residuals are taken from
    ``W_func(x, y, resid=True)``. For each of ``m`` replicates, ``y.size``
    residuals are resampled with ``choice`` and added to the mean of ``y`` to
    form a new response, for which the statistic is recomputed on the same ``x``.

    Parameters
    ----------
    x : covariate values, passed through to ``W_func``.
    y : response values.
    m : number of bootstrap replicates.

    Returns
    -------
    The fraction of replicate statistics strictly greater than the observed one.
    """
    W_obs, resid = W_func(x, y, resid=True)
    n = y.size
    y_mean = np.mean(y)  # loop-invariant, so computed once

    nlogW = []
    for i in range(m):
        # A private legacy RandomState seeded with 3*i yields the same draws as
        # np.random.seed(3*i) followed by np.random.choice, but leaves the global
        # NumPy RNG untouched, so calling this function has no hidden side effect
        # on later cells.
        rng = np.random.RandomState(3*i)
        epsilon = rng.choice(resid, size=n)
        nlogW.append(W_func(x, y_mean + epsilon))
    nlogW = np.array(nlogW)

    return np.count_nonzero(nlogW > W_obs) / m

In [11]:
# Bootstrap p-value for the Exercise 1 data, using 200 replicates.
pv_fun(x, y, m=200)

0.02

### Exercise 2

#### (a)

In [12]:
# 500 simulated data sets of size 150 from y = 1 + x + noise; for each one,
# store the chi-square (df=3) upper-tail probability of the W_func statistic.
n2 = 150
pvalues_a = []
for i in range(500):
    # Seed with the replicate index so every data set can be regenerated on its own.
    np.random.seed(i)
    x2 = np.random.rand(n2)
    noise2 = np.random.uniform(low=-0.1, high=0.1, size=n2)*5
    y2 = 1 + x2 + noise2
    pvalues_a.append(1-chi2.cdf(x=W_func(x2, y2), df=3))

In [13]:
# Kolmogorov-Smirnov test of the simulated p-values against the standard uniform CDF.
kstest(pvalues_a, uniform.cdf)

KstestResult(statistic=0.05824703990145641, pvalue=0.06460145463962808)

#### (b)

In [14]:
# Same simulation as part (a) but with 5000 observations per data set.
n3 = 5000
pvalues_b = []
for i in range(500):
    # Seed with the replicate index so every data set can be regenerated on its own.
    np.random.seed(i)
    x3 = np.random.rand(n3)
    noise3 = np.random.uniform(low=-0.1, high=0.1, size=n3)*5
    y3 = 1 + x3 + noise3
    pvalues_b.append(1-chi2.cdf(x=W_func(x3, y3), df=3))

In [15]:
# Kolmogorov-Smirnov test of the n=5000 p-values against the standard uniform CDF.
kstest(pvalues_b, uniform.cdf)

KstestResult(statistic=0.024483939175173886, pvalue=0.9180192973880814)