In [45]:
import numpy as np
import pandas as pd
from scipy import linalg
from scipy import stats

from stat570.linear_model.linear_regression import LinearRegression

# Print floating-point arrays in fixed-point notation instead of scientific
# notation for the rest of the notebook.
np.set_printoptions(suppress=True)

In [25]:
# True coefficients of the log-linear mean used to simulate data:
# mu = exp(x*BETA_1 + BETA_0). They are also the targets the interval
# coverage is checked against.
BETA_0 = -2  # intercept
BETA_1 = np.log(2)  # slope on x

In [72]:
# One row per simulation setting: every combination of ``b`` (first argument
# of generate_data, used there as the gamma shape) and sample size ``n``.
experiment_grid = pd.MultiIndex.from_product(
    iterables=[
        [0.2, 1, 10, 1000],
        [10, 20, 50, 100, 250],
    ],
    names=['b', 'n'],
)
experiments = pd.DataFrame(index=experiment_grid)

In [78]:
def generate_data(b, n):
    """Simulate ``n`` (x, y) pairs from a gamma-mixed Poisson model.

    Steps, in the order the random draws are made:

    1. ``x`` is drawn from a standard normal distribution.
    2. ``theta`` is drawn from a gamma distribution with shape ``b`` and
       scale ``1/b``.
    3. ``y`` is drawn from a Poisson distribution with rate
       ``theta*exp(x*BETA_1 + BETA_0)``.

    Parameters
    ----------
    b : float
        Shape of the gamma multiplier (its scale is ``1/b``).
    n : int
        Number of observations to draw.

    Returns
    -------
    tuple
        ``(x, y)``: the covariate draws and the Poisson counts, each of
        size ``n``.
    """
    covariates = stats.norm.rvs(loc=0, scale=1, size=n)
    log_mean = covariates*BETA_1 + BETA_0
    multipliers = stats.gamma.rvs(b, scale=1/b, size=n)
    counts = stats.poisson.rvs(multipliers*np.exp(log_mean), size=n)
    return covariates, counts

def estimate_by_poisson_model(x, y):
    """Placeholder: nothing is estimated yet and ``None`` is returned.

    NOTE(review): judging by the name, this is meant to hold the
    Poisson-model fit of ``y`` on ``x`` -- confirm before relying on it.
    """
    return None

def estimate_by_quasi_likelihood(x, y):
    """Placeholder: nothing is estimated yet and ``None`` is returned.

    NOTE(review): judging by the name, this is meant to hold a
    quasi-likelihood fit of ``y`` on ``x`` -- confirm before relying on it.
    """
    return None

def estimate_by_sandwich(x, y):
    """Placeholder: nothing is estimated yet and ``None`` is returned.

    NOTE(review): judging by the name, this is meant to hold a
    sandwich-variance estimate for ``y`` on ``x`` -- confirm before
    relying on it.
    """
    return None

In [None]:
from scipy import optimize

N_SIMULATIONS = 1000     # Monte Carlo replications per (b, n) setting
CONFIDENCE_LEVEL = 0.95  # nominal level of the two-sided intervals


def _fit_poisson_model(x, y):
    """Solve the Poisson log-linear score equations for one data set.

    The mean is modelled as ``exp(beta_0 + beta_1*x)``.

    Parameters
    ----------
    x, y : array-like
        Covariate values and observed counts, of equal length.

    Returns
    -------
    beta_hat : ndarray, shape (2,)
        Point returned by the root finder for ``(beta_0, beta_1)``.
    beta_hat_variance : ndarray, shape (2,)
        Diagonal of the inverse of the 2x2 information matrix
        ``[[sum(mu), sum(x*mu)], [sum(x*mu), sum(x*x*mu)]]`` evaluated at
        ``beta_hat``.
    converged : bool
        ``success`` flag reported by ``optimize.root``. When it is False,
        ``beta_hat`` is just where the solver stopped, not a solution.
    """
    def score(beta):
        mu = np.exp(beta[0] + beta[1]*x)
        return np.array([
            np.sum(y - mu),
            np.sum(x*y - x*mu),
        ])

    # Start the search at the true coefficients (the values -2 and log(2)).
    solution = optimize.root(score, np.array([BETA_0, BETA_1]))
    beta_hat = solution['x']

    mu_hat = np.exp(beta_hat[0] + beta_hat[1]*x)
    information_00 = np.sum(mu_hat)
    information_01 = np.sum(x*mu_hat)
    information_11 = np.sum(x*x*mu_hat)
    # Closed-form inverse of a symmetric 2x2 matrix; only its diagonal is kept.
    determinant = information_00*information_11 - np.square(information_01)
    beta_hat_variance = np.array([information_11, information_00])/determinant

    return beta_hat, beta_hat_variance, bool(solution['success'])


np.random.seed(2018)
# Normal multiplier for the interval half-width; constant across all runs.
critical_value = stats.norm.isf((1 - CONFIDENCE_LEVEL)/2)
for b, n in experiments.index:
    print('b={}, n={}'.format(b, n))
    estimates = []
    variances = []
    n_failed = 0
    for _ in range(N_SIMULATIONS):
        x, y = generate_data(b, n)
        beta_hat, beta_hat_variance, converged = _fit_poisson_model(x, y)

        estimates.append(beta_hat)
        variances.append(beta_hat_variance)
        n_failed += not converged

    estimates = np.array(estimates)
    variances = np.array(variances)

    # Number of replications whose interval covers the true coefficient.
    print(np.sum(np.abs(estimates[:,0] - BETA_0) <= np.sqrt(variances[:,0])*critical_value))
    print(np.sum(np.abs(estimates[:,1] - BETA_1) <= np.sqrt(variances[:,1])*critical_value))
    print(np.mean(estimates, axis=0))
    # Non-converged fits are still included in the numbers above; report how
    # many there were so implausible means and coverage counts can be traced
    # back to solver failures instead of being taken at face value.
    if n_failed:
        print('non-converged fits: {}'.format(n_failed))

b=0.2, n=10




949
981
[-142.46299267   10.98308239]
b=0.2, n=20
943
927
[-55.79391065   2.80701793]
b=0.2, n=50
947
855
[-4.7377488   0.74169047]
b=0.2, n=100
894
810
[-2.13483663  0.65428888]
b=0.2, n=250
878
797
[-2.03853705  0.67805529]
b=1.0, n=10
970
986
[-104.95182527   13.20492089]
b=1.0, n=20
964
957
[-21.88632147   1.91940514]
b=1.0, n=50


In [66]:
# Standard normal upper-tail quantile at (1 - 0.95)/2, i.e. the multiplier
# for a two-sided 95% interval.
stats.norm.isf((1 - 0.95)/2)

1.959963984540054