In [1]:
import numpy as np
import pandas as pd
from scipy import linalg
from scipy import optimize
from scipy import stats

# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)
# Make every floating-point anomaly raise FloatingPointError, except overflow,
# which only emits a warning. The simulation loop relies on catching
# FloatingPointError to discard numerically degenerate replicates.
np.seterr(all='raise', over='warn')

{'divide': 'warn', 'invalid': 'warn', 'over': 'warn', 'under': 'ignore'}

In [2]:
# True regression coefficients of the log-linear mean used to simulate data:
# intercept BETA_0 and slope BETA_1, i.e. E[y | x] = exp(BETA_0 + BETA_1*x).
BETA_0, BETA_1 = -2, np.log(2)

In [3]:
def generate_data(b, n):
    """Simulate ``n`` observations from a gamma-mixed Poisson regression.

    The covariate is standard normal, the conditional mean is
    ``exp(BETA_0 + BETA_1*x)``, and each observation's Poisson rate is that
    mean multiplied by an independent gamma draw with shape ``b`` and scale
    ``1/b`` (mean 1, variance ``1/b``), so smaller ``b`` means stronger
    overdispersion.

    Parameters
    ----------
    b : float
        Shape of the gamma multiplier.
    n : int
        Number of observations to draw.

    Returns
    -------
    (covariate, counts) : tuple of arrays, each of length ``n``.
    """
    covariate = stats.norm.rvs(loc=0, scale=1, size=n)
    conditional_mean = np.exp(BETA_0 + BETA_1*covariate)
    multiplier = stats.gamma.rvs(b, scale=1/b, size=n)
    counts = stats.poisson.rvs(multiplier*conditional_mean, size=n)
    return covariate, counts

def estimate_by_poisson_model(x, y):
    """Fit a log-linear Poisson regression of ``y`` on ``x`` by solving the score equations.

    Parameters
    ----------
    x : array of covariate values.
    y : array of observed counts, same length as ``x``.

    Returns
    -------
    (beta_hat, beta_hat_variance) : the root ``[intercept, slope]`` of the
        score equations and the 2x2 inverse of the Fisher information
        evaluated at that root.
    """
    def score(beta):
        intercept, slope = beta[0], beta[1]
        fitted = np.exp(intercept + slope*x)
        return np.array([
            np.sum(y - fitted),
            np.sum(x*y - x*fitted),
        ])

    # The solver is started from the hard-coded point (-2, log 2).
    # NOTE(review): the solver's `success` flag is not inspected, so a
    # non-converged iterate would be returned as if it were the estimate.
    starting_point = np.array([-2, np.log(2)])
    beta_hat = optimize.root(score, starting_point).x
    mu_hat = np.exp(beta_hat[0] + beta_hat[1]*x)

    # Entries of the Fisher information [[s0, s1], [s1, s2]].
    s0 = np.sum(mu_hat)
    s1 = np.sum(x*mu_hat)
    s2 = np.sum(x*x*mu_hat)

    # Closed-form 2x2 inverse: adjugate divided by determinant.
    determinant = s0*s2 - np.square(s1)
    adjugate = np.array([
        [s2, -s1],
        [-s1, s0],
    ])

    return beta_hat, adjugate/determinant

def estimate_by_quasi_likelihood(x, y):
    """Poisson point estimate with a quasi-likelihood (dispersion-scaled) variance.

    The coefficients come from ``estimate_by_poisson_model``; its variance is
    multiplied by the Pearson dispersion estimate
    ``sum((y - mu_hat)**2 / mu_hat) / (len(y) - 2)``.

    Parameters
    ----------
    x : array of covariate values.
    y : array of observed counts, same length as ``x``.

    Returns
    -------
    (beta_hat, scaled_variance) : coefficient estimates and the 2x2
        dispersion-scaled variance matrix.

    Raises
    ------
    FloatingPointError
        If any fitted mean is exactly zero, which makes the Pearson statistic
        undefined. This is the exception type the simulation loop already
        catches to discard degenerate replicates.
    """
    beta_hat, beta_hat_variance = estimate_by_poisson_model(x, y)

    mu_hat = np.exp(beta_hat[0] + beta_hat[1]*x)
    # Fail explicitly rather than printing the inputs and then dividing by
    # zero: the outcome no longer depends on the global np.seterr settings.
    if np.any(mu_hat == 0):
        raise FloatingPointError(
            'fitted mean is zero; Pearson dispersion estimate is undefined')
    alpha_hat = np.sum(np.square(y - mu_hat)/mu_hat)/(len(y) - 2)

    return beta_hat, alpha_hat*beta_hat_variance

def estimate_by_sandwich(x, y):
    """Poisson point estimate with a sandwich variance estimate.

    The coefficients and the model-based variance ``V`` come from
    ``estimate_by_poisson_model``. The returned variance is ``V B V^T`` with
    ``B = sum_i (y_i - mu_hat_i)**2 * [[1, x_i], [x_i, x_i**2]]``.

    Parameters
    ----------
    x : array of covariate values.
    y : array of observed counts, same length as ``x``.

    Returns
    -------
    (beta_hat, sandwich_variance) : coefficient estimates and the 2x2
        sandwich variance matrix.
    """
    beta_hat, model_variance = estimate_by_poisson_model(x, y)
    fitted_mean = np.exp(beta_hat[0] + beta_hat[1]*x)

    # Stack the per-observation matrices [[1, x], [x, x^2]]: the literal has
    # shape (2, 2, n) and the transpose moves the observation axis to the front.
    per_observation_outer = np.transpose(
        np.array([
            [np.ones_like(y), x],
            [x, x*x],
        ]),
        axes=[2, 1, 0])
    squared_residuals = np.square(y - fitted_mean)
    weighted_outer = squared_residuals[:, np.newaxis, np.newaxis]*per_observation_outer
    middle = np.sum(weighted_outer, axis=0)

    sandwich_variance = model_variance.dot(middle).dot(model_variance.T)
    return beta_hat, sandwich_variance

def is_covered_by_confidence_interval(estimates, actual, variances, level=0.95):
    """Check component-wise whether normal-theory intervals contain ``actual``.

    Parameters
    ----------
    estimates : array of point estimates.
    actual : array of true values, same shape as ``estimates``.
    variances : square variance matrix; only its diagonal is used.
    level : float, two-sided confidence level (default 0.95).

    Returns
    -------
    Boolean array, True where ``|estimate - actual|`` does not exceed the
    interval half-width ``z * sqrt(variance)``.
    """
    upper_tail = (1 - level)/2
    critical_value = stats.norm.isf(upper_tail)
    half_widths = critical_value*np.sqrt(np.diag(variances))
    deviations = np.abs(estimates - actual)
    return deviations <= half_widths

In [4]:
# Full factorial simulation design: every gamma shape `b` crossed with every
# sample size `n`. The frame has no columns; only its MultiIndex is used.
experiment_grid = pd.MultiIndex.from_product(
    [
        [0.2, 1, 10, 1000],
        [10, 20, 50, 100, 250, 1000],
    ],
    names=['b', 'n'],
)
experiments = pd.DataFrame(index=experiment_grid)

In [5]:
import logging
from scipy import optimize

# Number of Monte Carlo replicates simulated for every (b, n) design point.
N_REPLICATES = 100000

# Estimators under comparison, in the column order of the result tables.
# Each takes (x, y) and returns (beta_hat, 2x2 variance matrix).
ESTIMATORS = [
    ('Poisson', estimate_by_poisson_model),
    ('Quasi-likelihood', estimate_by_quasi_likelihood),
    ('Sandwich', estimate_by_sandwich),
]
MODEL_NAMES = [model_name for model_name, _ in ESTIMATORS]

# One coverage table per coefficient: rows are sample sizes n, columns are
# (b, model) pairs.
results_beta_0 = pd.DataFrame(
    index=pd.Series(experiments.index.levels[1], name='$n$'),
    columns=pd.MultiIndex.from_product(
        [experiments.index.levels[0], MODEL_NAMES],
        names=['$b$', 'Model']))

results_beta_1 = results_beta_0.copy()

# Maps the per-replicate coverage check over stacked estimates (m, 2) and
# variances (m, 2, 2). Built once because it does not depend on (b, n).
# `otypes` uses the builtin `bool`: the `np.bool` alias was removed from NumPy.
is_covered_by_confidence_interval_vectorized = np.vectorize(
    lambda e, v: is_covered_by_confidence_interval(
        e, np.array([BETA_0, BETA_1]), v),
    otypes=[bool],
    signature='(i),(i,i)->(i)')

np.random.seed(2018)
for b, n in experiments.index:
    print('b={}, n={}'.format(b, n))
    estimates = {model_name: [] for model_name in MODEL_NAMES}
    variances = {model_name: [] for model_name in MODEL_NAMES}

    for _ in range(N_REPLICATES):
        try:
            x, y = generate_data(b, n)
            fits = {model_name: estimator(x, y)
                    for model_name, estimator in ESTIMATORS}
        except FloatingPointError:
            # Deliberate best-effort: a replicate on which any estimator hits
            # a floating-point error is dropped for all three estimators.
            continue

        # Drop replicates whose model-based Poisson variance has a negative
        # diagonal entry.
        if np.any(np.diag(fits['Poisson'][1]) < 0):
            continue

        for model_name, (estimate, variance) in fits.items():
            estimates[model_name].append(estimate)
            variances[model_name].append(variance)

    for model_name in MODEL_NAMES:
        n_accepted = len(estimates[model_name])
        if n_accepted == 0:
            # Every replicate was discarded: record missing coverage instead
            # of failing on empty arrays.
            coverage = np.array([np.nan, np.nan])
        else:
            covered = is_covered_by_confidence_interval_vectorized(
                np.array(estimates[model_name]),
                np.array(variances[model_name]))
            coverage = np.sum(covered, axis=0)/n_accepted

        # A single .loc call with (row, column) writes into the frame itself;
        # chained `frame[col].loc[row] = ...` may assign to a temporary copy.
        results_beta_0.loc[n, (b, model_name)] = coverage[0]
        results_beta_1.loc[n, (b, model_name)] = coverage[1]

b=0.2, n=10
b=0.2, n=20
b=0.2, n=50
b=0.2, n=100
b=0.2, n=250
b=0.2, n=1000
b=1.0, n=10
b=1.0, n=20
b=1.0, n=50
b=1.0, n=100
b=1.0, n=250
b=1.0, n=1000
b=10.0, n=10
b=10.0, n=20
b=10.0, n=50
b=10.0, n=100
b=10.0, n=250
b=10.0, n=1000
b=1000.0, n=10
b=1000.0, n=20
b=1000.0, n=50
b=1000.0, n=100
b=1000.0, n=250
b=1000.0, n=1000


In [8]:
# Export the intercept coverage table as LaTeX (four decimals, centred
# multicolumn headers, no escaping so the `$b$`/`$n$` labels stay as math).
latex_table = results_beta_0.to_latex(
    float_format='{:.4f}'.format,
    escape=False,
    multicolumn_format='c',
)
with open('p1_beta_0.tex', 'w') as f:
    f.write(latex_table)

results_beta_0

$b$,0.2,0.2,0.2,1.0,1.0,1.0,10.0,10.0,10.0,1000.0,1000.0,1000.0
Model,Poisson,Quasi-likelihood,Sandwich,Poisson,Quasi-likelihood,Sandwich,Poisson,Quasi-likelihood,Sandwich,Poisson,Quasi-likelihood,Sandwich
$n$,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
10,0.925354,0.85964,0.813886,0.952956,0.855291,0.813948,0.962287,0.853401,0.815908,0.962834,0.850808,0.813177
20,0.939384,0.924678,0.890273,0.963671,0.921098,0.897811,0.971029,0.916083,0.897252,0.972103,0.916561,0.899298
50,0.931041,0.946031,0.933762,0.962628,0.948026,0.944983,0.970776,0.945663,0.945673,0.970531,0.946163,0.945833
100,0.8877,0.95081,0.94189,0.94439,0.9516,0.94794,0.95829,0.95066,0.94895,0.95834,0.95019,0.9492
250,0.87444,0.95563,0.94607,0.93864,0.95461,0.95074,0.951,0.95008,0.94871,0.95325,0.95091,0.94967
1000,0.86718,0.95768,0.94971,0.93367,0.95251,0.94936,0.9487,0.95015,0.94938,0.95162,0.95086,0.95081


In [9]:
# Export the slope coverage table as LaTeX (four decimals, centred
# multicolumn headers, no escaping so the `$b$`/`$n$` labels stay as math).
latex_table = results_beta_1.to_latex(
    float_format='{:.4f}'.format,
    escape=False,
    multicolumn_format='c',
)
with open('p1_beta_1.tex', 'w') as f:
    f.write(latex_table)

results_beta_1

$b$,0.2,0.2,0.2,1.0,1.0,1.0,10.0,10.0,10.0,1000.0,1000.0,1000.0
Model,Poisson,Quasi-likelihood,Sandwich,Poisson,Quasi-likelihood,Sandwich,Poisson,Quasi-likelihood,Sandwich,Poisson,Quasi-likelihood,Sandwich
$n$,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
10,0.9685,0.851017,0.61844,0.986703,0.849214,0.659908,0.989162,0.84731,0.673888,0.990922,0.848199,0.678345
20,0.920581,0.886211,0.677401,0.961857,0.895026,0.750019,0.975506,0.896348,0.770914,0.977074,0.89868,0.77608
50,0.856509,0.910106,0.802068,0.928059,0.924115,0.861372,0.954858,0.933087,0.882212,0.957047,0.93445,0.884574
100,0.82125,0.91043,0.85366,0.91599,0.92989,0.89848,0.94637,0.94066,0.911,0.95256,0.9443,0.91422
250,0.79163,0.90161,0.8908,0.91028,0.93127,0.92449,0.94754,0.94674,0.93185,0.95155,0.94876,0.93407
1000,0.76271,0.88669,0.92271,0.9052,0.92846,0.94023,0.94575,0.94764,0.94499,0.94972,0.94928,0.94455
