# Quasilikelihood and semiparametric methods for the general linear model

In [1]:
import collections
import enum
import functools
import itertools
from multiprocessing import pool as pool_lib
from typing import Callable, List, NamedTuple, Sequence, Tuple

import numpy as np
import pandas as pd
from scipy import stats
from scipy import linalg

# True regression coefficients used to generate responses: the first entry
# multiplies the intercept column, the second the cluster covariate.
BETA = np.asarray((0.0, 0.5), dtype=np.float64)

# Candidate covariate vectors for a cluster, keyed by design name. Each
# sampled cluster uses one of the vectors listed for its design.
DESIGN_CLUSTERS = collections.OrderedDict((
    ('I', [[7, 10, 13, 16]]),
    ('II', [
        [7, 10, 13],
        [7, 10, 16],
        [7, 13, 16],
        [10, 13, 16],
    ]),
))

# Grid of cluster counts and within-cluster correlation coefficients that the
# simulation sweeps over.
NUM_CLUSTERS = [15, 30, 60]
WITHIN_CLUSTER_CORRELATIONS = [0.5, 0.9]

class CorrelationStructure(enum.Enum):
    """Within-cluster correlation structures used to generate and fit data."""
    NONE = 1
    EXCHANGEABLE = 2
    EXPONENTIAL = 3


class EstimationMethod(enum.Enum):
    """Ways of estimating the covariance of the coefficient estimates."""
    GLS = 1
    QL = 2
    Sandwich = 3

In [2]:
class Experiment(NamedTuple('Experiment', [
    ('beta', np.ndarray),
    ('error_variance', float),
    ('num_clusters', int),
    ('clusters', Sequence[Sequence[float]]),
    ('within_cluster_correlation', float),
    ('within_cluster_correlation_structure', CorrelationStructure),
])):
    """Encapsulates parameters for the data generating mechanism.

    Fields:
        beta: Regression coefficients. The first entry multiplies the
            intercept column that `_sample_cluster` prepends to the covariate.
        error_variance: Scalar that multiplies the within-cluster correlation
            matrix to give the error covariance.
        num_clusters: Number of clusters drawn by `sample_clusters`.
        clusters: Candidate covariate vectors; each sampled cluster uses one
            of them, picked with `np.random.choice`.
        within_cluster_correlation: Correlation coefficient used to fill the
            off-diagonal entries of the correlation matrix.
        within_cluster_correlation_structure: How the off-diagonal entries are
            filled from the coefficient (see `_make_within_cluster_covariance`).
    """

    def sample_clusters(self) -> List[Tuple[np.ndarray, np.ndarray]]:
        """Draws `num_clusters` independent (design matrix, response) pairs."""
        return [self._sample_cluster() for _ in range(self.num_clusters)]

    def _sample_cluster(self) -> Tuple[np.ndarray, np.ndarray]:
        """Draws one cluster.

        Returns:
            A pair `(covariates, response)`. `covariates` has a leading column
            of ones followed by the sampled covariate vector, and `response`
            is one multivariate normal draw with mean `covariates @ beta`.
        """
        covariates = self._sample_cluster_covariates()
        covariates = np.column_stack((np.ones(len(covariates)), covariates))
        covariance = self._make_within_cluster_covariance(len(covariates))
        response = stats.multivariate_normal(
            mean=np.matmul(covariates, self.beta), cov=covariance).rvs()
        return covariates, response

    def _sample_cluster_covariates(self) -> Sequence[float]:
        """Picks one of the candidate covariate vectors at random."""
        return self.clusters[np.random.choice(len(self.clusters))]

    def _make_within_cluster_covariance(self, cluster_size: int) -> np.ndarray:
        """Builds the `cluster_size` x `cluster_size` error covariance matrix.

        The correlation matrix starts as the identity. For EXCHANGEABLE every
        off-diagonal entry is the correlation coefficient; for EXPONENTIAL the
        (i, j) entry is the coefficient raised to |i - j|. Any other structure
        leaves the identity untouched. The result is scaled by
        `error_variance`.
        """
        structure = self.within_cluster_correlation_structure
        correlation = np.eye(cluster_size)
        if structure == CorrelationStructure.EXCHANGEABLE:
            # Only off-diagonal entries of the identity are exactly zero.
            correlation[correlation == 0] = self.within_cluster_correlation
        elif structure == CorrelationStructure.EXPONENTIAL:
            for i in range(cluster_size):
                for j in range(i + 1, cluster_size):
                    correlation[i, j] = correlation[j, i] = np.power(
                        self.within_cluster_correlation, np.abs(j - i))
        return self.error_variance*correlation

    @classmethod
    def from_template(
        cls,
        clusters,
        num_clusters,
        within_cluster_correlation,
        within_cluster_correlation_structure) -> 'Experiment':
        """Builds an experiment with the module-level `BETA` and unit error variance.

        Args:
            clusters: Candidate covariate vectors; all must have the same length.
            num_clusters: Number of clusters to sample per trial.
            within_cluster_correlation: Correlation coefficient.
            within_cluster_correlation_structure: A `CorrelationStructure` member.

        Raises:
            AssertionError: If the candidate vectors do not share one length
                (this includes an empty `clusters`).
        """
        # Kept as an assert so callers see the same exception type as before.
        assert len({len(cluster) for cluster in clusters}) == 1,\
               'Clusters must be the same size.'

        return cls(beta=BETA,
                   clusters=clusters,
                   error_variance=1.,
                   num_clusters=num_clusters,
                   within_cluster_correlation=within_cluster_correlation,
                   within_cluster_correlation_structure=within_cluster_correlation_structure)

In [3]:
def sum_dict(acc, result):
    """Recursively adds two identically structured (possibly nested) dicts.

    Args:
        acc: A value supporting `+`, or a dict (any `dict` subclass, e.g.
            `OrderedDict`) whose values are themselves valid `acc` arguments.
        result: A value with the same structure as `acc`.

    Returns:
        `acc + result` for leaves; for dicts, a new plain dict with the keys of
        `acc` and recursively summed values.

    Raises:
        KeyError: If `result` lacks a key that `acc` has.
    """
    # isinstance (rather than an exact type comparison) so that dict
    # subclasses are traversed instead of being handed to `+`.
    if isinstance(acc, dict):
        return {key: sum_dict(value, result[key]) for key, value in acc.items()}
    return acc + result
    
def divide_dict(results, d):
    """Recursively divides every leaf of a (possibly nested) dict by `d`.

    Args:
        results: A value supporting `/`, or a dict (any `dict` subclass, e.g.
            `OrderedDict`) whose values are themselves valid `results`.
        d: The divisor applied to every leaf.

    Returns:
        `results/d` for leaves; for dicts, a new plain dict with the same keys
        and recursively divided values.
    """
    # isinstance (rather than an exact type comparison) so that dict
    # subclasses are traversed instead of being handed to `/`.
    if isinstance(results, dict):
        return {key: divide_dict(value, d) for key, value in results.items()}
    return results/d

In [4]:
def estimate_rho(epsilon_hat):
    """Moment estimates of the correlation coefficient from one residual vector.

    Args:
        epsilon_hat: Residuals of a single cluster.

    Returns:
        A pair `(rho_exchangeable, rho_exponential)`: the mean of the residual
        products over all distinct pairs, and the mean over adjacent pairs
        only (positions i and i + 1).
    """
    products = np.outer(epsilon_hat, epsilon_hat)
    size = products.shape[0]

    all_pairs_total = 0.
    adjacent_total = 0.
    # combinations() walks the strict upper triangle row by row, so the
    # products are accumulated in the same order as nested index loops.
    for row, col in itertools.combinations(range(size), 2):
        all_pairs_total += products[row, col]
        if col - row == 1:
            adjacent_total += products[row, col]

    num_pairs = (np.square(size) - size)/2
    num_adjacent_pairs = size - 1
    return all_pairs_total/num_pairs, adjacent_total/num_adjacent_pairs

def make_correlation_matrices(clusters, beta_hat, sigma_2_hat):
    """Builds the working correlation matrices for every cluster.

    The correlation coefficients are estimated by averaging `estimate_rho`
    over clusters, applied to residuals `y - X.dot(beta_hat)` scaled by
    `sqrt(sigma_2_hat)`.

    Args:
        clusters: Sequence of `(X, y)` pairs.
        beta_hat: Coefficient estimate used to form residuals.
        sigma_2_hat: Variance estimate used to standardize residuals.

    Returns:
        One dict per cluster, keyed by `CorrelationStructure` member name,
        holding the identity, exchangeable and exponential matrices sized to
        that cluster's response.
    """
    exchangeable_total = 0.
    exponential_total = 0.
    for X, y in clusters:
        standardized_residuals = (y - X.dot(beta_hat))/np.sqrt(sigma_2_hat)
        exchangeable_part, exponential_part = estimate_rho(standardized_residuals)
        exchangeable_total += exchangeable_part
        exponential_total += exponential_part
    rho_exchangeable = exchangeable_total/len(clusters)
    rho_exponential = exponential_total/len(clusters)

    def matrices_for(size):
        # Exchangeable: ones on the diagonal, rho everywhere else.
        exchangeable = np.full((size, size), rho_exchangeable, dtype=np.float64)
        np.fill_diagonal(exchangeable, 1.)

        # Exponential: entry (i, j) is rho**|i - j|, filled one lag at a time.
        exponential = np.eye(size)
        for lag in range(1, size):
            decay = np.power(rho_exponential, lag)
            rows = np.arange(size - lag)
            exponential[rows, rows + lag] = decay
            exponential[rows + lag, rows] = decay

        return {
            CorrelationStructure.NONE.name: np.eye(size),
            CorrelationStructure.EXCHANGEABLE.name: exchangeable,
            CorrelationStructure.EXPONENTIAL.name: exponential,
        }

    return [matrices_for(len(y)) for _, y in clusters]

def estimate_beta_hats(clusters, correlation_matrices):
    """Weighted least squares coefficients under each assumed correlation.

    Args:
        clusters: Sequence of `(X, y)` pairs.
        correlation_matrices: One dict per cluster mapping a structure name to
            that cluster's working correlation matrix.

    Returns:
        A dict mapping each structure name to the solution of the normal
        equations summed over clusters, with each cluster weighted by the
        inverse of its correlation matrix.
    """
    def normal_equation_terms(X, y, correlation_matrix):
        # Invert the correlation matrix through its Cholesky factorization.
        precision = linalg.cho_solve(
            linalg.cho_factor(correlation_matrix),
            np.eye(len(correlation_matrix)))
        weighted_design = X.T.dot(precision)
        return {
            'X': weighted_design.dot(X),
            'y': weighted_design.dot(y),
        }

    per_cluster_terms = (
        {
            name: normal_equation_terms(X, y, correlation_matrix)
            for name, correlation_matrix in matrices_by_name.items()
        }
        for (X, y), matrices_by_name in zip(clusters, correlation_matrices)
    )
    totals = functools.reduce(sum_dict, per_cluster_terms)

    beta_hats = {}
    for name, terms in totals.items():
        beta_hats[name] = linalg.cho_solve(
            linalg.cho_factor(terms['X']), terms['y'])
    return beta_hats

def estimate_covariance(clusters,
                        correlation_matrices,
                        method,
                        beta_hat):
    """Estimates the covariance matrix of `beta_hat`.

    Args:
        clusters: Sequence of `(X, y)` pairs.
        correlation_matrices: One working correlation matrix per cluster.
        method: An `EstimationMethod` member.
        beta_hat: Coefficient estimate used to form residuals.

    Returns:
        For `Sandwich`, bread.meat.bread, where the bread is the inverse of
        the summed X'WX and the meat sums X'W r r' W X over clusters. For
        `GLS`, the inverse of the summed X'WX. For anything else, that inverse
        multiplied by the residual sum of squares divided by (observations
        minus coefficients).
    """
    def invert(matrix):
        # Inverse of a symmetric positive definite matrix via Cholesky.
        return linalg.cho_solve(linalg.cho_factor(matrix), np.eye(len(matrix)))

    num_coefficients = len(beta_hat)
    paired = zip(clusters, correlation_matrices)

    if method == EstimationMethod.Sandwich:
        bread_inverse = np.zeros((num_coefficients, num_coefficients))
        meat = np.zeros((num_coefficients, num_coefficients))
        for (X, y), correlation_matrix in paired:
            weight = invert(correlation_matrix)
            weighted_design = X.T.dot(weight)
            bread_inverse += weighted_design.dot(X)
            residual = y - X.dot(beta_hat)
            meat += weighted_design.dot(
                np.outer(residual, residual)).dot(weight).dot(X)
        bread = invert(bread_inverse)
        return bread.dot(meat).dot(bread)

    information = np.zeros((num_coefficients, num_coefficients))
    residual_sum_of_squares = 0.
    num_observations = 0.
    for (X, y), correlation_matrix in paired:
        information += X.T.dot(invert(correlation_matrix)).dot(X)
        residual_sum_of_squares += np.sum(np.square(y - X.dot(beta_hat)))
        num_observations += len(y)
    covariance = invert(information)
    dispersion_factor = residual_sum_of_squares/(num_observations - num_coefficients)
    if method == EstimationMethod.GLS:
        return covariance
    return covariance*dispersion_factor
    
def run_experiment(experiment, estimate_beta=False):
    """Runs one simulated trial.

    Samples clusters from `experiment`, fits ordinary least squares on the
    pooled data to obtain residuals for the correlation estimates, then refits
    under each working correlation structure.

    Args:
        experiment: Object providing `sample_clusters()`.
        estimate_beta: When true, return the coefficient estimates instead of
            standard errors.

    Returns:
        If `estimate_beta`, a dict from structure name to coefficient vector.
        Otherwise a dict from estimation method name to a dict from structure
        name to the square root of entry [1, 1] of the estimated covariance.
    """
    clusters = experiment.sample_clusters()
    pooled_X = np.vstack([cluster_X for cluster_X, _ in clusters])
    pooled_y = np.hstack([cluster_y for _, cluster_y in clusters])

    # Ordinary least squares fit on the pooled observations.
    beta_hat_ols = linalg.cho_solve(
        linalg.cho_factor(pooled_X.T.dot(pooled_X)), pooled_X.T.dot(pooled_y))
    ols_residuals = pooled_y - pooled_X.dot(beta_hat_ols)
    sigma_2_hat_ols = (
        np.sum(np.square(ols_residuals))/(len(pooled_y) - len(beta_hat_ols)))

    correlation_matrices = make_correlation_matrices(
        clusters, beta_hat_ols, sigma_2_hat_ols)
    beta_hats = estimate_beta_hats(clusters, correlation_matrices)
    if estimate_beta:
        return beta_hats

    standard_errors = {}
    for method in EstimationMethod:
        by_structure = {}
        for structure_name, beta_hat in beta_hats.items():
            assumed_matrices = [
                matrices_by_name[structure_name]
                for matrices_by_name in correlation_matrices
            ]
            covariance = estimate_covariance(
                clusters, assumed_matrices, method, beta_hat)
            by_structure[structure_name] = np.sqrt(covariance[1, 1])
        standard_errors[method.name] = by_structure
    return standard_errors

In [5]:
def _reseed_worker():
    """Gives a pool worker its own NumPy random state.

    Under the fork start method each worker starts with a copy of the parent's
    global NumPy random state, so without reseeding every worker replays the
    same random stream and trials are duplicated across workers.
    """
    np.random.seed()


def run_experiments(experiment, num_trials, num_processes=4):
    """Averages `run_experiment` over independent trials run in a process pool.

    Args:
        experiment: The experiment passed to every `run_experiment` call.
        num_trials: Number of trials to run and average over.
        num_processes: Number of worker processes in the pool.

    Returns:
        The nested dict returned by `run_experiment`, with every leaf replaced
        by its mean over the trials.
    """
    # The context manager tears the pool down even when a trial raises.
    with pool_lib.Pool(num_processes, initializer=_reseed_worker) as pool:
        results = pool.map(run_experiment, [experiment]*num_trials)
    results = functools.reduce(sum_dict, results)
    return divide_dict(results, num_trials)

In [6]:
# One Experiment per combination of design, number of clusters, correlation
# coefficient and true correlation structure, in product order.
experiments = list(itertools.starmap(
    Experiment.from_template,
    itertools.product(
        [DESIGN_CLUSTERS['I'], DESIGN_CLUSTERS['II']],
        NUM_CLUSTERS,
        WITHIN_CLUSTER_CORRELATIONS,
        [CorrelationStructure.EXCHANGEABLE, CorrelationStructure.EXPONENTIAL],
    ),
))

In [7]:
def index_experiment(experiment):
    """Returns the row key of `experiment` in the results table.

    The key is (number of clusters, design name, true correlation structure
    name, correlation coefficient). The design name is the first key of
    `DESIGN_CLUSTERS` whose value equals `experiment.clusters`.

    Raises:
        IndexError: If no design in `DESIGN_CLUSTERS` matches.
    """
    num_clusters = experiment.num_clusters
    matching_designs = [
        name for name, design in DESIGN_CLUSTERS.items()
        if experiment.clusters == design
    ]
    structure_name = experiment.within_cluster_correlation_structure.name
    return (num_clusters,
            matching_designs[0],
            structure_name,
            experiment.within_cluster_correlation)

# Empty results table. Rows are indexed by the experiment settings (matching
# the tuple produced by index_experiment); columns by estimation method and
# assumed correlation structure.
simulation_results = pd.DataFrame(
    index=pd.MultiIndex.from_product(
        [
            NUM_CLUSTERS,
            DESIGN_CLUSTERS.keys(),
            [
                structure.name
                for structure in (CorrelationStructure.EXCHANGEABLE,
                                  CorrelationStructure.EXPONENTIAL)
            ],
            WITHIN_CLUSTER_CORRELATIONS,
        ],
        names=['$n$', 'Design', 'Correlation structure', 'Correlation'],
    ),
    columns=pd.MultiIndex.from_product(
        [
            [method.name for method in EstimationMethod],
            [structure.name for structure in CorrelationStructure],
        ],
        names=['Estimator', 'Assumed correlation'],
    ),
)

In [8]:
# Fill one row per experiment. The nested {method: {structure: value}} dict of
# averages over 4096 trials is stacked into a Series indexed by
# (method, structure) so that it lines up with the table's columns.
for experiment in experiments:
    simulation_results.loc[index_experiment(experiment)] = pd.DataFrame.from_dict(
        run_experiments(experiment, 4096),
        orient='index',
    ).stack()
simulation_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Estimator,GLS,GLS,GLS,QL,QL,QL,Sandwich,Sandwich,Sandwich
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Assumed correlation,NONE,EXCHANGEABLE,EXPONENTIAL,NONE,EXCHANGEABLE,EXPONENTIAL,NONE,EXCHANGEABLE,EXPONENTIAL
$n$,Design,Correlation structure,Correlation,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
15,I,EXCHANGEABLE,0.5,0.03849,0.028486,0.0378713,0.0376096,0.0275642,0.0369035,0.0257218,0.0257218,0.0261588
15,I,EXCHANGEABLE,0.9,0.03849,0.0146635,0.0246101,0.03698,0.0137585,0.0231898,0.0114968,0.0114968,0.0120548
15,I,EXPONENTIAL,0.5,0.03849,0.0317356,0.0379941,0.037663,0.0308837,0.0371019,0.0361906,0.0361906,0.0357685
15,I,EXPONENTIAL,0.9,0.03849,0.017354,0.0246015,0.0370272,0.0163324,0.0232136,0.0202105,0.0202105,0.0199779
15,II,EXCHANGEABLE,0.5,0.0447223,0.0345043,0.0402881,0.0440769,0.0337064,0.039519,0.0340208,0.0309187,0.0313046
15,II,EXCHANGEABLE,0.9,0.0447223,0.0190387,0.0250425,0.0435155,0.0181884,0.0239758,0.0259396,0.0140763,0.0145438
15,II,EXPONENTIAL,0.5,0.0447223,0.0365402,0.0402493,0.0441191,0.0358131,0.0395351,0.039112,0.0374805,0.0373715
15,II,EXPONENTIAL,0.9,0.0447223,0.0206638,0.0249189,0.0435549,0.0197552,0.0238625,0.0282496,0.0193085,0.0190658
30,I,EXCHANGEABLE,0.5,0.0272166,0.0197434,0.0268184,0.0267877,0.0193347,0.0263546,0.0186932,0.0186932,0.0190784
30,I,EXCHANGEABLE,0.9,0.0272166,0.00953959,0.016304,0.0265356,0.00918152,0.0157176,0.0083709,0.0083709,0.00880173


In [9]:
import os

# Create the output directory if needed. exist_ok avoids the race between
# checking for the directory and creating it.
os.makedirs('simulation_results', exist_ok=True)

# Write one .tex fragment per table row: the row's values rounded to five
# decimals and joined with ' & '. The file name is the row key joined
# with '-', with '.' replaced by '_'.
for key, values in simulation_results.iterrows():
    file_name = '-'.join(map(str, key)).replace('.', '_')
    with open('simulation_results/{}.tex'.format(file_name), 'w') as f:
        f.write(' & '.join(str(np.round(v, decimals=5)) for v in values.values))