In [743]:
import enum
import functools
import numpy as np
import pandas as pd
from scipy import stats
from scipy import linalg
from typing import Callable, List, NamedTuple, Sequence, Tuple

BETA = np.array([0., 0.5], dtype=np.float64)
DESIGN_CLUSTERS = {
    'I': [[7, 10, 13, 16]],
    'II': [[7, 10, 13], [7, 10, 16], [7, 13, 16], [10, 13, 16]],
}
NUM_CLUSTERS = [15, 30, 60]
WITHIN_CLUSTER_CORRELATIONS = [0.5, 0.9]

CorrelationStructure = enum.Enum(
    'CorrelationStructure',
    'NONE EXCHANGEABLE EXPONENTIAL')

EstimationMethod = enum.Enum(
    'EstimationMethod',
    'GLS QL Sandwich')

In [744]:
class Experiment(NamedTuple('Experiment', [
    ('beta', np.array),
    ('error_variance', float),
    ('num_clusters', Sequence[Tuple[np.array, np.array]]),
    ('clusters', Sequence[np.array]),
    ('within_cluster_correlation', float),
    ('within_cluster_correlation_structure', CorrelationStructure),
])):
    """Encapsulates parameters for the data generating mechanism."""
    
    def sample_clusters(self) -> List[Tuple[np.array, np.array]]:
        return [self._sample_cluster() for _ in range(self.num_clusters)]
    
    def _sample_cluster(self) -> Tuple[np.array, np.array]:
        covariates = self._sample_cluster_covariates()
        covariates = np.column_stack((np.ones(len(covariates)), covariates))
        covariance = self._make_within_cluster_covariance(len(covariates))
        response = stats.multivariate_normal(
            mean=np.matmul(covariates, self.beta), cov=covariance).rvs()
        return covariates, response
    
    def _sample_cluster_covariates(self) -> np.array:
        return self.clusters[np.random.choice(len(self.clusters))]
    
    def _make_within_cluster_covariance(self, cluster_size):
        correlation = np.eye(cluster_size)
        if self.within_cluster_correlation_structure == CorrelationStructure.EXCHANGEABLE:
            correlation[correlation == 0] = self.within_cluster_correlation
        elif self.within_cluster_correlation_structure == CorrelationStructure.EXPONENTIAL:
            for i in range(cluster_size):
                for j in range(i + 1, cluster_size):
                    correlation[i, j] = correlation[j, i] = np.power(
                        self.within_cluster_correlation, np.abs(j - i))
        return self.error_variance*correlation
        
    @classmethod
    def from_template(
        cls,
        clusters,
        num_clusters,
        within_cluster_correlation,
        within_cluster_correlation_structure) -> 'Experiment':
        assert len(set([len(cluster) for cluster in clusters])) == 1,\
               'Clusters must be the same size.'
        
        return cls(beta=BETA,
                   clusters=clusters,
                   error_variance=1.,
                   num_clusters=num_clusters,
                   within_cluster_correlation=within_cluster_correlation,
                   within_cluster_correlation_structure=within_cluster_correlation_structure)

In [745]:
def sum_dict(acc, result):
    if type(acc) == dict:        
        return {key: sum_dict(value, result[key]) for key, value in acc.items()}   
    return acc + result
    
def divide_dict(results, d):
    if type(results) == dict:
        return {key: divide_dict(value, d) for key, value in results.items()}
    return results/d

In [800]:
def estimate_rho(epsilon_hat):
    covariance = np.outer(epsilon_hat, epsilon_hat)    
    rho_exchangeable = 0.
    rho_exponential = 0.
    for i in range(len(covariance)):
        for j in range(i + 1, len(covariance[i])):
            rho_exchangeable += covariance[i, j]
            if j - i == 1:
                rho_exponential += covariance[i, j]
    cluster_pairwise_count = (covariance.shape[0] - 1)*(covariance.shape[1] - 2)
    rho_exchangeable /= (np.square(covariance.shape[0]) - covariance.shape[0])/2
    rho_exponential /= covariance.shape[0] - 1
    return rho_exchangeable, rho_exponential

def make_correlation_matrices(clusters, beta_hat, sigma_2_hat):
    rho_exchangeable = 0.
    rho_exponential = 0.        
    for X, y in clusters:
        cluster_rho_exchangeable, cluster_rho_exponential = estimate_rho(
            (y - X.dot(beta_hat))/np.sqrt(sigma_2_hat))
        rho_exchangeable += cluster_rho_exchangeable
        rho_exponential += cluster_rho_exponential
            
    rho_exchangeable /= len(clusters)
    rho_exponential /= len(clusters)
    
    correlation_matrices = []
    for X, y in clusters:
        exchangeable_matrix = np.eye(len(y))
        exchangeable_matrix[exchangeable_matrix == 0] = rho_exchangeable
        exponential_matrix = np.eye(len(y))
        for i in range(len(y) - 1):
            for j in range(i + 1, len(y)):
                exponential_matrix[i, j] = exponential_matrix[j, i] = np.power(rho_exponential, j - i)
        correlation_matrices.append({
            CorrelationStructure.NONE.name: np.eye(len(y)),
            CorrelationStructure.EXCHANGEABLE.name: exchangeable_matrix,
            CorrelationStructure.EXPONENTIAL.name: exponential_matrix,            
        })
        
    return correlation_matrices

def estimate_beta_hats(clusters, correlation_matrices):
    def estimate_beta_hat(X, y, correlation_matrix):
        weight = linalg.cho_solve(
            linalg.cho_factor(correlation_matrix), np.eye(len(correlation_matrix)))
        gram_matrix = linalg.cho_factor(X.T.dot(weight).dot(X))
        return linalg.cho_solve(gram_matrix, X.T.dot(weight).dot(y))
    
    beta_hats = [
        {
            key: estimate_beta_hat(X, y, inv_weight)
            for key, inv_weight in inv_weights.items()
        } for (X, y), inv_weights in zip(clusters, correlation_matrices)
    ]
    return divide_dict(functools.reduce(sum_dict, beta_hats), len(beta_hats))

def estimate_covariance(clusters,
                        correlation_matrices,
                        method,
                        beta_hat):
    if method != EstimationMethod.Sandwich:
        covariance = np.zeros((len(beta_hat), len(beta_hat)))
        dispersion_factor = 0.
        total = 0.
        for (X, y), correlation_matrix in zip(clusters, correlation_matrices):
            weight = linalg.cho_solve(
                linalg.cho_factor(correlation_matrix), np.eye(len(correlation_matrix)))
            covariance += X.T.dot(weight).dot(X)
            dispersion_factor += np.sum(np.square(y - X.dot(beta_hat)))
            total += len(y)
        covariance = linalg.cho_solve(linalg.cho_factor(covariance), np.eye(len(beta_hat)))
        dispersion_factor /= total - len(beta_hat)
        return covariance if method == EstimationMethod.GLS else covariance*dispersion_factor
    
    return np.zeros((len(beta_hat), len(beta_hat)))
    
def run_experiment(experiment, estimate_beta=False):
    clusters = experiment.sample_clusters()
    X = np.vstack([X for X, _ in clusters])
    y = np.hstack([y for _, y in clusters])
    
    gram_matrix_ols = X.T.dot(X)
    
    beta_hat_ols = linalg.cho_solve(linalg.cho_factor(gram_matrix_ols), X.T.dot(y))
    sigma_2_hat_ols = np.sum(np.square(y - X.dot(beta_hat_ols)))/(len(y) - len(beta_hat_ols))
    
    correlation_matrices = make_correlation_matrices(
        clusters,
        beta_hat_ols,
        sigma_2_hat_ols)
    beta_hats = estimate_beta_hats(clusters, correlation_matrices)
    
    if estimate_beta:
        return beta_hats
    
    return {
        method.name: {
            correlation_structure: np.sqrt(estimate_covariance(
                clusters,
                [matrix_dict[correlation_structure] for matrix_dict in correlation_matrices],
                method,
                beta_hat)[1, 1])
            for correlation_structure, beta_hat in beta_hats.items()
        }
        for method in EstimationMethod
    }
    
    print(correlation_matrices[0])
    print(correlation_matrices[1])
        
    return {
        EstimationMethod.GLS.name: {
            CorrelationStructure.NONE.name: 0, #np.sqrt(gram_matrix_inv[1, 1]),
            CorrelationStructure.EXCHANGEABLE.name: 0.,
            CorrelationStructure.EXPONENTIAL.name: 0.,
        },
        EstimationMethod.QL.name: {
            CorrelationStructure.NONE.name: 0, #np.sqrt(gram_matrix_inv[1, 1]*sigma_2_hat),
            CorrelationStructure.EXCHANGEABLE.name: 0.5,
            CorrelationStructure.EXPONENTIAL.name: 0.,            
        },
        EstimationMethod.Sandwich.name: {
            CorrelationStructure.NONE.name: 0,#np.sqrt(sandwich_variance[1, 1]),
            CorrelationStructure.EXCHANGEABLE.name: 0.,
            CorrelationStructure.EXPONENTIAL.name: 0.,
        },
    }

def run_experiments(experiment, num_trials):                    
    results = [run_experiment(experiment) for _ in range(num_trials)]
    results = functools.reduce(sum_dict, results)
    return divide_dict(results, num_trials)

experiment = Experiment.from_template(
    DESIGN_CLUSTERS['I'],
    NUM_CLUSTERS[2],
    WITHIN_CLUSTER_CORRELATIONS[0],
    CorrelationStructure.EXPONENTIAL)

tmp = run_experiments(experiment, 128)
#tmp = run_experiment(experiment)
#experiment.sample_clusters()
tmp

{'GLS': {'EXCHANGEABLE': 0.015576944643173045,
  'EXPONENTIAL': 0.018950136586479142,
  'NONE': 0.019245008972987462},
 'QL': {'EXCHANGEABLE': 0.015503838166205111,
  'EXPONENTIAL': 0.018877295232905236,
  'NONE': 0.019179316383929827},
 'Sandwich': {'EXCHANGEABLE': 0.0, 'EXPONENTIAL': 0.0, 'NONE': 0.0}}

In [796]:
beta_hats = [
    run_experiment(
        experiment, estimate_beta=True)[CorrelationStructure.EXCHANGEABLE.name]
    for _ in range(256)
]
np.sqrt(np.var(beta_hats, ddof=1, axis=0))

array([0.24412215, 0.01982407])

In [784]:
np.sqrt(0.02239943)

0.14966439122249486

In [740]:
pd.DataFrame.from_dict(tmp, orient='index').stack()

GLS       NONE            0
          EXCHANGEABLE    0
          EXPONENTIAL     0
QL        NONE            0
          EXCHANGEABLE    0
          EXPONENTIAL     0
Sandwich  NONE            0
          EXCHANGEABLE    0
          EXPONENTIAL     0
dtype: int64

In [390]:
def index_experiment(experiment):    
    return (experiment.num_clusters,
            [k for k, v in DESIGN_CLUSTERS.items() if experiment.clusters == v][0],
            experiment.within_cluster_correlation_structure.name,
            experiment.within_cluster_correlation)

simulation_results = pd.DataFrame(
    index=pd.MultiIndex.from_product(
        [NUM_CLUSTERS, DESIGN_CLUSTERS.keys(),
         [CorrelationStructure.EXCHANGEABLE.name, CorrelationStructure.EXPONENTIAL.name],
         WITHIN_CLUSTER_CORRELATIONS],
        names=['$n$', 'Design', 'Correlation structure', 'Correlation']),
    columns=pd.MultiIndex.from_product(
        [[value.name for value in EstimationMethod],
         [value.name for value in CorrelationStructure]],
        names=['Estimator', 'Assumed correlation']
    ))

simulation_results.loc[index_experiment(experiment)] = (
    pd.DataFrame.from_dict(tmp, orient='index').stack())
simulation_results.to_dict()

{('OLS', 'EXCHANGEABLE'): {(15, 'I', 'EXCHANGEABLE', 0.5): nan,
  (15, 'I', 'EXCHANGEABLE', 0.9): nan,
  (15, 'I', 'EXPONENTIAL', 0.5): nan,
  (15, 'I', 'EXPONENTIAL', 0.9): nan,
  (15, 'II', 'EXCHANGEABLE', 0.5): nan,
  (15, 'II', 'EXCHANGEABLE', 0.9): nan,
  (15, 'II', 'EXPONENTIAL', 0.5): nan,
  (15, 'II', 'EXPONENTIAL', 0.9): nan,
  (30, 'I', 'EXCHANGEABLE', 0.5): nan,
  (30, 'I', 'EXCHANGEABLE', 0.9): nan,
  (30, 'I', 'EXPONENTIAL', 0.5): nan,
  (30, 'I', 'EXPONENTIAL', 0.9): nan,
  (30, 'II', 'EXCHANGEABLE', 0.5): nan,
  (30, 'II', 'EXCHANGEABLE', 0.9): nan,
  (30, 'II', 'EXPONENTIAL', 0.5): nan,
  (30, 'II', 'EXPONENTIAL', 0.9): nan,
  (60, 'I', 'EXCHANGEABLE', 0.5): nan,
  (60, 'I', 'EXCHANGEABLE', 0.9): nan,
  (60, 'I', 'EXPONENTIAL', 0.5): nan,
  (60, 'I', 'EXPONENTIAL', 0.9): nan,
  (60, 'II', 'EXCHANGEABLE', 0.5): nan,
  (60, 'II', 'EXCHANGEABLE', 0.9): nan,
  (60, 'II', 'EXPONENTIAL', 0.5): 0.0,
  (60, 'II', 'EXPONENTIAL', 0.9): nan},
 ('OLS', 'EXPONENTIAL'): {(15, 'I', 'E

<CorrelationStructure.NONE: 1>

In [177]:
np.random.choice([[7, 9], [2, 3], [1, 2]])

ValueError: a must be 1-dimensional

In [42]:
linalg.cholesky(tmp).T.dot(linalg.cholesky(tmp))

array([[1. , 0.5, 0.5, 0.5],
       [0.5, 1. , 0.5, 0.5],
       [0.5, 0.5, 1. , 0.5],
       [0.5, 0.5, 0.5, 1. ]])

In [55]:
linalg.inv(tmp)

array([[ 1.6, -0.4, -0.4, -0.4],
       [-0.4,  1.6, -0.4, -0.4],
       [-0.4, -0.4,  1.6, -0.4],
       [-0.4, -0.4, -0.4,  1.6]])

In [52]:
np.sqrt(3)/6

0.28867513459481287