In [135]:
import enum
import numpy as np
from scipy import stats
from scipy import linalg
from typing import Callable, NamedTuple, Sequence, Tuple

# True regression coefficients: [intercept, slope]. Experiment.sample_cluster
# prepends a column of ones to the covariates, so BETA[0] is the intercept.
BETA = np.array((0.0, 0.5), dtype=np.float64)

# Design 1: a single cluster template containing all four covariate values.
DESIGN_1_CLUSTERS = [
    [7, 10, 13, 16],
]

# Design 2: every three-element subset of the four design-1 values.
DESIGN_2_CLUSTERS = [
    [7, 10, 13],
    [7, 10, 16],
    [7, 13, 16],
    [10, 13, 16],
]

# Within-cluster correlation levels to experiment with.
WITHIN_CLUSTER_CORRELATIONS = [0.5, 0.9]

# Shape of the within-cluster correlation matrix of the errors.
CorrelationStructure = enum.Enum(
    'CorrelationStructure',
    ['NONE', 'EXCHANGEABLE', 'EXPONENTIAL'])

In [203]:



class Experiment(NamedTuple('Experiment', [
    ('beta', np.ndarray),
    ('error_variance', float),
    ('sample_cluster_covariates_fn', Callable[[], np.ndarray]),
    ('within_cluster_correlation', float),
    ('within_cluster_correlation_structure', CorrelationStructure),
])):
    """Encapsulates parameters for the data generating mechanism.

    Fields:
        beta: Regression coefficients. The first entry multiplies the column
            of ones that `sample_cluster` prepends to the covariates.
        error_variance: Scalar that multiplies the within-cluster correlation
            matrix to give the error covariance.
        sample_cluster_covariates_fn: Zero-argument callable returning the
            covariate values (array-like) for one cluster.
        within_cluster_correlation: Correlation parameter used to fill the
            off-diagonal entries of the correlation matrix.
        within_cluster_correlation_structure: `CorrelationStructure` member
            selecting how the off-diagonal entries are filled.
    """

    def _make_within_cluster_covariance(self, cluster_size):
        """Builds the error covariance matrix for a single cluster.

        Args:
            cluster_size: Number of observations in the cluster.

        Returns:
            A `(cluster_size, cluster_size)` array equal to `error_variance`
            times the correlation matrix. The correlation matrix is the
            identity unless the structure is EXCHANGEABLE (every off-diagonal
            entry equals the correlation) or EXPONENTIAL (entry `(i, j)`
            equals the correlation raised to `|i - j|`).
        """
        structure = self.within_cluster_correlation_structure
        rho = self.within_cluster_correlation
        correlation = np.eye(cluster_size)
        if structure == CorrelationStructure.EXCHANGEABLE:
            # The identity is zero exactly at the off-diagonal positions.
            correlation[correlation == 0] = rho
        elif structure == CorrelationStructure.EXPONENTIAL:
            # Use the `cluster_size` argument here: the tuple has no
            # `cluster_size` field, so `self.cluster_size` raised
            # AttributeError for every EXPONENTIAL experiment.
            positions = np.arange(cluster_size)
            lags = np.abs(positions[:, np.newaxis] - positions[np.newaxis, :])
            off_diagonal = lags > 0
            correlation[off_diagonal] = np.power(rho, lags[off_diagonal])
        return self.error_variance*correlation

    def sample_cluster(self) -> Tuple[np.ndarray, np.ndarray]:
        """Draws the covariates and responses for one cluster.

        Returns:
            A `(covariates, response)` pair. `covariates` is the design matrix
            with a leading column of ones; `response` is a single draw from a
            multivariate normal with mean `covariates @ beta` and the
            within-cluster covariance.
        """
        covariates = self.sample_cluster_covariates_fn()
        # Prepend the intercept column.
        covariates = np.column_stack((np.ones(len(covariates)), covariates))
        covariance = self._make_within_cluster_covariance(len(covariates))
        response = stats.multivariate_normal(
            mean=np.matmul(covariates, self.beta), cov=covariance).rvs()
        return covariates, response

    @classmethod
    def from_template(
        cls,
        clusters,
        within_cluster_correlation,
        within_cluster_correlation_structure
    ) -> 'Experiment':
        """Creates an experiment that samples clusters from fixed templates.

        The experiment uses the module-level `BETA` coefficients and an error
        variance of 1.

        Args:
            clusters: Sequence of equally sized covariate lists. Each sampled
                cluster is one of these, chosen with `np.random.choice`.
            within_cluster_correlation: Correlation parameter for the errors.
            within_cluster_correlation_structure: `CorrelationStructure`
                member describing the error correlation.

        Returns:
            The configured `Experiment`.

        Raises:
            AssertionError: If the clusters are not all the same size.
        """
        assert len({len(cluster) for cluster in clusters}) == 1,\
               'Clusters must be the same size.'

        def sample_cluster_covariates_fn():
            # Sample an index rather than an element: np.random.choice only
            # accepts 1-dimensional input, and `clusters` is nested.
            return clusters[np.random.choice(len(clusters))]

        return cls(beta=BETA,
                   error_variance=1.,
                   sample_cluster_covariates_fn=sample_cluster_covariates_fn,
                   within_cluster_correlation=within_cluster_correlation,
                   within_cluster_correlation_structure=within_cluster_correlation_structure)
    
def run_experiment(experiment, num_trials):
    """Returns the result skeleton for an experiment run.

    This is currently a placeholder: neither `experiment` nor `num_trials`
    is used, and no trials are simulated.

    Args:
        experiment: The experiment to run (currently unused).
        num_trials: Number of trials to run (currently unused).

    Returns:
        A dict with keys 'ols', 'quasi_likelihood' and 'sandwich_estimator'.
        'ols' maps the NONE and EXCHANGEABLE correlation structures to 0;
        the other two entries are empty dicts.
    """
    ols_structures = (
        CorrelationStructure.NONE,
        CorrelationStructure.EXCHANGEABLE,
    )
    results = {'ols': dict.fromkeys(ols_structures, 0)}
    results['quasi_likelihood'] = {}
    results['sandwich_estimator'] = {}
    return results

# Design-2 cluster templates with exchangeable errors, using the first
# configured within-cluster correlation.
experiment = Experiment.from_template(
    clusters=DESIGN_2_CLUSTERS,
    within_cluster_correlation=WITHIN_CLUSTER_CORRELATIONS[0],
    within_cluster_correlation_structure=CorrelationStructure.EXCHANGEABLE,
)

# The return value is discarded; only the final expression is displayed.
run_experiment(experiment, num_trials=1024)
# Display one sampled cluster as (design matrix, responses). No random seed
# is set, so the displayed draw differs between runs.
experiment.sample_cluster()

(array([[ 1., 10.],
        [ 1., 13.],
        [ 1., 16.]]), array([5.2887613 , 7.1128281 , 8.19946964]))

In [177]:
# Demonstrates that np.random.choice rejects nested (2-D) input with
# "ValueError: a must be 1-dimensional"; sample an index with
# np.random.choice(len(seq)) and index into the sequence instead.
np.random.choice([[7, 9], [2, 3], [1, 2]])

ValueError: a must be 1-dimensional

In [42]:
# Round-trip check: multiplies the transposed Cholesky factor of `tmp` by
# the factor itself, which should reproduce `tmp`.
# NOTE(review): `tmp` is not defined in any cell shown here — presumably the
# 4x4 exchangeable matrix echoed below; define it in this cell so the
# notebook survives Restart & Run All.
linalg.cholesky(tmp).T.dot(linalg.cholesky(tmp))

array([[1. , 0.5, 0.5, 0.5],
       [0.5, 1. , 0.5, 0.5],
       [0.5, 0.5, 1. , 0.5],
       [0.5, 0.5, 0.5, 1. ]])

In [55]:
# Matrix inverse of `tmp`.
# NOTE(review): `tmp` is not defined in any cell shown here — confirm it is
# the same matrix as in the Cholesky check and define it explicitly.
linalg.inv(tmp)

array([[ 1.6, -0.4, -0.4, -0.4],
       [-0.4,  1.6, -0.4, -0.4],
       [-0.4, -0.4,  1.6, -0.4],
       [-0.4, -0.4, -0.4,  1.6]])

In [52]:
# Scratch arithmetic: sqrt(3)/6, i.e. 1/(2*sqrt(3)).
# NOTE(review): what this value is being compared against is not shown in
# the notebook — label it or remove the cell.
np.sqrt(3)/6

0.28867513459481287