* Additional covariate: Baseline intensity varies by care home size.
* Data are daily numbers of cases from February to July.
* 1000 care homes
* 330 homes had cases; 670 had no cases
* 2000 total cases
* 50% of homes received discharge transfers from hospital
* 3000 discharges in total
* Data Frame 1: 1 row per day, 1 column per care home ID, entries are numbers of cases
* Data Frame 2: 1 row per day, 1 column per care home ID, entries are numbers of discharges
* Data Frame 3: 1 row per covariate, 1 column per care home ID, entries are covariates (to start with, just one: care home size, likely as a category with 4 groups).


In [2]:
# --- Value ranges used to pick storage types ---
MAX_BEDS_PER_HOME = 255   # largest bed count a single home may report
MAX_CATEGORIES = 4        # number of care-home size categories

# --- Size of the synthetic data set ---
N_CARE_HOMES = 1_000      # total homes (one column each)
N_DAYS = 181              # daily rows in the cases/discharges tables
N_COVARIATES = 1          # covariate rows (currently only care home size)

# --- Event totals and how many homes they are spread over ---
N_CASES = 2_000
N_CASE_HOMES = 330
N_DISCHARGES = 3_000
N_DISCHARGE_HOMES = 500

In [3]:
import numpy as np

def get_dtype(max_value):
    """Return a compact NumPy integer type able to store ``max_value``.

    Parameters
    ----------
    max_value : int
        Largest value that will be stored.

    Returns
    -------
    type
        ``np.int8`` for values up to 127, ``np.uint8`` for 128..255,
        ``np.int32`` up to the int32 maximum, and ``np.int64`` beyond that.

    Notes
    -----
    ``np.int8`` only reaches 127, so returning it for every value below 256
    would silently wrap values in 128..255 (e.g. 255 becomes -1). Those
    values are mapped to ``np.uint8`` instead; results for values below 128
    and for values from 256 up to the int32 maximum are unchanged.
    """
    if max_value <= np.iinfo(np.int8).max:
        return np.int8
    if max_value <= np.iinfo(np.uint8).max:
        return np.uint8
    if max_value <= np.iinfo(np.int32).max:
        return np.int32
    # Anything wider than int32 needs the 64-bit type to avoid overflow.
    return np.int64

# Storage types for bed counts and size categories, each derived from the
# configured maximum value via get_dtype.
BED_DTYPE, CAT_DTYPE = (
    get_dtype(upper_bound) for upper_bound in (MAX_BEDS_PER_HOME, MAX_CATEGORIES)
)


In [14]:
# Daily count tables: one row per day plus one leading row, one column per care home.
cases, discharges = (
    np.zeros((1 + N_DAYS, N_CARE_HOMES), dtype=np.int32) for _ in range(2)
)
# Covariate table: one row per covariate plus one leading row.
covariates = np.zeros((1 + N_COVARIATES, N_CARE_HOMES), dtype=np.int32)

In [15]:
MAX_CAREHOME_ID = 32767
SEED = 20200201  # fixed seed so "Restart & Run All" regenerates identical CSVs

rng = np.random.default_rng(SEED)
# Unique synthetic IDs, drawn without replacement from [0, MAX_CAREHOME_ID).
care_home_ids = rng.choice(MAX_CAREHOME_ID, size=N_CARE_HOMES, replace=False)

# Row 0 of every table holds the care home IDs. Each table gets its own
# independent shuffle, so the same column index refers to different homes in
# different tables.
for sample_array in cases, discharges, covariates:
    sample_array[0] = rng.permutation(care_home_ids)

for sample_array, num_instances, num_places in (
        (cases, N_CASES, N_CASE_HOMES), (discharges, N_DISCHARGES, N_DISCHARGE_HOMES)
):
    if num_instances < num_places:
        raise ValueError(
            "need at least one event per affected home: "
            f"{num_instances} events for {num_places} homes"
        )
    # Clear earlier counts so re-running this cell does not accumulate events.
    sample_array[1:] = 0
    # Events only ever land in the first `num_places` columns. Give each of
    # those homes one event up front so exactly `num_places` homes are
    # affected, then spread the remaining events uniformly over them.
    home_columns = np.concatenate((
        np.arange(num_places),
        rng.integers(num_places, size=num_instances - num_places),
    ))
    # Days are uniform over rows 1..N_DAYS (row 0 is the ID header).
    day_rows = 1 + rng.integers(N_DAYS, size=num_instances)
    # np.add.at accumulates repeated (day, home) pairs; a plain fancy-indexed
    # `+= 1` would count each repeated pair only once.
    np.add.at(sample_array, (day_rows, home_columns), 1)

# One size category in [0, MAX_CATEGORIES) per home.
covariates[1] = rng.choice(MAX_CATEGORIES, size=N_CARE_HOMES)

In [16]:
# Write each table as comma-separated integers; row 0 of every file is the
# care home ID header row.
for csv_name, table in (
    ("covariates.csv", covariates),
    ("cases.csv", cases),
    ("discharges.csv", discharges),
):
    np.savetxt(csv_name, table, fmt="%d", delimiter=',')