In [1]:
import pickle
import os
from rulu.bootstrap_CI_test import *
from rulu.normal_normal_model import get_samples
from rulu.utils import get_test_params, find_all_tests_in_same_category
from matplotlib import pyplot as plt
from scipy.stats import percentileofscore
import time

In [2]:
def print_test_collection_result(test_collection: List[BootstrapCITest]) -> None:
    """
    Print how many tests in `test_collection` have their theoretical
    quantity inside the sample CI, as a count and as a percentage.

    The summary line is prefixed with the `test_name()` of the first test.
    A short notice is printed instead when the collection is `None` or empty.

    :param test_collection: List of tests
    :return: None
    """
    if test_collection is None or not len(test_collection):
        print("There is nothing in the provided test collection.")
        return

    # One boolean-like flag per test: is the theoretical quantity within the CI?
    hit_flags = [candidate.theoretical_quantity_in_sample_CI()
                 for candidate in test_collection]

    collection_label = test_collection[0].test_name()
    num_hits = np.sum(hit_flags)
    num_total = len(hit_flags)
    hit_percentage = np.round(100.0 * num_hits / num_total, 2)

    summary = ": {}/{} ({}%) ".format(num_hits, num_total, hit_percentage)
    print(collection_label + summary +
          "of the tests have the theoretical quantity within the CI.")


def save_test_collection(test_collection: List[BootstrapCITest], in_dir: str = './output/') -> None:
    """
    Save given `test_collection` as a pickle file in `in_dir`.

    The file is named `<class name of the first test>_<unix timestamp>.pickle`.
    Nothing is written (and nothing is printed) when the collection is
    `None` or empty.

    :param test_collection: List of tests
    :param in_dir: Output directory; it must already exist, as `open` does
                   not create missing directories
    :return: None
    """
    if test_collection is None or len(test_collection) == 0:
        return

    base_name = (str(test_collection[0].__class__.__name__) +
                 "_" + str(int(time.time())) + ".pickle")
    # os.path.join yields the same path as plain concatenation when `in_dir`
    # ends with a separator, and a correct one when it does not.
    file_name = os.path.join(in_dir, base_name)

    # The context manager flushes and closes the handle even if dumping fails,
    # so the file is complete by the time the message below is printed.
    with open(file_name, 'wb') as pickle_file:
        pickle.dump(test_collection, pickle_file)

    print("The test collection is saved at " + file_name)


def find_all_tests_in_same_category(test, in_dir='../output'):
    """
    Retrieve all tests in `in_dir` that are of the same type as the specified `test`.

    Every file in `in_dir` whose name contains the class name of `test` is
    unpickled, and the items of the iterable stored in each file are
    concatenated into one flat list.

    NOTE: this definition shadows the function of the same name imported
    from `rulu.utils` in the notebook's import cell.

    :param test: Any object; only its class name is used, to select files
    :param in_dir: Directory containing the pickle files
    :return: Flat list of the loaded tests, files visited in sorted name order
    """

    def get_tests_from_pickle_file(file_path):
        # SECURITY: unpickling can execute arbitrary code, so `in_dir` must
        # only contain files produced by a trusted source.
        # The context manager closes the handle once the file has been read.
        with open(file_path, 'rb') as filehandler:
            return pickle.load(filehandler)

    class_name = str(test.__class__.__name__)

    # os.listdir returns entries in arbitrary order; sorting makes the order
    # of the returned tests reproducible across runs and platforms.
    tests_pickle_fps = [
        os.path.join(in_dir, file_name)
        for file_name in sorted(os.listdir(in_dir))
        if class_name in file_name]

    return [loaded_test
            for tests in map(get_tests_from_pickle_file, tests_pickle_fps)
            for loaded_test in tests]

In [3]:
# Experiment size
num_tests = 100    # how many independent experiments the main loop runs
num_runs = 10_000  # first argument handed to get_samples for each experiment

# Bootstrap budget per test, and the chunk size it is generated in
num_bootstrap_samples = 2_000
BOOTSTRAP_BATCH_SIZE = 100

In [4]:
# Prepare test collections: one list per kind of bootstrap CI test. Every
# completed experiment appends exactly one test to each of these lists.
var_V_tests = []
cov_V1_V2_tests = []
var_D_tests = []

bootstrap_test_collections = [
    var_V_tests,
    cov_V1_V2_tests,
    var_D_tests
]

# Loop-invariant split of the bootstrap budget into full batches plus one
# final (possibly empty) partial batch.
num_full_batches = num_bootstrap_samples // BOOTSTRAP_BATCH_SIZE
last_batch_size = num_bootstrap_samples % BOOTSTRAP_BATCH_SIZE

for num_test in range(num_tests):
    try:
        # Sample the parameters from a realistic parameter space
        params = get_test_params()
        N = int(params['N'])
        M = int(params['M'])
        mu_X = params['mu_X']
        mu_epsilon = params['mu_epsilon']
        sigma_sq_X = params['sigma_sq_X']
        sigma_sq_1 = params['sigma_sq_1']
        sigma_sq_2 = params['sigma_sq_2']
        r = params['r']
        s = params['s']

        # Prepare a test
        var_V_test = VarVCITest(sigma_sq_X, N, M, sigma_sq_1=sigma_sq_1)
        cov_V1_V2_test = CovV1V2CITest(sigma_sq_X, sigma_sq_1, sigma_sq_2, N, M)
        var_D_test = VarDCITest(sigma_sq_X, sigma_sq_1, sigma_sq_2, N, M)

        # Arrange the covariance tests IN THE SAME ORDER as that in the
        # collections: they are paired up positionally further below.
        bootstrap_tests = [var_V_test,
                           cov_V1_V2_test,
                           var_D_test]

        # Get the initial samples (shared by all tests of this experiment).
        # `end='\r'` makes each progress message overwrite the previous one.
        print("Test {}/{} (N={}, M={}): calculating initial samples...    "
              .format(num_test + 1, num_tests, N, M),
              end='\r')

        samples = get_samples(num_runs, N, M, mu_X, mu_epsilon,
                              sigma_sq_X, sigma_sq_1, sigma_sq_2,
                              verbose=False, r=r, s=s)

        for test in bootstrap_tests:
            test.set_initial_samples(samples)

        # Bootstrap from the initial samples in batches
        for i in range(num_full_batches):
            print("Test {}/{} (N={}, M={}): calculating bootstrap samples {}/{}...    "
                  .format(num_test + 1, num_tests, N, M,
                          i * BOOTSTRAP_BATCH_SIZE, num_bootstrap_samples),
                  end='\r')

            for test in bootstrap_tests:
                test.generate_bootstrap_samples(BOOTSTRAP_BATCH_SIZE)

        # Generate bootstrap samples that can't fit in a batch
        for test in bootstrap_tests:
            test.generate_bootstrap_samples(last_batch_size)

        # Once a test is completed (i.e. we collected enough covariate
        # samples), we add it to the corresponding, existing collection
        # of tests. A test interrupted before this point is discarded.
        for bootstrap_test, bootstrap_test_collection in \
                zip(bootstrap_tests, bootstrap_test_collections):
            bootstrap_test_collection.append(bootstrap_test)

        print("Test {}/{} (N={}, M={}): Done.                                         "
              .format(num_test + 1, num_tests, N, M))

    except KeyboardInterrupt:
        # Breaking the for loop is sufficient, as we want to
        # analyse the results accumulated so far
        break

# Print some statistics and save the test collections for future reference
# once we reached the number of experiments / process is interrupted
for bootstrap_test_collection in bootstrap_test_collections:
    try:
        print_test_collection_result(bootstrap_test_collection)
    except KeyboardInterrupt:
        # The collection may be empty (e.g. the run was interrupted before
        # the first test finished), so do not index into it blindly.
        skipped_name = (bootstrap_test_collection[0].test_name()
                        if bootstrap_test_collection
                        else "(empty collection)")
        print("{}: Skipped.".format(skipped_name))
    save_test_collection(bootstrap_test_collection)

Test 1/100 (N=14, M=7): Done.                                         
Test 2/100 (N=11, M=5): Done.                                         
Test 3/100 (N=178, M=45): Done.                                         
Test 4/100 (N=120, M=32): Done.                                         
Test 5/100 (N=19, M=6): Done.                                         
Test 6/100 (N=503, M=341): Done.                                         
Test 7/100 (N=2512, M=858): Done.                                         
Test 8/100 (N=220, M=164): Done.                                         
Test 9/100 (N=2642, M=1643): Done.                                         
Test 10/100 (N=309, M=76): Done.                                         
Test 11/100 (N=1511, M=595): Done.                                         
Test 12/100 (N=918, M=714): Done.                                         
Test 13/100 (N=36, M=17): Done.                                         
Test 14/100 (N=569, M=27): Done.            

In [7]:
# Persist each collection into the directory that the analysis cell below
# reads its pickles from.
bootstrap_output_dir = "./output/bootstrap_CI_tests/"

for bootstrap_test_collection in bootstrap_test_collections:
    save_test_collection(bootstrap_test_collection,
                         in_dir=bootstrap_output_dir)

The test collection is saved at ./output/bootstrap_CI_tests/VarVCITest_1577960139.pickle
The test collection is saved at ./output/bootstrap_CI_tests/CovV1V2CITest_1577960141.pickle
The test collection is saved at ./output/bootstrap_CI_tests/VarDCITest_1577960145.pickle


In [None]:
# Only the class of this throwaway instance matters: it selects which pickle
# files in the directory are loaded.
var_V_prototype = VarVCITest(sigma_sq_1=0.5**2)
examples = find_all_tests_in_same_category(
    var_V_prototype, in_dir='./output/bootstrap_CI_tests/')

# Drop the stored theoretical quantity on every loaded test.
# NOTE(review): presumably this forces it to be recomputed on next access
# rather than reusing the pickled value; confirm against BootstrapCITest.
for test in examples:
    test.theoretical_quantity_cache = None

print_test_collection_result(examples)

Calculating theoretical quantity / sample CI for test 6/27...

In [5]:
# One percentile value per reloaded test, in the same order as `examples`.
theoretical_sample_percentile = [
    example.theoretical_quantity_sample_percentile()
    for example in examples
]

In [None]:
# Histogram of the per-test percentiles, using 5-point-wide bins from 0 to 100.
# The explicit figure/axes interface is used so that the plot carries its own
# title and axis labels and can be read without the surrounding cells.
fig, ax = plt.subplots()
ax.hist(theoretical_sample_percentile, bins=range(0, 105, 5))
ax.set_xlim(0, 100)
ax.set_xlabel("Sample percentile of the theoretical quantity")
ax.set_ylabel("Number of tests")
ax.set_title("Theoretical quantity sample percentile across tests")
plt.show()