# Experiment Utilities

This notebook contains utility functions for experiments, such as data gathering, analysis, and visualisation functions.

## Data Gathering Functions

Contains the functions used to gather experiment data.

In [None]:
def do_experiment_per_shift(model, method, x_valid, y_valid, 
                            x_test, y_test, shift_type, orig_dims, shift_type_params=None,
                            num_classes=3):
    """
    Calculate the test statistics, p-value, and detection accuracy for a given method
    and shift on all combinations of number of test samples, shift intensities, 
    and proportion of test data that is affected by shift.

    :param model: the model can be a dimensionality reductor, CBM, CME, or blackbox classifier
    :param method: the shift detection method, forwarded unchanged to single_experiment
        (see that function for the values it accepts), e.g.:
        - 'BBSDs': softmax label classifier (BBSD)
        - 'BBSDh': argmax/ hard prediction label classifier 
        - 'BBSDs_concepts': softmax on the concept layer 
        - 'BBSDh_concepts': argmax/ hard prediction on the concept layer
    :param x_valid: validation data, which we hypothetically treat as the dataset that we have.
    :param y_valid: labels of the validation data.
    :param x_test: test data, which we hypothetically treat as unseen real-world data, where shift might occur
    :param y_test: labels of the test data.
    :param shift_type: the type of shift to apply, which can be:
        - 'gaussian'
        - 'ko'
        - 'img': random image shift incorporating combination of below shift
        - 'width_shift'
        - 'height_shift'
        - 'rotation'
        - 'shear'
        - 'zoom'
        - 'flip'
        - If wishing to do combination of shift, give an array comprising the above parameters.
    :param orig_dims: original dimensions of a single sample; forwarded to single_experiment,
        which uses the first three entries to reshape the shifted test data.
    :param shift_type_params: If needed, provide shift parameters (e.g., the original image size, see apply_shift_* functions)
    :param num_classes: number of label classes, forwarded to single_experiment.

    :return: a dictionary containing p-value and detection accuracy for all combination of shift intensities,
        shift proportion, and number of test samples:
        {
            "shift_intensities": {
                "shift_proportion": {
                    "test_samples": {
                        "test_statistics": [],
                        "p_vals": [],
                        "detection_results": []
                    }
                }
            }
        }
    """

    # Possible value of intensities, data proportion affected, test set samples
    shift_intensities = ["small", "medium", "large"]
    shift_props = [0.1, 0.5, 1.0]
    # NOTE: get_random_data_subset samples without replacement, so x_test must
    # contain at least as many rows as the largest value listed here.
    test_set_samples = [10, 20, 50, 100, 200, 500, 1000, 10000]
    n_exp = 5 # number of experiments for each configuration (for reliability)

    ## Initialise dictionary used to store result
    dict_result = initialise_result_dictionary(shift_intensities, shift_props, test_set_samples)

    ## Consider all combinations of shift intensities, shift proportion, test samples
    for shift_intensity in tqdm(shift_intensities):
        for shift_prop in shift_props:
            for test_set_sample in test_set_samples:
                result_slot = dict_result[shift_intensity][shift_prop][test_set_sample]

                # Repeat the experiment n_exp times for more reliable data
                for _ in range(n_exp):
                    # Get test set
                    x_test_subset, y_test_subset = get_random_data_subset(x_test, y_test, test_set_sample)

                    # Call apply shift method on the test set
                    x_test_shifted, y_test_shifted = apply_shift(x_test_subset, y_test_subset, 
                                                                 shift_type, shift_type_params, 
                                                                 shift_intensity, shift_prop)

                    # Perform detection:
                    # 1. Get reduced representation
                    # 2. Perform statistical test
                    # num_classes is forwarded explicitly so that a caller-supplied
                    # value is not silently replaced by single_experiment's default.
                    test_statistic, p_val, detection_result = single_experiment(model, method, x_valid, y_valid, 
                                                                                x_test_shifted, y_test_shifted,
                                                                                orig_dims, num_classes=num_classes)

                    # 3. Store result
                    result_slot["test_statistics"].append(test_statistic)
                    result_slot["p_vals"].append(p_val)
                    result_slot["detection_results"].append(detection_result)

    return dict_result

In [None]:
def apply_shift(x_test, y_test, shift_type, shift_type_params, shift_intensity, shift_prop):
    """
    Apply a type of shift to x_test and y_test.

    The inputs are deep-copied first and only the copies are handed to the
    shift helpers, so the caller's arrays are never touched regardless of
    which shift is selected.

    :param x_test: test data, which we hypothetically treat as unseen real-world data, where shift might occur
    :param y_test: labels of the test data.
    :param shift_type: the type of shift to apply, which can be:
        - 'gaussian'
        - 'ko'
        - 'img': random image shift incorporating combination of below shift
        - 'width_shift'
        - 'height_shift'
        - 'rotation'
        - 'shear'
        - 'zoom'
        - 'flip'
        - If wishing to do combination of shift, give an array comprising the above parameters.
    :param shift_type_params: If needed, provide shift parameters (e.g., the original image size, see apply_shift_* functions).
        'ko' reads the key "cl"; every image-based shift reads the key "orig_dims".
    :param shift_intensity: "small", "medium", or "large"
    :param shift_prop: proportion of the data affected by the shift (not forwarded for 'ko').

    :return: (x_test_shifted, y_test_shifted)
    """

    # Single image transformations that are delegated to apply_img_shift with
    # a one-element shift_types list.
    single_image_shifts = ("width_shift", "height_shift", "rotation",
                           "shear", "zoom", "flip")

    # Prevent bugs, just copy the whole thing; every helper below works on the
    # copies, never on the caller's data.
    x_copy = deepcopy(x_test)
    y_copy = deepcopy(y_test)

    ## Apply shift accordingly
    if shift_type == "gaussian":
        x_test_shifted, y_test_shifted = apply_gaussian_shift(x_copy, y_copy, shift_intensity, shift_prop)

    elif shift_type == "ko":
        x_test_shifted, y_test_shifted = apply_ko_shift(x_copy, y_copy, shift_intensity, cl=shift_type_params["cl"])

    elif shift_type == "img":
        # Leave shift_types unset so apply_img_shift uses its own default.
        x_test_shifted, y_test_shifted = apply_img_shift(x_copy, y_copy, 
                                                         shift_type_params["orig_dims"], 
                                                         shift_intensity, shift_prop)

    elif isinstance(shift_type, str) and shift_type in single_image_shifts:
        x_test_shifted, y_test_shifted = apply_img_shift(x_copy, y_copy, 
                                                         shift_type_params["orig_dims"], 
                                                         shift_intensity, shift_prop,
                                                         shift_types=[shift_type])

    # Combination of shifts
    else:
        x_test_shifted, y_test_shifted = apply_img_shift(x_copy, y_copy, 
                                                         shift_type_params["orig_dims"], 
                                                         shift_intensity, shift_prop,
                                                         shift_types=shift_type)

    return x_test_shifted, y_test_shifted

In [None]:
def single_experiment(model, method, x_valid, y_valid, x_test, y_test, orig_dims, num_classes=3):
    """
    Used to perform single experiment for a given data. Fast experiment check.
    
    :param model: the model whose ``predict`` output is used as the representation.
        For the ``*_concepts`` methods, ``predict`` must return one prediction
        array per concept (six in total, indexed 0..5).
    :param method: the shift detection method. Implemented here:
        - 'BBSDs': softmax label classifier (BBSD)
        - 'BBSDh': argmax/ hard prediction label classifier 
        - 'BBSDs_concepts': softmax on the concept layer 
        - 'BBSDh_concepts': argmax/ hard prediction on the concept layer
        The dimensionality-reduction methods ('PCA', 'SRP', 'UAE', 'TAE',
        'NoRed') are not implemented in this function and raise ValueError.
    :param x_valid: validation data, which we hypothetically treat as the dataset that we have.
        Passed to ``model.predict`` as-is.
    :param y_valid: validation labels (currently unused).
    :param x_test: test data, which we hypothetically treat as unseen real-world data, where shift might occur.
        Reshaped to ``(-1, orig_dims[0], orig_dims[1], orig_dims[2])`` before prediction.
    :param y_test: test labels (currently unused).
    :param orig_dims: original dimensions of one sample; the first three entries are used for the reshape.
    :param num_classes: number of label classes, used by the chi-square test for 'BBSDh'.
    
    :return: (test_statistic, p_val, detection_result). For the label-based methods
        detection_result is 1 (shift) or 0 (no shift); for the ``*_concepts`` methods
        all three elements are dictionaries keyed by concept name.
    :raises ValueError: if ``method`` is not one of the implemented methods.
    """

    concept_names = ["color", "shape", "scale", "rotation", "x", "y"]
    classes = [1, 3, 6, 40, 32, 32]
    alpha = 0.05 # standard significance test value

    supported_methods = ("BBSDs", "BBSDh", "BBSDs_concepts", "BBSDh_concepts")
    if method not in supported_methods:
        # Previously an unknown method fell through every branch and crashed
        # with an UnboundLocalError at the return statement.
        raise ValueError(
            "Unsupported method {!r}; expected one of {}".format(method, supported_methods)
        )

    # Valid representation
    valid_preds = model.predict(x_valid)

    # Test representation
    # Note: need to reshape test first as it is flatten previously
    test_preds = model.predict(x_test.reshape(-1, orig_dims[0],
                                              orig_dims[1],
                                              orig_dims[2]))

    ## BBSD Softmax
    if method == "BBSDs":
        # Do multiple univariate testing
        # NOTE(review): argument order here is (test, valid) while the concept
        # branch below uses (valid, test); kept as-is, confirm the test is symmetric.
        p_val, p_vals, t_vals = one_dimensional_test(test_preds, valid_preds)
        # Bonferroni correction (divide by number of components)
        corrected_alpha = alpha / valid_preds.shape[1]
        detection_result = 1 if p_val < corrected_alpha else 0
        return (t_vals, p_vals, detection_result)

    ## BBSD Argmax
    if method == "BBSDh":
        repr_valid = np.argmax(valid_preds, axis=1)
        # axis=1 gives one hard label per test sample; without it np.argmax
        # returns a single index into the flattened prediction matrix.
        repr_test = np.argmax(test_preds, axis=1)

        chi2, p_val = test_chi2_shift(repr_valid, repr_test, num_classes)
        detection_result = 1 if p_val < alpha else 0
        return (chi2, p_val, detection_result)

    ## BBSD on concepts (softmax or argmax)
    use_hard_labels = method == "BBSDh_concepts"

    # Prepare result
    test_statistic = {concept: None for concept in concept_names}
    p_val_dict = {concept: None for concept in concept_names}
    detection_result = {concept: None for concept in concept_names}

    # Do statistical test for each concept
    for idx, (concept, nc) in enumerate(zip(concept_names, classes)):
        repr_valid = valid_preds[idx]
        repr_test = test_preds[idx]

        if use_hard_labels:
            # Chi-square test on the hard (argmax) concept predictions
            chi2, p_val = test_chi2_shift(np.argmax(repr_valid, axis=1),
                                          np.argmax(repr_test, axis=1), nc)
            threshold = alpha
            test_statistic[concept] = chi2
            p_val_dict[concept] = p_val
        else:
            # One dimensional test on each softmax component
            p_val, p_vals, t_vals = one_dimensional_test(repr_valid, repr_test)
            # Divided by number of components for Bonferroni correction
            threshold = alpha / repr_valid.shape[1]
            test_statistic[concept] = t_vals
            p_val_dict[concept] = p_vals

        detection_result[concept] = 1 if p_val < threshold else 0

    return (test_statistic, p_val_dict, detection_result)

## Visualisation Functions

Contains functions used to generate plots and tables.

## Helper Functions
Contains sub-functions used to aid the primary functions described in the sections above.

In [5]:
def initialise_result_dictionary(shift_intensities, shift_props, test_set_samples):
    """
    Build the empty, three-level nested dictionary that collects experiment results.

    The levels are keyed, in order, by shift intensity, shift proportion and
    test set size. Every leaf is a fresh dictionary holding three independent
    empty lists.

    :param shift_intensities: all possible shift intensities
    :param shift_props: all possible shift proportions.
    :param test_set_samples: all possible test set samples.

    :return: empty dictionary used to store result.
    """

    return {
        intensity: {
            proportion: {
                n_samples: {
                    "test_statistics": [],
                    "p_vals": [],
                    "detection_results": [],
                }
                for n_samples in test_set_samples
            }
            for proportion in shift_props
        }
        for intensity in shift_intensities
    }

In [9]:
def get_random_data_subset(x, y, test_set_sample):
    """
    Draw a random subset of matching rows from x and y.

    Row indices are drawn without replacement with ``np.random.choice``, so
    ``test_set_sample`` may not exceed the number of rows in ``x``.

    :param x: the feature/ image
    :param y: the label
    :param test_set_sample: number of sample in the new test set

    :return: (x_subset, y_subset), both selected with the same row indices
    """

    # One shared draw of row positions keeps features and labels aligned
    n_rows = x.shape[0]
    picked = np.random.choice(n_rows, test_set_sample, replace=False)

    return x[picked, :], y[picked]