In [1]:
import os

import numpy as np
from sklearn.utils import resample
from CML_tool.decorators import file_based_cacheing
from CML_tool.Utils import read_pickle



In [2]:
# Base directory for the cache files used in this notebook: the process's
# current working directory at the moment this cell is executed.
FILE_DIR = os.getcwd()

In [3]:
def gen_matrix(size:tuple=(10,20,50), seed=None):
    """Generate a bootstrapped 3-D array of synthetic, normally distributed data.

    A base matrix of shape ``(size[0], size[1])`` is built first: each column
    is drawn from a normal distribution whose ``(mu, sigma)`` pair is picked at
    random, with replacement, from a fixed catalogue of four candidates. The
    rows of that base matrix are then resampled with replacement ``size[2]``
    times to build the bootstrap replicates.

    Parameters
    ----------
    size : tuple, default (10, 20, 50)
        ``(num_samples, num_distributions, num_bootstraps)``.
    seed : int or None, default None
        Seed for the random choice of distributions and for the normal draws.
        With ``None`` (the default) NumPy's global random state is used, so
        existing callers see unchanged behaviour. The bootstrap resampling is
        always seeded with the replicate index, independently of ``seed``.

    Returns
    -------
    np.ndarray
        Array of shape ``(num_samples, num_distributions, num_bootstraps)``;
        ``result[:, :, i]`` is the i-th bootstrap replicate of the base matrix.
    """
    # Define different distribution parameters
    distributions_possibilities = [
        {"mu": 0, "sigma": 1, "label": "Standard Normal"},
        {"mu": 2, "sigma": 0.5, "label": "Narrow Peak"},
        {"mu": -1, "sigma": 2, "label": "Wide Spread"},
        {"mu": 3, "sigma": 1.5, "label": "Shifted Right"}
    ]

    # Fall back to the module-level (global-state) functions when no seed is
    # given; np.random and RandomState expose the same choice/normal API.
    rng = np.random if seed is None else np.random.RandomState(seed)

    num_samples = size[0]
    num_bootstraps = size[2]

    distributions = rng.choice(distributions_possibilities, replace=True, size=size[1])

    # Generate data matrix: one column per selected distribution
    matrix = np.column_stack([
            rng.normal(dist['mu'], dist['sigma'], num_samples)
            for dist in distributions
            ])

    # Perform bootstrapping using sklearn's resample
    boot_matrix = np.zeros((num_samples, len(distributions), num_bootstraps))
    for i in range(num_bootstraps):
        # A fixed random_state yields the same resampled indices for every
        # column, so resampling whole rows once per replicate is equivalent to
        # resampling each column separately with random_state=i.
        boot_matrix[:, :, i] = resample(
            matrix,
            n_samples=num_samples,
            replace=True,
            random_state=i  # Use iteration as random state for reproducibility
        )
    print(f'Generated matrix with shape {boot_matrix.shape}')
    return boot_matrix

@file_based_cacheing(
    path=os.path.join(FILE_DIR, 'test_files'),
    filename='npz_dict',
    extension_desired='npz',
)
def mean_sign_dict(X: np.ndarray, **kwargs):
    """Return the axis-0 mean of ``X`` and the sign of its row means, as a dict.

    ``X`` is averaged over its first axis. ``X_mean`` is the transpose of that
    average; ``sign`` is ``np.sign`` of the average further reduced over its
    own axis 1. For a 3-D ``X`` of shape ``(a, b, c)`` this gives ``X_mean``
    with shape ``(c, b)`` and ``sign`` with shape ``(b,)``.

    ``**kwargs`` is not used in the body; presumably it is consumed by the
    ``file_based_cacheing`` decorator -- confirm against CML_tool.decorators.

    Returns
    -------
    dict
        ``{'X_mean': ..., 'sign': ...}``.
    """
    # Reduce over the first axis once and reuse the result for both outputs.
    axis0_mean = np.mean(X, axis=0)
    X_mean = axis0_mean.T
    sign = np.sign(axis0_mean.mean(axis=1))
    print(f'Returning mean matrix with shape {X_mean.shape} and sign array with shape {sign.shape}')
    return dict(X_mean=X_mean, sign=sign)

@file_based_cacheing(
    path=os.path.join(FILE_DIR, 'test_files'),
    filename='npz_array',
    extension_desired='npz',
)
def mean_sign_arr(X: np.ndarray, **kwargs):
    """Return the axis-0 mean of ``X`` and the sign of its row means, as a tuple.

    ``X`` is averaged over its first axis. The first returned item is the
    transpose of that average; the second is ``np.sign`` of the average
    further reduced over its own axis 1. For a 3-D ``X`` of shape
    ``(a, b, c)`` the items have shapes ``(c, b)`` and ``(b,)``.

    ``**kwargs`` is not used in the body; presumably it is consumed by the
    ``file_based_cacheing`` decorator -- confirm against CML_tool.decorators.

    Returns
    -------
    tuple
        ``(X_mean, sign)``.
    """
    # Single reduction over the first axis, shared by both outputs.
    first_axis_avg = np.mean(X, axis=0)
    X_mean = np.transpose(first_axis_avg)
    sign = np.sign(first_axis_avg.mean(axis=1))
    print(f'Returning mean matrix with shape {X_mean.shape} and sign array with shape {sign.shape}')
    return X_mean, sign

In [5]:
# Generate a fresh bootstrap array and pass it through both decorated functions.
X = gen_matrix(size=(10,20,50))
# Each decorated call is unpacked into two values: the function result and a
# flag that is printed below as "was the function cached?".
# NOTE(review): the recorded output shows both calls were served from cache
# (no "Returning mean matrix..." line was printed), so the returned arrays may
# not correspond to this freshly generated X -- confirm how
# file_based_cacheing decides a cache hit.
arrays_dict, cached_dict = mean_sign_dict(X)
arrays_pkl, cached_arr = mean_sign_arr(X)
print(f'Was the DICT function chached?: {cached_dict}')
print(f'Was the ARRAY function chached?: {cached_arr}')

INFO:root:Function mean_sign_dict CACHED.
INFO:root:No NPZ file npz_array.npz was found but a PICKLE file with that name was found and retrieved.
INFO:root:Hence, function mean_sign_arr CACHED.


Generated matrix with shape (10, 20, 50)
Was the DICT function chached?: True
Was the ARRAY function chached?: True


In [6]:
# Open the cached 'npz_dict.npz' archive and print the name of every array it holds.
# NOTE(review): allow_pickle=True permits unpickling object arrays, which can
# execute arbitrary code; acceptable only for trusted, locally produced files.
# NOTE(review): the handle returned by np.load is never closed in this cell.
file_dict_npz = np.load(os.path.join(FILE_DIR,'test_files','npz_dict.npz'), allow_pickle=True)
for i,key in enumerate(file_dict_npz.keys()):
    print(f'key #{i}: {key}')

key #0: X_mean
key #1: sign


In [12]:
# Load 'npz_array.pkl' from the test_files directory with the project's
# read_pickle helper (called with the directory and the file name separately).
# The earlier log output reports that the mean_sign_arr cache was retrieved
# from a pickle file rather than an .npz file.
file_arr = read_pickle(os.path.join(FILE_DIR,'test_files'),'npz_array.pkl')
# Number of top-level items in the loaded object.
len(file_arr)


2