In [1]:
# notebook import

from ipynb.fs.full.data_processing import *

In [2]:
# user input parameters

# boolean parameter to dictate whether code is run
# (the `if run:` guard in this notebook calls `main()` only when this is True)
run = False

# boolean parameter to dictate use of print statements
# (when True, `main()` prints the type and contents of `mi_list_dict`)
debug = False

# boolean parameter to dictate whether results are written to .csv files
# (when True, `main()` appends an 'mi' header row and then one row per
# timestep to a `<feature name>_mi_list.csv` file for every feature)
write = False

In [3]:
# input: timestep and reference feature name of interest (timestep, feature_ref)
# output: dictionary mapping a well's id number to its list of values of a given feature at an individual timestep
def get_timestep_population_dict(timestep, feature_ref):
    """Collect one feature's values per well at a single timestep.

    Args:
        timestep: Value matched against the `timestep_ref` column of each
            well's table.
        feature_ref: Column label of the feature to extract.

    Returns:
        A dict keyed by well id. Every id in `well_id_list` starts out with an
        empty list; each well found in `experiment_dict` is then assigned the
        list of its `feature_ref` values from the rows at `timestep`.
    """
    values_by_well = {well_id: [] for well_id in well_id_list}
    for well, frame in experiment_dict.items():
        # Boolean mask selecting this well's rows recorded at the timestep.
        at_timestep = frame[timestep_ref] == timestep
        matching_rows = frame.loc[at_timestep]
        values_by_well[well] = list(matching_rows[feature_ref])
    return values_by_well

# input: list of measurements (population) and a bin configuration (bins)
# output: shannon entropy for a given population and number of bins
def shannon_entropy_fn(population, bins):
    """Shannon entropy of a population after binning it into a histogram.

    Args:
        population: Array-like of measurements.
        bins: Bin specification forwarded to `np.histogram` (a bin count or a
            sequence of bin edges).

    Returns:
        The value of `entropy` applied to the counts of the occupied bins.
    """
    # Use raw counts rather than density=True: densities are counts divided by
    # bin width, so with unequal bin widths they are not proportional to the
    # probability mass of each bin and would skew the entropy.
    counts, _ = np.histogram(population, bins=bins)
    # Empty bins contribute nothing to the entropy; drop them up front.
    occupied = counts[counts > 0]
    # NOTE(review): assumes `entropy` is scipy.stats.entropy, which normalizes
    # its input to sum to 1 (turning counts into probabilities) -- confirm
    # against the star-import from data_processing.
    return entropy(occupied)

# input: dictionary of populations (population_dict), size of random joint sampling of the populations (subset_size), and bin configuration (bins)
# output: mutual information at a timestep 
def calculate_mi(population_dict, subset_size, bins):
    """Estimate the mutual information across populations at one timestep.

    The estimate is the Shannon entropy of a pooled random sample
    (`subset_size` draws from every population) minus the mean Shannon entropy
    of the individual populations.

    Args:
        population_dict: Mapping from a population key to its measurements.
        subset_size: Number of values drawn from each population for the
            pooled sample.
        bins: Bin configuration passed to `shannon_entropy_fn`.

    Returns:
        The mutual information estimate, or None when the calculation raised
        an error (e.g. a population that cannot be sampled); the error is
        reported on stdout.
    """
    try:
        # Pool an equally sized random draw from every population.
        joint_pop = np.concatenate(
            [np.random.choice(population_dict[pop], subset_size) for pop in population_dict]
        )
        joint_shannon_entropy = shannon_entropy_fn(joint_pop, bins)

        # Entropy of each full (unsampled) population, averaged.
        shannon_entropy_list = [
            shannon_entropy_fn(population_dict[pop], bins) for pop in population_dict
        ]
        mean_entropy = sum(shannon_entropy_list) / len(shannon_entropy_list)

        return joint_shannon_entropy - mean_entropy
    except Exception as exc:
        # Best-effort: a failed timestep yields None instead of aborting the
        # whole series. Only `Exception` is caught so KeyboardInterrupt and
        # SystemExit still stop a long-running loop.
        print("Error calculating MI.", f"({type(exc).__name__}: {exc})")

        return None
    
# input: reference feature name of interest (feature_ref), bin configuration (bins), and an optional .csv output file path (csv_fpath)
# output: list of mutual information measurements over time for given feature
def get_mi_list(feature_ref, bins, csv_fpath=None):
    """Compute the mutual information of one feature at every timestep.

    Args:
        feature_ref: Reference (column) name of the feature of interest.
        bins: Bin configuration passed through to `calculate_mi`.
        csv_fpath: Optional path of a .csv file; when given, each timestep's
            value is appended to it as a one-element row via `append_row_csv`.

    Returns:
        A list with one entry per element of `timestep_list`, in order. An
        entry is None when `calculate_mi` failed for that timestep.
    """
    mi_list = []
    for timestep in timestep_list:
        population_dict = get_timestep_population_dict(timestep, feature_ref)
        # Per-well sample size: the `ncells_avg_dict` entry for this timestep
        # floor-divided by the number of wells (presumably an average cell
        # count -- confirm against data_processing).
        subset_size = int(ncells_avg_dict[timestep] // len(well_id_list))
        mi = calculate_mi(population_dict, subset_size, bins)
        mi_list.append(mi)
        # Identity check is the idiomatic (and __ne__-proof) test for None.
        if csv_fpath is not None:
            append_row_csv(csv_fpath, [mi])
    return mi_list

In [4]:
# `main()` function declaration

# intended use in coordination with `mi_calculation_plot.ipynb`
# requires: import of `data_processing.ipynb` and user input parameters specified above
# output: None (assigns new global variables `bin_edges_dict`, `mi_list_dict` and writes to output .csv files)

def main():
    """Compute the mutual-information time series of every feature.

    Assigns two module-level globals:
      * `bin_edges_dict` -- the result of `create_bin_edges_dict()`, indexed
        here by feature name to obtain that feature's bin configuration.
      * `mi_list_dict` -- feature name -> list returned by `get_mi_list`.

    When `write` is true, each feature's values are also appended to
    `results_fpath + mi_calc_ext + '<name>_mi_list.csv'`, preceded by an
    'mi' header row. When `debug` is true, `mi_list_dict` is printed.

    Returns:
        None.
    """
    global bin_edges_dict, mi_list_dict

    bin_edges_dict = create_bin_edges_dict()
    mi_list_dict = {}

    for name, feature_ref in zip(feature_name_list, feature_ref_list):
        # No output path means `get_mi_list` skips the .csv rows entirely.
        out_path = None
        if write:
            out_path = results_fpath + mi_calc_ext + f'{name}_mi_list.csv'
            # Header row goes in before the per-timestep values.
            append_row_csv(out_path, ['mi'])

        mi_list_dict[name] = get_mi_list(feature_ref, bin_edges_dict[name], out_path)

    if debug:
        print('mi_list_dict:', type(mi_list_dict), '\n', mi_list_dict)

In [5]:
# call to `main()` function
# Guarded by the `run` flag: with `run = False` this cell computes nothing,
# leaving only the definitions above in scope (presumably so a notebook that
# imports this one can call `main()` itself -- confirm with the caller).

if run:
    main()

mi_list_dict: <class 'dict'> 
 {'gfp': [1.5085351135011678, 1.359773342045873, 1.3497875373044437, 1.2371424657438967, 1.1555543960268593, 1.3208369295365379, 1.4857128518876306, 1.110460222560147, 1.1689639661009146, 1.2233703459740712, 1.1617396282781383, 1.065786222098779, 0.8730134268247554, 1.016395472975955, 0.9428663178886936, 1.0148403293763697, 0.8987432472371064, 0.9916105713060852, 0.9400101044030094, 0.738023912243889, 0.7250077538516262, 0.5567813678785827, 0.5942587659497973, 0.5078776229384219, 0.5459178358292869, 0.48425474909323274, 0.27265607964130023, 0.5524072591457618, 0.19707300704944064, 0.36300869063493835, 0.2740266098633426, 0.2743992944052902, 0.3185498530844666, 0.3232681025321624, 0.13220575145880087, 0.15622333359696672, 0.18482777765892733, 0.20815638440463946, 0.20832858865009385, 0.2057539738325147, 0.2867925693697462, 0.31585750281303193, 0.3658064784178543, 0.33025222381184083, 0.32372861220322147, 0.3041180963953418, 0.3430396622439176, 0.33757580482