In [1]:
# notebook import

from ipynb.fs.full.data_processing import *

In [2]:
# user-configurable settings for this notebook

# when False, the final cell skips the call to `main()`
run = False

# when True, `main()` prints the resulting `mi_dict`
debug = False

# when True, `main()` appends its results to the output .csv file
write = False

# timesteps at which mutual information is computed for every subset size:
# every 50th timestep from 51 through 401 inclusive
timesteps = list(range(51, 402, 50))

In [3]:
def get_timestep_population_dict(timestep, feature_ref):
    """Collect, per well, the values of one feature at a single timestep.

    Relies on the notebook globals `well_id_list`, `experiment_dict` and
    `timestep_ref` (not defined in this notebook; presumably provided by the
    `data_processing` import -- verify there).

    Args:
        timestep: value matched against the `timestep_ref` column of each well's table.
        feature_ref: column label of the feature to extract.

    Returns:
        dict mapping each well id to the list of `feature_ref` values found in
        the rows whose `timestep_ref` column equals `timestep`. Ids listed in
        `well_id_list` that have no entry in `experiment_dict` map to `[]`.
    """
    # seed the result with every known well id so all of them appear in the output
    values_by_well = {well_id: [] for well_id in well_id_list}

    for well, frame in experiment_dict.items():
        # boolean row mask selecting the requested timestep
        # (each value is presumably a pandas DataFrame -- TODO confirm)
        at_timestep = frame[timestep_ref] == timestep
        selected_rows = frame.loc[at_timestep]
        values_by_well[well] = list(selected_rows[feature_ref])

    return values_by_well

def shannon_entropy_fn(population, bins):
    """Shannon entropy of `population` after discretising it into `bins`.

    Args:
        population: sequence of numeric measurements.
        bins: bin specification passed straight to `np.histogram` (a number of
            bins or a sequence of bin edges).

    Returns:
        Entropy of the per-bin probabilities, as returned by `entropy`
        (assumed to be `scipy.stats.entropy`, i.e. natural-log units -- it is
        not imported in this notebook, so confirm in `data_processing`).
    """
    # Use raw bin counts, not `density=True`. `entropy` rescales its input to
    # sum to 1, so counts become the per-bin probabilities. Density values are
    # additionally divided by the bin width, which distorts those probabilities
    # whenever the bin edges are not equally spaced. For equal-width bins the
    # two approaches give the same result.
    counts, _ = np.histogram(population, bins=bins)

    # empty bins contribute nothing to the entropy
    counts = counts[counts > 0]
    return entropy(counts)

def calculate_mi(population_dict, subset_size, bins):
    """Compute the mutual-information estimate used for bin validation.

    The value is the entropy of a randomly sampled joint population minus the
    mean entropy of the individual (full) populations.

    Args:
        population_dict: dict mapping a population key to its list of values.
        subset_size: number of values drawn from each population (via
            `np.random.choice` with its default arguments) to build the joint
            population.
        bins: bin configuration forwarded to `shannon_entropy_fn`.

    Returns:
        The estimate as a float, or None if the calculation failed (for
        example an empty population or an empty `population_dict`); the error
        is printed in that case.
    """
    try:
        populations = list(population_dict.values())

        # joint population: `subset_size` random draws from every population
        joint_pop = np.concatenate(
            [np.random.choice(values, subset_size) for values in populations]
        )
        joint_shannon_entropy = shannon_entropy_fn(joint_pop, bins)

        # entropy of each full population, averaged over populations
        shannon_entropy_list = [shannon_entropy_fn(values, bins) for values in populations]
        mean_entropy = sum(shannon_entropy_list) / len(shannon_entropy_list)

        return joint_shannon_entropy - mean_entropy
    except Exception as exc:
        # Best-effort: report the failure and let the caller carry on.
        # `Exception` (rather than a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate, so a long run can still be interrupted.
        print("Error calculating MI.", repr(exc))

        return None

In [4]:
# `main()` function declaration

# intended use in coordination with `bin_validation_plot.ipynb`
# requires: import of `data_processing.ipynb` and user input parameters specified above
# output: None (assigns the global variables `bin_edges_dict` and `mi_dict`, and
#         appends rows to the output .csv file when `write` is True)

def main():
    """Compute the MI estimate for every feature, timestep and subset size.

    Reads the user parameters `write`, `debug` and `timesteps`, plus
    `feature_name_list`, `feature_ref_list`, `ncells_avg_dict`,
    `results_fpath`, `bin_validation_ext`, `create_bin_edges_dict` and
    `append_row_csv`, none of which are defined in this notebook (presumably
    they come from the `data_processing` import).

    Side effects:
        * sets global `bin_edges_dict` to the result of `create_bin_edges_dict()`;
        * sets global `mi_dict`, mapping each `(feature name, timestep)` pair
          to a list with one `calculate_mi` result per subset size
          `1 .. int(ncells_avg_dict[timestep])`;
        * when `write` is True, appends a header row and one row per result to
          `results_fpath + bin_validation_ext + 'validation_mi.csv'`;
        * when `debug` is True, prints `mi_dict`.

    Returns:
        None
    """
    global bin_edges_dict, mi_dict

    # dictionary mapping each feature to its bin edge configuration
    bin_edges_dict = create_bin_edges_dict()

    # dictionary mapping each (feature, timestep) combination to its list of MI values
    mi_dict = {}

    # write the header row
    if write:
        csv_fpath = results_fpath + bin_validation_ext + 'validation_mi.csv'
        append_row_csv(csv_fpath, ['feature', 'timestep', 'subset_size', 'mi'])

    for (name, ref) in zip(feature_name_list, feature_ref_list):

        # bin configuration for this feature
        bin_edges = bin_edges_dict[name]

        for t in timesteps:

            # per-well populations of this feature at this timestep
            population_dict = get_timestep_population_dict(t, ref)

            # Create the result list before iterating so the key exists even
            # when `int(ncells_avg_dict[t])` is 0 and the loop body never runs.
            mi_values = mi_dict[(name, t)] = []

            for subset_size in range(1, int(ncells_avg_dict[t]) + 1):

                # one estimate per subset size (None if the calculation failed)
                mi = calculate_mi(population_dict, subset_size, bin_edges)
                mi_values.append(mi)

                # write the result row
                if write:
                    append_row_csv(csv_fpath, [f'{name}', f'{t}', f'{subset_size}', f'{mi}'])

    if debug:
        print('mi_dict:', type(mi_dict), '\n', mi_dict)

In [5]:
# call to `main()` function
# executed only when the user parameter `run` is True; otherwise this cell does nothing

if run:
    main()

mi_dict: <class 'dict'> 
 {('gfp', 51): [-0.791807292194258, -0.4588240521823317, -0.13786758166402313, -0.08391544468130396, -0.07022033398623106, 0.16685497313783682, 0.1904035294218076, 0.24855608019900188, 0.356768619085726, 0.2763574877706194, 0.24822070175005928, 0.2818775542164067, 0.27026065120340137, 0.24176547817388627, 0.24090902143293702, 0.31699542467558706, 0.38289540363593844, 0.31692111787291743, 0.28165346043255113, 0.2524874996106834, 0.35109341199853494, 0.40416295217132436, 0.41652992016592894, 0.4140044694029439, 0.326179951898963, 0.37851881023613165, 0.32985349907592765, 0.3748877603346119, 0.4218450202423658, 0.3639196412884691, 0.4482729906280327, 0.35130062548356955, 0.38219849356174507, 0.3915158156923195, 0.3434705304852712, 0.40592726142677416, 0.4006191990050816, 0.3984487991985324, 0.4087812894929317, 0.4158105574042157, 0.4284610836843674, 0.41876708942472396, 0.3868389387222062, 0.36320757599960674, 0.4152420506281289, 0.4441386300595589, 0.398350856936

In [26]:
# `mi_dict` only exists after `main()` has run. With `run = False` on a fresh
# kernel an unguarded `print(mi_dict)` raises NameError, so check first.
if 'mi_dict' in globals():
    print(mi_dict)

{('gfp', 51): [0.02909448072840659, -0.0005631416049394211, 0.047346789639604925, 0.09397598686203334, 0.02732471726705832, 0.0769911179081808, 0.07238314698375614, 0.050727983483795924, 0.09594342714451831, 0.06087147323043185, 0.10595682762241077, 0.08248472123788697, 0.09708631089637354, 0.10372046178520167, 0.09435281760784786, 0.1106306989183996, 0.1034073535439386, 0.11032635472357666, 0.09188057311568909, 0.11104888917143563, 0.1132729660793994, 0.09731658774777863, 0.07304734861163231, 0.10102964879327603, 0.11976447275649438, 0.1200705832430784, 0.1161615479548781, 0.11779149867537031, 0.1117345075987548, 0.11700599633862985, 0.12331209735122561, 0.10102713026083032, 0.11434911529277869, 0.12750852392851897, 0.13706632053536616, 0.10621106322032747, 0.12362620597995932, 0.13605549061679323, 0.12883040931641343, 0.13627669568531364, 0.12347656135810059, 0.13164303440068648, 0.13851169024706755, 0.10989130777103506, 0.11696847784144127, 0.1294359807299883, 0.12607963194733696, 0