In [1]:
# notebook import

from ipynb.fs.full.data_processing import *

In [21]:
# user input parameters

# boolean parameter to dictate whether code is run
# (the final cell only calls `main()` when this is True)
run = True

# boolean parameter to dictate use of print statements
# (when True, `main()` prints the type and contents of `mi_dict` after the loops finish)
debug = True

# boolean parameter to dictate whether results are written to .csv files
# (when True, `main()` passes a header row and then one row per timestep
#  to `append_row_csv` for each well's `mi_<well>.csv` path)
write = True

In [22]:
# function declaration 

# input: well and timestep of interest
# output: mutual information between rfp and gfp in the given well at timestep 
def get_rfp_gfp_mi(well, t):
    """Return the mutual information (in bits) between gfp and rfp for one well at one timestep.

    Reads the module-level names `experiment_dict`, `timestep_ref`, `bin_gfp`,
    `bin_rfp` and `epsilon`; the last three are assigned by `main()`, so
    `main()` (or an equivalent setup) must have run before this is called.

    Parameters
    ----------
    well : key into `experiment_dict` selecting the well's dataframe
    t    : value matched against the `timestep_ref` column of that dataframe

    Returns
    -------
    The histogram-based mutual information between the 'mean_F_C2' (gfp) and
    'mean_F_C3' (rfp) columns, or NaN when no sample pair of the requested
    timestep falls inside the bin grid.
    """
    well_df = experiment_dict[well]

    # select the rows of this timestep once and reuse them for both channels
    rows_at_t = well_df.loc[well_df[timestep_ref] == t]
    gfp_series = rows_at_t['mean_F_C2']
    rfp_series = rows_at_t['mean_F_C3']

    # nothing observed at this timestep: report NaN instead of dividing 0 by 0
    if gfp_series.size == 0:
        return np.nan

    # make 2d histogram to get the gfp and rfp joint distribution
    hist2d, _, _ = np.histogram2d(gfp_series, rfp_series, bins=[bin_gfp, bin_rfp])

    # every pair fell outside the bin grid: the distribution is undefined
    pair_count = np.sum(hist2d)
    if pair_count == 0:
        return np.nan

    joint_prob = hist2d / pair_count

    # derive the marginal probability distributions from the joint one.
    # Normalizing separate 1d histograms by the series size (as was done
    # before) counts samples that the 2d histogram dropped for being out of
    # range in the other channel, so the marginals would neither sum to 1 nor
    # agree with the joint, and the result could exceed the entropy bound.
    gfp_prob = joint_prob.sum(axis=1)
    rfp_prob = joint_prob.sum(axis=0)

    # calculate mutual information between gfp and rfp; epsilon keeps the
    # logarithm finite for empty cells, whose terms are then zeroed by the
    # multiplication with joint_prob
    mi = np.sum(joint_prob * np.log2((joint_prob + epsilon) / (np.outer(gfp_prob, rfp_prob) + epsilon)))

    return mi


In [23]:
# `main()` function declaration

# intended use in coordination with `mi_correlation_plot.ipynb`
# requires: import of `data_processing.ipynb` and user input parameters specified above
# output: None (assigns new global variable `mi_dict` and writes to output .csv files)

def main():
    """Compute the rfp/gfp mutual information of every well at every timestep.

    Assigns the globals `epsilon`, `bin_gfp` and `bin_rfp` (read by
    `get_rfp_gfp_mi`) and the global `mi_dict`, which maps each id in
    `well_id_list` to its list of mutual-information values in
    `timestep_list` order. When `write` is set, a header row and then every
    computed value are passed to `append_row_csv` for the well's
    `mi_<well>.csv` path. When `debug` is set, `mi_dict` is printed.

    Returns None.
    """
    global epsilon, bin_gfp, bin_rfp, mi_dict

    def _mi_csv_path(well_id):
        # output file path for one well
        return results_fpath + correlation_ext + f'mi_{well_id}.csv'

    # smallest float increment; used to keep the logarithm finite
    epsilon = np.finfo(float).eps

    # bin configurations for the two channels
    edges_by_channel = create_bin_edges_dict()
    bin_gfp = edges_by_channel['gfp']
    bin_rfp = edges_by_channel['rfp']

    # one (initially empty) result list per well
    mi_dict = {well_id: [] for well_id in well_id_list}

    # start every per-well file with its column header
    if write:
        for well_id in well_id_list:
            append_row_csv(_mi_csv_path(well_id), ['mi'])

    # timesteps are the outer loop, so each well's list grows in timestep order
    for step in timestep_list:
        for well_id in well_id_list:
            well_mi = get_rfp_gfp_mi(well_id, step)
            mi_dict[well_id].append(well_mi)

            if write:
                append_row_csv(_mi_csv_path(well_id), [well_mi])

    if debug:
        print('mi_dict:', type(mi_dict), '\n', mi_dict)

    return None

                    

In [24]:
# call to `main()` function

# only run the analysis when the user-input flag `run` is True;
# `main()` returns None and exposes its results through the global `mi_dict`
if run:
    main()

mi_dict: <class 'dict'> 
 {'01': [2.873140679513105, 2.2096909728231555, 2.5073801024236158, 2.7254805569978515, 2.1039099102823475, 2.503258334775631, 2.921928094887341, 2.9232314287975854, 3.1219280948873367, 2.646439344670996, 2.5216406363433075, 1.8910611120726453, 2.4116022179746497, 2.6635327548042365, 2.459147917027224, 2.118078209349694, 2.299896391167877, 1.8676338909711998, 2.1887218755408506, 2.8582783084179995, 2.9086949695627973, 2.9096743265454847, 2.5048003829822996, 2.740601562950668, 2.1545213056505848, 2.131173877209469, 2.54274832644605, 2.2473123427018264, 2.244983803976461, 1.8307450473254794, 2.0404919039042975, 1.8786067202551848, 2.3958004558137898, 1.8601619999286516, 1.726027620279047, 1.7828963078038638, 1.860675133583864, 1.5250877154216846, 1.6374388023206756, 1.479454678352223, 1.390257393158115, 1.0887295000920694, 1.0794888576370876, 0.9044699580266671, 1.142521916013429, 0.9559914650578799, 0.772725102161203, 1.1168286253669644, 1.0749701655009019, 0.92