In [1]:
# notebook import

from ipynb.fs.full.data_processing import *

In [2]:
# user input parameters

# boolean parameter to dictate whether code is run
# (the last cell of this notebook calls `main()` only when this is True)
run = False

# boolean parameter to dictate use of print statements
# (when True, `main()` prints the type and contents of `pairwise_dict` once it is filled)
debug = False

# boolean parameter to dictate whether results are written to .csv files
# (when True, `main()` builds one .csv path per feature and well pair and passes it to `append_row_csv`)
write = False

In [3]:
# function declaration

# input: timestep (timestep), reference name of a feature of interest (feature_ref), and dataframe dictionary to reference (well_dict)
# output: dictionary mapping a well's id number to its list of values of a given feature at an individual timestep
def get_timestep_population_dict(timestep, feature_ref, well_dict):
    """Collect the values of one feature at a single timestep for every well.

    input: timestep to select (timestep), column name of the feature of interest
           (feature_ref), and dictionary mapping well ids to dataframes (well_dict)
    output: dictionary with the same keys as well_dict; each value is the list of
            feature_ref entries taken from the rows whose timestep_ref column
            equals timestep (an empty list when no row matches)

    note: timestep_ref is not a parameter; it is read from the notebook's global
          namespace.
    """
    def values_at_timestep(frame):
        # boolean mask selecting the rows recorded at the requested timestep
        at_timestep = frame[timestep_ref] == timestep
        return list(frame.loc[at_timestep][feature_ref])

    return {well_id: values_at_timestep(frame) for well_id, frame in well_dict.items()}

# input: list of measurements (population) and a bin configuration (bins)
# output: shannon entropy for a given population and number of bins
def shannon_entropy_fn(population, bins):
    """Return the entropy of a population after binning it into a histogram.

    input: list of measurements (population) and a bin configuration (bins) that
           is handed unchanged to np.histogram
    output: result of `entropy` applied to the non-zero histogram values

    note: `entropy` comes from the notebook's star import (presumably
          scipy.stats.entropy, which normalises its input - TODO confirm).
    NOTE(review): the histogram is built with density=True, so with bins of
          unequal width the values are densities rather than bin probabilities.
    """
    densities = np.histogram(population, bins=bins, density=True)[0]
    # empty bins are dropped before the entropy is evaluated
    occupied = densities[densities > 0]
    return entropy(occupied)

# input: dictionary of populations (population_dict), size of random joint sampling of the populations (subset_size), and bin configuration (bins)
# output: mutual information at a timestep 
def calculate_mi(population_dict, subset_size, bins):
    """Estimate the mutual information of a set of populations at one timestep.

    The returned value is the entropy of a pooled random sample minus the mean of
    the individual population entropies, all computed with `shannon_entropy_fn`.

    input: dictionary of populations (population_dict), number of values drawn
           from each population for the pooled sample (subset_size), and bin
           configuration passed on to `shannon_entropy_fn` (bins)
    output: the mutual information estimate, or None when the calculation raises
            an ordinary exception (a message including the cause is printed)

    note: sampling uses NumPy's global random state, so repeated calls generally
          differ unless the caller seeds it beforehand.
    """
    try:
        populations = list(population_dict.values())

        # pool `subset_size` draws from every population into one joint sample
        # (np.random.choice samples with replacement by default)
        joint_pop = np.concatenate(
            [np.random.choice(population, subset_size) for population in populations]
        )
        joint_shannon_entropy = shannon_entropy_fn(joint_pop, bins)

        # the per-population entropies use the full populations, not the sub-samples
        shannon_entropy_list = [shannon_entropy_fn(population, bins) for population in populations]
        mean_entropy = sum(shannon_entropy_list) / len(shannon_entropy_list)

        return joint_shannon_entropy - mean_entropy
    except Exception as error:
        # best-effort: a failing timestep yields None instead of aborting the run.
        # Catching `Exception` rather than using a bare `except` lets
        # KeyboardInterrupt / SystemExit propagate, and the cause is reported.
        print("Error calculating MI.", repr(error))

        return None

# input: reference name of a feature of interest (feature_ref), set of bin edges (bins), dataframe dictionary to reference (well_dict), and an optional path to a .csv file (csv_fpath)
# output: list of mutual information values calculated from the given feature and wells
def get_mi_list(feature_ref, bins, well_dict, csv_fpath=None):
    """Compute the mutual information value of one feature at every timestep.

    input: reference name of a feature of interest (feature_ref), set of bin edges
           (bins), dataframe dictionary to reference (well_dict), and an optional
           path to a .csv file (csv_fpath)
    output: list with one entry per timestep in the global timestep_list, in that
            order; each entry is whatever calculate_mi returned for that timestep
            (None where the calculation failed)

    When csv_fpath is given, every value is also passed to append_row_csv as a
    one-element row right after it is computed.
    """
    mi_list = []
    for timestep in timestep_list:
        population_dict = get_timestep_population_dict(timestep, feature_ref, well_dict)
        # sample size per well: ncells_avg_dict[timestep] split evenly between the wells
        # (presumably the average cell count at that timestep - confirm in data_processing)
        subset_size = int(ncells_avg_dict[timestep] // len(well_dict))
        mi = calculate_mi(population_dict, subset_size, bins)
        mi_list.append(mi)
        # identity check: only a missing path (None) disables writing
        if csv_fpath is not None:
            append_row_csv(csv_fpath, [mi])
    return mi_list

In [4]:
# `main()` function declaration

# intended use in coordination with `pairwise_mi_plot.ipynb`
# requires: import of `data_processing.ipynb` and user input parameters specified above
# output: None (assigns the global variables `bin_edges_dict` and `pairwise_dict`, and writes to the specified output .csv files when `write` is True)

def main():
    """Compute per-feature mutual information lists for every pair of wells.

    Assigns two notebook globals: `bin_edges_dict` (the result of
    `create_bin_edges_dict()`) and `pairwise_dict`, which maps each (well, well)
    tuple to a dictionary from feature name to the list returned by `get_mi_list`.
    When `write` is truthy, a .csv path is built for every feature and pair, a
    header row ['mi'] is appended to it, and the path is handed to `get_mi_list`.
    When `debug` is truthy, the finished `pairwise_dict` is printed.

    output: None
    """
    global bin_edges_dict, pairwise_dict

    # dictionary mapping each feature to its optimal bin edge configuration
    bin_edges_dict = create_bin_edges_dict()

    # every unordered pair of wells, in the order the wells appear in well_id_list
    pairs = []
    for position, first in enumerate(well_id_list):
        for second in well_id_list[position + 1:]:
            pairs.append((first, second))

    # dictionary mapping each pair of wells to a {feature name: list of mutual information values} dictionary
    pairwise_dict = {pair: {} for pair in pairs}

    for pair in pairs:
        w1, w2 = pair

        # dictionary holding only the dataframes of the two wells in this pair
        well_dict = {w1: experiment_dict[w1], w2: experiment_dict[w2]}

        for name, ref in zip(feature_name_list, feature_ref_list):

            # without a path, get_mi_list only returns the values
            csv_fpath = None
            if write:
                csv_fpath = results_fpath + pairwise_mi_ext + f'{name}_{w1}_{w2}_mi_list.csv'
                append_row_csv(csv_fpath, ['mi'])

            # list of mutual information values for the given feature
            pairwise_dict[pair][name] = get_mi_list(ref, bin_edges_dict[name], well_dict, csv_fpath)

    if debug:
        print('pairwise_dict:', type(pairwise_dict), '\n', pairwise_dict)

In [5]:
# call to `main()` function

# `main()` is executed only when the user parameter `run` is truthy;
# otherwise this cell does nothing
if run:
    main()

pairwise_dict: <class 'dict'> 
 {('01', '03'): {'gfp': [0.432776448020312, 0.5392158365032, 0.42862078090117706, 0.6242467571656078, 0.46369743841555766, 0.5309365198709646, 0.6113280835435653, 0.6168070338363405, 0.6837362137004601, 0.5661713875798755, 0.4716587972644799, 0.43764939415346626, 0.38098580800770065, 0.6318052128094793, 0.38312477593422, 0.500129317852672, 0.35288445266677293, 0.3323736391478955, 0.3030027720613506, 0.3364318563498734, 0.3416730346530268, 0.19131341465857465, 0.482502911785597, 0.25034470319210467, 0.34089584212777124, 0.22574833104028302, 0.12479791099355664, 0.1927669625568864, 0.10082141316028714, 0.002014377923796218, -0.13926329874558174, 0.15082683894893956, 0.08882160796697569, 0.14808313849933885, 0.17374234407837497, 0.07887729344817185, 0.13575376431359665, 0.2012314717057615, 0.1614954113249274, 0.3628957559417789, 0.27697806284849724, 0.3472767799672609, 0.3698220817592057, 0.23564121098506075, 0.2820727522020614, 0.24110650871517114, 0.205240