In [None]:
# notebook import

from ipynb.fs.full.data_processing import *

In [None]:
# function declaration

def create_ncells_list_dict(experiment_dict, timestep_index, num_cells_index):
    """Collect, per timestep, one cell-count reading from every well.

    Each well's rows are scanned in order. Whenever the value found at
    ``timestep_index`` differs from the previously seen timestep (the scan
    starts from 0), the value at ``num_cells_index`` of that row is recorded
    under the new timestep.

    Args:
        experiment_dict: mapping of well id -> table exposing ``iterrows()``
            (presumably a pandas DataFrame; confirm against data_processing).
        timestep_index: position of the timestep value within a row.
        num_cells_index: position of the cell-count value within a row.

    Returns:
        dict mapping timestep -> list of recorded cell counts, in the order
        the wells are iterated.
    """
    ncells_list_dict = {}
    for well_df in experiment_dict.values():
        # Start from 0, so leading rows whose timestep equals 0 are skipped.
        previous_timestep = 0
        for _, row in well_df.iterrows():
            row_values = list(row)
            current_timestep = row_values[timestep_index]
            if current_timestep != previous_timestep:
                cell_count = row_values[num_cells_index]
                ncells_list_dict.setdefault(current_timestep, []).append(cell_count)
                previous_timestep = current_timestep
    return ncells_list_dict
            
def create_ncells_dicts(ncells_list_dict):
    """Summarise the per-timestep cell counts as mean and standard deviation.

    Args:
        ncells_list_dict: mapping of timestep -> non-empty list of cell counts.

    Returns:
        Tuple ``(ncells_avg_dict, ncells_std_dict)``; both map each timestep to
        the arithmetic mean and to ``np.std`` of its counts respectively.
    """
    ncells_avg_dict = {
        timestep: sum(counts) / len(counts)
        for timestep, counts in ncells_list_dict.items()
    }
    ncells_std_dict = {
        timestep: np.std(counts)
        for timestep, counts in ncells_list_dict.items()
    }
    return ncells_avg_dict, ncells_std_dict
        
def create_ncells_list(ncells_dict):
    """Flatten a timestep-keyed dict into a list ordered by timestep.

    The dict is expected to contain every integer key from 1 to
    ``len(ncells_dict)``; a missing key raises ``KeyError``.

    Args:
        ncells_dict: mapping of timestep (1, 2, ..., n) -> value.

    Returns:
        List of the values for timesteps 1..n, in that order.
    """
    last_timestep = len(ncells_dict)
    return [ncells_dict[timestep] for timestep in range(1, last_timestep + 1)]

def create_all_methods_bins_dict(methods, feature_df, feature_name_list):
    """Build the method -> bin-edges dict for every feature.

    The previous implementation seeded the result with
    ``dict.fromkeys(feature_name_list, [])``, which makes every key refer to
    the *same* list, so each feature ended up holding the accumulated results
    of all features. Each feature now maps directly to its own dict, which is
    the shape ``create_bin_edges_list_dict`` reads (it indexes the per-feature
    value by method).

    Args:
        methods: iterable of binning specifications accepted by
            ``create_methods_bins_dict``.
        feature_df: table indexed by feature name.
        feature_name_list: names of the features to process.

    Returns:
        dict mapping feature name -> dict returned by
        ``create_methods_bins_dict`` for that feature.
    """
    return {
        feature: create_methods_bins_dict(methods, feature_df, feature)
        for feature in feature_name_list
    }

def create_methods_bins_dict(methods, feature_df, feature_name):
    """Compute the histogram bin edges of one feature under each binning method.

    Args:
        methods: iterable of values passed as ``bins`` to ``np.histogram``;
            each one also becomes a key of the result, so it must be hashable.
        feature_df: table from which ``feature_df[feature_name]`` is read.
        feature_name: key of the feature whose values are binned.

    Returns:
        dict mapping each method -> array of bin edges from ``np.histogram``.
    """
    # Only the edges (second element of the histogram result) are kept.
    return {
        method: np.histogram(feature_df[feature_name], bins=method, density=False)[1]
        for method in methods
    }

def create_bin_edges_list_dict(all_methods_bins_dict, feature_name_list, selected_methods):
    """Build, for every feature, the bin edges of the selected methods.

    The previous implementation seeded the result with
    ``dict.fromkeys(feature_name_list, [])``; all keys then shared one list and
    every feature accumulated the results of all the others. Each feature now
    gets its own value: the result of ``create_bin_edges_list`` for that
    feature.

    Args:
        all_methods_bins_dict: mapping of feature name -> (method -> bin edges)
            dict.
        feature_name_list: feature names; each is a key of the result and maps
            to an empty list when absent from ``all_methods_bins_dict``.
        selected_methods: methods forwarded to ``create_bin_edges_list``.

    Returns:
        dict mapping feature name -> value returned by
        ``create_bin_edges_list`` for that feature.
    """
    # Independent empty list per feature (no shared mutable default).
    bin_edges_list_dict = {feature: [] for feature in feature_name_list}
    for feature, methods_bins_dict in all_methods_bins_dict.items():
        bin_edges_list_dict[feature] = create_bin_edges_list(methods_bins_dict, selected_methods)
    return bin_edges_list_dict
        

def create_bin_edges_list(methods_bins_dict, selected_methods):
    """Return the bin edges of each selected method, in selection order.

    The previous loop re-assigned the accumulator on every iteration, so only
    the edges of the *last* selected method were returned and the initial
    empty list was never used. One entry per selected method is now collected.

    Args:
        methods_bins_dict: mapping of method -> bin edges.
        selected_methods: iterable of methods to look up; a method missing
            from ``methods_bins_dict`` raises ``KeyError``.

    Returns:
        List with ``methods_bins_dict[method]`` for every selected method
        (empty when no method is selected).
    """
    return [methods_bins_dict[method] for method in selected_methods]

In [None]:
# global variables 

# Per-timestep cell counts gathered across all wells.
ncells_list_dict = create_ncells_list_dict(experiment_dict, timestep_index, num_cells_index)
print('ncells_list_dict:', type(ncells_list_dict), ncells_list_dict)

# Mean and standard deviation of the cell count at each timestep.
ncells_avg_dict, ncells_std_dict = create_ncells_dicts(ncells_list_dict)
print('ncells_avg_dict:', type(ncells_avg_dict), ncells_avg_dict)
print('ncells_std_dict:', type(ncells_std_dict), ncells_std_dict)

ncells_avg_list = create_ncells_list(ncells_avg_dict)
print('ncells_avg_list:', type(ncells_avg_list), ncells_avg_list)

ncells_std_list = create_ncells_list(ncells_std_dict)
print('ncells_std_list:', type(ncells_std_list), ncells_std_list)

# all_methods_bins_dict is not built in this cell, so its print is disabled
# together with the assignment; otherwise the cell depends on a variable left
# over in the kernel. Re-enable both lines together.
#all_methods_bins_dict = create_all_methods_bins_dict(methods, feature_df, feature_name_list)
#print('all_methods_bins_dict:', type(all_methods_bins_dict), all_methods_bins_dict)

#gfp_methods_bins_dict = all_methods_bins_dict[feature_name_list[0]]
gfp_methods_bins_dict = create_methods_bins_dict(methods, feature_df, feature_name_list[0])
print('gfp_methods_bins_dict:', type(gfp_methods_bins_dict), gfp_methods_bins_dict)

#bin_edges_list_dict = create_bin_edges_list_dict(all_methods_bins_dict, feature_name_list, selected_methods)
#print('bin_edges_list_dict:', type(bin_edges_list_dict), bin_edges_list_dict)

#bin_edges_list = bin_edges_list_dict[feature_name_list[0]]
# Use the dict computed above; `methods_bins_dict` is not defined at notebook level.
bin_edges_list = create_bin_edges_list(gfp_methods_bins_dict, selected_methods)
print('bin_edges_list:', type(bin_edges_list), bin_edges_list)

In [None]:
# output: dictionary mapping a well's id number to its list of values of a given feature at an individual timestep
def get_timestep_population_dict(experiment_dict, timestep, feature_ref, timestep_ref):
    """Extract, per well, the feature values recorded at one timestep.

    Args:
        experiment_dict: mapping of well id -> table indexable first by column
            key and then by row position ``0..n-1``.
        timestep: timestep value to select.
        feature_ref: column key of the feature to extract.
        timestep_ref: column key holding each row's timestep.

    Returns:
        dict mapping well id -> list of feature values whose row timestep
        equals ``timestep`` (empty list when the well has none).
    """
    population_dict = {}
    for well, well_data in experiment_dict.items():
        feature_column = well_data[feature_ref]
        population_dict[well] = [
            feature_column[row]
            for row in range(len(feature_column))
            if well_data[timestep_ref][row] == timestep
        ]
    return population_dict

# calculate shannon entropy given a population and number of bins
def shannon_entropy_fn(population, num_bins):
    """Shannon entropy, in bits, of a population's binned distribution.

    The population is histogrammed and the bin counts are normalised to
    probabilities that sum to 1 before applying ``-sum(p * log2(p))``. The
    previous version used ``density=True``, which yields probability
    *densities* (counts divided by total and by bin width); those do not sum
    to 1, so the result depended on bin width and could be zero or negative
    for a clearly non-degenerate distribution.

    Args:
        population: array-like of numeric values.
        num_bins: passed as ``bins`` to ``np.histogram``.

    Returns:
        The entropy in bits; ``0.0`` when no value falls into any bin (for
        example an empty population).
    """
    counts, _ = np.histogram(population, bins=num_bins)
    total = counts.sum()
    if total == 0:
        # Nothing to normalise; avoid a 0/0 division.
        return 0.0
    # Empty bins contribute nothing and would produce log2(0).
    probabilities = counts[counts > 0] / total
    return -np.sum(probabilities * np.log2(probabilities))


# calculate mutual information at a timestep given a random joint sampling population, dictionary of populations, and number of bins
def calculate_mi(population_dict, subset_size, num_bins):
    """Estimate the mutual-information score of a set of populations.

    A joint population is formed by drawing ``subset_size`` values with
    ``np.random.choice`` from every population and concatenating the draws.
    The score is the entropy of that joint sample minus the mean entropy of
    the individual (full) populations. The draw is random, so results vary
    between calls unless the numpy global seed is fixed. No exception handling
    is applied; errors from sampling or histogramming propagate to the caller.

    Args:
        population_dict: mapping of population id -> sequence of values.
        subset_size: number of values drawn from each population.
        num_bins: forwarded to ``shannon_entropy_fn``.

    Returns:
        Joint entropy minus mean per-population entropy.
    """
    populations = population_dict.values()

    sampled = [np.random.choice(values, subset_size) for values in populations]
    joint_entropy = shannon_entropy_fn(np.concatenate(sampled), num_bins)

    entropies = [shannon_entropy_fn(values, num_bins) for values in populations]
    return joint_entropy - sum(entropies) / len(entropies)

def get_mi_list(timestep_list, experiment_dict, feature_ref, ncells_avg_dict, timestep_ref, well_id_list, bin_edges_list):
    """Compute the mutual-information score at every timestep for each binning.

    Fixes two defects of the previous version: it returned the undefined name
    ``mi_list`` (NameError) instead of the dict it built, and it seeded that
    dict with ``dict.fromkeys(bin_edges_list, [])``, which makes all keys share
    one list so every binning accumulated the scores of all binnings.

    Args:
        timestep_list: timesteps to evaluate, in order.
        experiment_dict: mapping of well id -> table, as consumed by
            ``get_timestep_population_dict``.
        feature_ref: column key of the feature to analyse.
        ncells_avg_dict: mapping of timestep -> average cell count.
        timestep_ref: column key holding each row's timestep.
        well_id_list: well ids; its length divides the average cell count to
            get the per-well sample size (must be non-empty).
        bin_edges_list: binning specifications; each is forwarded to
            ``calculate_mi`` as ``num_bins`` and used as a key of the result,
            so entries must be hashable.

    Returns:
        dict mapping each binning specification -> list of scores, one per
        timestep in ``timestep_list`` order.
    """
    # One independent list per binning (no shared mutable value).
    mi_dict = {bin_edges: [] for bin_edges in bin_edges_list}
    for timestep in timestep_list:
        population_dict = get_timestep_population_dict(experiment_dict, timestep, feature_ref, timestep_ref)
        # Per-well sample size: average cell count split evenly across wells.
        subset_size = int(ncells_avg_dict[timestep]//len(well_id_list))
        for bin_edges in bin_edges_list:
            mi = calculate_mi(population_dict, subset_size, bin_edges)
            mi_dict[bin_edges].append(mi)
            print('mi', timestep, mi)
    return mi_dict

def get_mi_list_dict(feature_ref_list, feature_name_list, timestep_list, experiment_dict, ncells_avg_dict, timestep_ref, well_id_list, num_bins):
    """Run ``get_mi_list`` for several features and key the results by name.

    Args:
        feature_ref_list: column keys of the features to analyse.
        feature_name_list: display names, aligned by position with
            ``feature_ref_list``.
        timestep_list, experiment_dict, ncells_avg_dict, timestep_ref,
        well_id_list: forwarded unchanged to ``get_mi_list``.
        num_bins: forwarded to ``get_mi_list`` as its last argument.

    Returns:
        dict mapping feature name -> result of ``get_mi_list`` for the feature
        at the same position in ``feature_ref_list``.
    """
    mi_list_dict = {}
    for position, feature_ref in enumerate(feature_ref_list):
        mi_list_dict[feature_name_list[position]] = get_mi_list(
            timestep_list,
            experiment_dict,
            feature_ref,
            ncells_avg_dict,
            timestep_ref,
            well_id_list,
            num_bins,
        )
    return mi_list_dict

In [None]:
# Mutual-information scores for the 'mean_F_C2' feature, one list per binning.
mi_list_dict = get_mi_list(
    timestep_list,
    experiment_dict,
    'mean_F_C2',
    ncells_avg_dict,
    timestep_ref,
    well_id_list,
    bin_edges_list,
)

# Show the scores for each binning rule in turn.
for binning_method in ('sqrt', 'scott', 'sturges'):
    print(mi_list_dict[binning_method])