In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import matplotlib.ticker as ticker
import matplotlib.pylab as pl
from itertools import cycle
import matplotlib.gridspec as gridspec
import glob
import collections
import math
import re
import os
from collections import defaultdict

This notebook performs the Shannon index, Shannon equitability and species-count calculations for the multispecies project.

It is based on the code used for the same calculations in the biofilm threshold theory species composition work.

In [16]:
nRuns = 100
runID_offsets = [0, 100]
duration = 4368 #duration of sims in hours - equivalent to 26 weeks
dates = ["24-Sep-2020", "09-Oct-2020", "14-Oct-2020", "06-Nov-2020", "16-Nov-2020", "25-Nov-2020"]#dates the simulations were performed on
pc_res = [14, 15, 16] #percentages of the populations which are resistant to the applied biocide
phase2_str = "phase2"

#parameters for the log normal distributions used
#[scale, sigma]
log_norm_params_14pcRes = [2.703747953786337, 0.5690825284230452]
log_norm_params_15pcRes = [2.6133256846855746, 0.6260058161550592]
log_norm_params_16pcRes = [2.47772924764521, 0.7060073500033884]

In [17]:
def getFilepathToGenoRuns(date, pc_res, phase):
    '''
    Builds the relative path of the directory which holds the genotype distributions
    (all the runID directories) of one batch of simulations.

    date: date string of the batch, e.g. "06-Nov-2020"
    pc_res: percentage of the population which is resistant (converted with str())
    phase: phase label string, e.g. "phase2"

    returns: "geno_distb_data_<phase>/<pc_res>_resistant-<date>/" (note the trailing slash)
    '''
    path_pieces = ("geno_distb_data_", phase, "/", str(pc_res), "_resistant-", date, "/")
    return "".join(path_pieces)


def getEventCountersDataframe(date, pc_res, phase, sigma, duration):
    '''
    Reads the event counters .csv file of one batch of simulations into a dataframe.

    date: date string of the batch, e.g. "06-Nov-2020"
    pc_res: percentage of the population which is resistant
    phase: phase label string, e.g. "phase2"
    sigma: sigma value that appears in the filename (written with 5 decimal places)
    duration: duration that appears in the filename; ".0" is appended to it, so an int is expected

    returns: the dataframe produced by pd.read_csv
    '''
    batch_name = str(pc_res)+"_resistant-"+date
    sigma_label = "{:.5f}".format(sigma)
    csv_name = batch_name+"-event_counters-sigma="+sigma_label+"-t="+str(duration)+".0.csv"

    return pd.read_csv("geno_distb_data_"+phase+"/"+batch_name+"/"+csv_name)


def getListOfMeasurementTimes(directory_name):
    '''
    for each runID directory, this gets the filenames and extracts a list of the times they were sampled at.
    directory_name is of form path_to_files/runID_<n>

    The time is taken to be the first number (integer or decimal) found in each filename.
    Filenames which contain no number at all (e.g. hidden system files) are skipped
    instead of raising an AttributeError.

    returns: sorted list of the time vals, in string form with 2 decimal places
    '''
    number_pattern = re.compile(r'(\d+(?:\.\d+)?)')

    time_list = []
    for filename in os.listdir(directory_name):
        number_match = number_pattern.search(filename)
        if number_match is None:
            #no number in this filename, so it can't be one of the sampled-time files
            continue
        time_list.append(float(number_match.group(0)))

    #sort numerically (not alphabetically) before converting back to strings
    return ["{:.2f}".format(t) for t in sorted(time_list)]

In [18]:
def shannonIndexAndEquitabilitySolo(geno_dict):
    '''
    For a single run, this calculates the shannon variables H, E, S at every sampled time.
    Nothing is written to file here - the returned lists are what gets saved by the caller.

    geno_dict: dictionary of {time: dataframe of genotype values}. All the values in a
               dataframe are pooled together, and any nans are ignored.

    returns: (times, nBac, H, E, S) - five lists of equal length, in the key order of geno_dict
        times - the keys of geno_dict
        nBac  - total no. of bacteria (non-nan values) at each time
        H     - shannon index, -sum(p*ln(p)) over the species present
        E     - shannon equitability, H/ln(S). Defined as H (i.e. 0) when S <= 1, so an
                empty population gives 0 rather than a math domain error from log(0)
        S     - no. of different species (distinct genotype values) at each time
    '''

    t_list = []
    nBac_t_list = [] #no. of bacteria over time
    H_t_list = [] #shannon index over time
    E_t_list = [] #shannon equitability over time
    S_t_list = [] #no. of species over time

    for time_key, geno_df in geno_dict.items():

        #here we create an array with all the genotypes in it and remove any nans
        all_vals = geno_df.values.flatten()
        geno_vals = all_vals[~np.isnan(all_vals)]
        nTot = geno_vals.size #total number of bacteria in the population
        genoCounts = collections.Counter(geno_vals) #number of members of each bacterial species in the system

        H = sum(-n/nTot*math.log(n/nTot) for n in genoCounts.values()) #shannon index of this run at time t
        S = len(genoCounts) #no. of different species in the system
        #log(1) = 0 and log(0) is undefined, so avoid dividing by (or taking) them.
        #S <= 1 matches the check used in shannonIndexAndEquitabilitySolo_EDGE
        logS_adjusted = 1 if S <= 1 else math.log(S)
        E = H/logS_adjusted #shannon equitability

        t_list.append(time_key)
        nBac_t_list.append(int(nTot))
        H_t_list.append(H)
        E_t_list.append(E)
        S_t_list.append(S)

    return t_list, nBac_t_list, H_t_list, E_t_list, S_t_list

In [19]:
def shannonIndexAndEquitabilitySolo_EDGE(geno_dict):
    '''
    Calculates the shannon variables H, E, S over time for a single run, using only the
    last column of each timestep's dataframe (taken to be the edge microhabitat).

    geno_dict: dictionary of {time: dataframe of genotype values, one column per microhabitat}

    returns: (times, nBac, H, E, S) - five lists of equal length, in the key order of geno_dict
    '''

    sampled_times = []
    population_sizes = [] #no. of bacteria over time
    shannon_indices = [] #shannon index over time
    equitabilities = [] #shannon equitability over time
    species_counts = [] #no. of species over time

    for sample_time, geno_df in geno_dict.items():

        #the last column label in this timestep, hopefully it's the edge one
        edge_label = geno_df.keys()[-1]
        #all the genotypes in the edge column, with any nans removed
        edge_all = geno_df[edge_label].values.flatten()
        edge_genos = edge_all[~np.isnan(edge_all)]

        n_total = edge_genos.size #total number of bacteria in the edge population
        species_tally = collections.Counter(edge_genos) #number of members of each bacterial species

        #shannon index of the edge population at this time
        shannon_H = sum(-count/n_total*math.log(count/n_total) for count in species_tally.values())
        n_species = len(species_tally) #no. of different species present
        #with 0 or 1 species there is nothing to normalise by, so divide by 1 instead of log(S)
        if n_species <= 1:
            normaliser = 1
        else:
            normaliser = math.log(n_species)
        equitability = shannon_H/normaliser #shannon equitability

        sampled_times.append(sample_time)
        population_sizes.append(int(n_total))
        shannon_indices.append(shannon_H)
        equitabilities.append(equitability)
        species_counts.append(n_species)

    return sampled_times, population_sizes, shannon_indices, equitabilities, species_counts

In [20]:
def writeShannonCalculationsToFile(t_data, nBac_data, H_data, E_data, S_data, pc_res, date, phase_val, growth_val, filename):
    '''
    it takes an absolute age to load in all the genotype data, so this method will write the calculated values to a file

    t_data, nBac_data, H_data, E_data, S_data: equal-length lists which become the
        't', 'nBac', 'H', 'E' and 'S' columns of the .csv file
    pc_res, date, phase_val, growth_val: used to build the output directory,
        shannon_calculations_<phase_val>_<growth_val>/<pc_res>_pc_res-<date>/
    filename: name of the .csv file inside that directory

    The output directory is created if it doesn't exist yet, so the calculated values
    aren't lost to a missing-directory error at the very end.
    '''
    output_dir = "shannon_calculations_"+phase_val+"_"+growth_val+"/"+str(pc_res)+"_pc_res-"+date
    os.makedirs(output_dir, exist_ok=True)

    #create a dataframe containing all the calculated values
    #first we need a dictionary with the data in it
    collated_data = {'t':t_data, 'nBac':nBac_data, 'H':H_data, 'E':E_data, 'S':S_data}
    df = pd.DataFrame(collated_data)
    df.to_csv(output_dir+"/"+filename)

In [21]:
def writeShannonCalculationsToFile_EDGE(t_data, nBac_data, H_data, E_data, S_data, pc_res, date, phase_val, growth_val, filename):
    '''
    it takes an absolute age to load in all the genotype data, so this method will write the calculated values to a file
    this method is just for the edge data

    t_data, nBac_data, H_data, E_data, S_data: equal-length lists which become the
        't', 'nBac', 'H', 'E' and 'S' columns of the .csv file
    pc_res, date, phase_val, growth_val: used to build the output directory,
        shannon_calculations_<phase_val>_<growth_val>_EDGE/<pc_res>_pc_res-<date>_<growth_val>_EDGE/
    filename: name of the .csv file inside that directory

    The output directory is created if it doesn't exist yet, so the calculated values
    aren't lost to a missing-directory error at the very end.
    '''
    output_dir = "shannon_calculations_"+phase_val+"_"+growth_val+"_EDGE/"+str(pc_res)+"_pc_res-"+date+"_"+str(growth_val)+"_EDGE"
    os.makedirs(output_dir, exist_ok=True)

    #create a dataframe containing all the calculated values
    #first we need a dictionary with the data in it
    collated_data = {'t':t_data, 'nBac':nBac_data, 'H':H_data, 'E':E_data, 'S':S_data}
    df = pd.DataFrame(collated_data)
    df.to_csv(output_dir+"/"+filename)

In [22]:
def readGenoDistbAndProcessShannonData(directoryPath, pc_res, date, phase_val, growth_val, runID):
    '''
    This loads in all the genotype data for a single run, calculates the shannon variables
    over time (shannonIndexAndEquitabilitySolo) and writes them to a .csv file
    (writeShannonCalculationsToFile).

    directoryPath: directory which contains the runID_<n> directories
    pc_res, date, phase_val: passed on to writeShannonCalculationsToFile to build the output location
    growth_val: "GROWTH" for runs which exhibit growth, "NOGROWTH" for runs which remain in the
                first microhabitat. It is passed on to writeShannonCalculationsToFile.
                NOTE(review): the original docstring also listed "" as an option but broke off
                before saying what it is for - confirm
    runID: ID of the run to process; the files are read from <directoryPath>/runID_<runID>/
    '''
    runID_key = "runID_"+str(runID)
    print(runID_key)
    filepath_runID = directoryPath+"/"+runID_key
    geno_time_dict = {} #dictionary containing geno dataframes for each timestep

    time_list = getListOfMeasurementTimes(filepath_runID) #sorted list of the times that the genos were sampled at in this run

    for t in time_list:

        filepath_time = filepath_runID+"/geno_distb-t="+t+".csv"

        #the file is parsed by hand and closed straight away, so file handles don't pile up over the timesteps
        #NOTE(review): presumably pd.read_csv isn't used because the rows can have different lengths - confirm
        with open(filepath_time, 'r') as geno_file:
            geno_rows = [line.strip().split(',') for line in geno_file]

        #need to swap the rows and columns so that the microhabitat is the key in the dataframe
        geno_df = pd.DataFrame(geno_rows).T
        new_header = geno_df.iloc[0] #grab the first row for the header
        geno_df = geno_df[1:] #take the data less the header row
        geno_df.columns = new_header #set the header row as the df header

        geno_df = geno_df.astype(float)

        #use the integer part of the time as the key to make reading it in easier
        #the [:-3] removes the decimal point and the 2 decimal places before casting the string to an int.
        #this truncates rather than rounds, so two times with the same integer part would overwrite each other
        geno_time_dict[int(t[:-3])] = geno_df

    #we now have the geno distb loaded, so can process it
    t_list, nBac_list, H_vs_t_list, E_vs_t_list, S_vs_t_list = shannonIndexAndEquitabilitySolo(geno_time_dict)
    #write the data to file
    writeShannonCalculationsToFile(t_list, nBac_list, H_vs_t_list, E_vs_t_list, S_vs_t_list, pc_res=pc_res, date=date, phase_val=phase_val, growth_val=growth_val,
                                   filename="shannon_calculations-"+str(pc_res)+"_pc_res-runID_"+str(runID)+".csv")
    #geno_time_dict is local, so it is released when the function returns - no explicit del needed

In [23]:
def readGenoDistbAndProcessShannonData_EDGE(directoryPath, pc_res, date, phase_val, growth_val, runID):
    '''
    Loading all the dataframes into one master dictionary was causing serious memory issues.
    So here we'll just load in the geno data and process it for a single run at a time.
    this method is just for the edge microhabitats

    The shannon variables are calculated with shannonIndexAndEquitabilitySolo_EDGE and written
    to a .csv file with writeShannonCalculationsToFile_EDGE.

    directoryPath: directory which contains the runID_<n> directories
    pc_res, date, phase_val, growth_val: passed on to writeShannonCalculationsToFile_EDGE
                                         to build the output location
    runID: ID of the run to process; the files are read from <directoryPath>/runID_<runID>/
    '''
    runID_key = "runID_"+str(runID)
    print(runID_key)
    filepath_runID = directoryPath+"/"+runID_key
    geno_time_dict = {} #dictionary containing geno dataframes for each timestep

    time_list = getListOfMeasurementTimes(filepath_runID) #sorted list of the times that the genos were sampled at in this run

    for t in time_list:

        filepath_time = filepath_runID+"/geno_distb-t="+t+".csv"

        #the file is parsed by hand and closed straight away, so file handles don't pile up over the timesteps
        #NOTE(review): presumably pd.read_csv isn't used because the rows can have different lengths - confirm
        with open(filepath_time, 'r') as geno_file:
            geno_rows = [line.strip().split(',') for line in geno_file]

        #need to swap the rows and columns so that the microhabitat is the key in the dataframe
        geno_df = pd.DataFrame(geno_rows).T
        new_header = geno_df.iloc[0] #grab the first row for the header
        geno_df = geno_df[1:] #take the data less the header row
        geno_df.columns = new_header #set the header row as the df header

        geno_df = geno_df.astype(float)

        #use the integer part of the time as the key to make reading it in easier
        #the [:-3] removes the decimal point and the 2 decimal places before casting the string to an int.
        #this truncates rather than rounds, so two times with the same integer part would overwrite each other
        geno_time_dict[int(t[:-3])] = geno_df

    #we now have the geno distb loaded, so can process it
    t_list, nBac_list, H_vs_t_list, E_vs_t_list, S_vs_t_list = shannonIndexAndEquitabilitySolo_EDGE(geno_time_dict)
    #write the data to file
    writeShannonCalculationsToFile_EDGE(t_list, nBac_list, H_vs_t_list, E_vs_t_list, S_vs_t_list, pc_res=pc_res, date=date, phase_val=phase_val, growth_val=growth_val,
                                   filename="shannon_calculations-"+str(pc_res)+"_pc_res-runID_"+str(runID)+".csv")
    #geno_time_dict is local, so it is released when the function returns - no explicit del needed

In [24]:
def shannonProcessNRuns(shannon_function, runID_list, directoryPath, pc_res_val, date, phase_val_str, growth_val_str):
    '''
    Runs shannon_function once for every runID in runID_list, in order.

    shannon_function is called with the keyword arguments directoryPath, pc_res, date,
    phase_val, growth_val and runID. pc_res_val and growth_val_str are printed before
    the runs are processed, and an empty line is printed once they are all done.
    '''
    for header_line in (pc_res_val, growth_val_str):
        print(header_line)

    #everything except the runID is the same for each call
    shared_kwargs = {
        "directoryPath": directoryPath,
        "pc_res": pc_res_val,
        "date": date,
        "phase_val": phase_val_str,
        "growth_val": growth_val_str,
    }
    for current_runID in runID_list:
        shannon_function(**shared_kwargs, runID=current_runID)

    print()

In [25]:
def _phase2GenoFilepaths(date):
    '''
    Directories of the phase 2 genotype distributions for one simulation date,
    one per resistance percentage, in the order of the first three entries of pc_res.
    '''
    return [getFilepathToGenoRuns(date=date, pc_res=pc, phase=phase2_str) for pc in pc_res[:3]]


#one row per simulation date; the three names on each row line up with pc_res[0], pc_res[1], pc_res[2]
pc_res_14_24_Sep_filepath, pc_res_15_24_Sep_filepath, pc_res_16_24_Sep_filepath = _phase2GenoFilepaths(dates[0])
pc_res_14_09_Oct_filepath, pc_res_15_09_Oct_filepath, pc_res_16_09_Oct_filepath = _phase2GenoFilepaths(dates[1])
pc_res_14_14_Oct_filepath, pc_res_15_14_Oct_filepath, pc_res_16_14_Oct_filepath = _phase2GenoFilepaths(dates[2])
pc_res_14_06_Nov_filepath, pc_res_15_06_Nov_filepath, pc_res_16_06_Nov_filepath = _phase2GenoFilepaths(dates[3])
pc_res_14_16_Nov_filepath, pc_res_15_16_Nov_filepath, pc_res_16_16_Nov_filepath = _phase2GenoFilepaths(dates[4])
pc_res_14_25_Nov_filepath, pc_res_15_25_Nov_filepath, pc_res_16_25_Nov_filepath = _phase2GenoFilepaths(dates[5])

#quick check of what one of the paths looks like
print(pc_res_14_06_Nov_filepath)

geno_distb_data_phase2/14_resistant-06-Nov-2020/


I believe the discrepancy in species composition between runs which exhibit growth and runs which don't is throwing off the Shannon calculations somewhat.

Therefore, using the event_counters dataframe, we can separate the runs into the categories GROWTH (thickness > 0) and NOGROWTH.

# Due to the no. of runs being different on each session for some of the runs, we'll add a "date" column to the event counters which can then be used to select the correct results directory 

# (This is only really needed in the geno plotter notebook, but we'll do it here too just in case)

In [26]:
def _loadDatedEventCounters(date, pc_res_val, sigma):
    '''
    Loads the phase 2 event counters of one batch and tags every row with the date of the batch.

    The "date" column is filled in from the same date string that is used to locate the file,
    so the tag can't drift away from the directory it refers to (the 24-Sep-2020 batch used to
    be tagged with the hand-typed "14-Sep-2020", which isn't one of the simulation dates).
    '''
    event_counters = getEventCountersDataframe(date, pc_res_val, phase2_str, sigma, duration)
    event_counters["date"] = date
    return event_counters


#the sigma of each log normal distribution is the 2nd entry of its [scale, sigma] parameter list
event_counters_14pc_24Sep = _loadDatedEventCounters(dates[0], pc_res[0], log_norm_params_14pcRes[1])
event_counters_15pc_24Sep = _loadDatedEventCounters(dates[0], pc_res[1], log_norm_params_15pcRes[1])
event_counters_16pc_24Sep = _loadDatedEventCounters(dates[0], pc_res[2], log_norm_params_16pcRes[1])

event_counters_14pc_09Oct = _loadDatedEventCounters(dates[1], pc_res[0], log_norm_params_14pcRes[1])
event_counters_15pc_09Oct = _loadDatedEventCounters(dates[1], pc_res[1], log_norm_params_15pcRes[1])
event_counters_16pc_09Oct = _loadDatedEventCounters(dates[1], pc_res[2], log_norm_params_16pcRes[1])

event_counters_14pc_14Oct = _loadDatedEventCounters(dates[2], pc_res[0], log_norm_params_14pcRes[1])
event_counters_15pc_14Oct = _loadDatedEventCounters(dates[2], pc_res[1], log_norm_params_15pcRes[1])
event_counters_16pc_14Oct = _loadDatedEventCounters(dates[2], pc_res[2], log_norm_params_16pcRes[1])

event_counters_14pc_06Nov = _loadDatedEventCounters(dates[3], pc_res[0], log_norm_params_14pcRes[1])
event_counters_15pc_06Nov = _loadDatedEventCounters(dates[3], pc_res[1], log_norm_params_15pcRes[1])
event_counters_16pc_06Nov = _loadDatedEventCounters(dates[3], pc_res[2], log_norm_params_16pcRes[1])

event_counters_14pc_16Nov = _loadDatedEventCounters(dates[4], pc_res[0], log_norm_params_14pcRes[1])
event_counters_15pc_16Nov = _loadDatedEventCounters(dates[4], pc_res[1], log_norm_params_15pcRes[1])
event_counters_16pc_16Nov = _loadDatedEventCounters(dates[4], pc_res[2], log_norm_params_16pcRes[1])

#the 14% event counters of the 25-Nov-2020 batch are not loaded (this was already disabled)
# event_counters_14pc_25Nov = _loadDatedEventCounters(dates[5], pc_res[0], log_norm_params_14pcRes[1])
event_counters_15pc_25Nov = _loadDatedEventCounters(dates[5], pc_res[1], log_norm_params_15pcRes[1])
event_counters_16pc_25Nov = _loadDatedEventCounters(dates[5], pc_res[2], log_norm_params_16pcRes[1])



In [34]:
def _growthRunIDs(event_counters):
    '''runIDs of the runs which exhibit growth, i.e. whose bf_thickness is > 0'''
    return list(event_counters["runID"][(event_counters["bf_thickness"] > 0)])


def _noGrowthRunIDs(event_counters):
    '''
    runIDs of all the runs which are NOT in _growthRunIDs.
    The runIDs and the mask come from the same dataframe, so they can't get mismatched
    (NOGROWTH_RUNS_16pc_24Sep used to take its runIDs from the 15% dataframe).
    '''
    #~(> 0) rather than (<= 0), so a run with a nan thickness also counts as no growth
    return list(event_counters["runID"][~(event_counters["bf_thickness"] > 0)])


GROWTH_RUNS_14pc_24Sep = _growthRunIDs(event_counters_14pc_24Sep)
GROWTH_RUNS_15pc_24Sep = _growthRunIDs(event_counters_15pc_24Sep)
GROWTH_RUNS_16pc_24Sep = _growthRunIDs(event_counters_16pc_24Sep)

GROWTH_RUNS_14pc_09Oct = _growthRunIDs(event_counters_14pc_09Oct)
GROWTH_RUNS_15pc_09Oct = _growthRunIDs(event_counters_15pc_09Oct)
GROWTH_RUNS_16pc_09Oct = _growthRunIDs(event_counters_16pc_09Oct)

GROWTH_RUNS_14pc_14Oct = _growthRunIDs(event_counters_14pc_14Oct)
GROWTH_RUNS_15pc_14Oct = _growthRunIDs(event_counters_15pc_14Oct)
GROWTH_RUNS_16pc_14Oct = _growthRunIDs(event_counters_16pc_14Oct)

GROWTH_RUNS_14pc_06Nov = _growthRunIDs(event_counters_14pc_06Nov)
GROWTH_RUNS_15pc_06Nov = _growthRunIDs(event_counters_15pc_06Nov)
GROWTH_RUNS_16pc_06Nov = _growthRunIDs(event_counters_16pc_06Nov)

GROWTH_RUNS_14pc_16Nov = _growthRunIDs(event_counters_14pc_16Nov)
GROWTH_RUNS_15pc_16Nov = _growthRunIDs(event_counters_15pc_16Nov)
GROWTH_RUNS_16pc_16Nov = _growthRunIDs(event_counters_16pc_16Nov)

#the 14% event counters of the 25-Nov-2020 batch are not loaded, so there is no list for them
#GROWTH_RUNS_14pc_25Nov = _growthRunIDs(event_counters_14pc_25Nov)
GROWTH_RUNS_15pc_25Nov = _growthRunIDs(event_counters_15pc_25Nov)
GROWTH_RUNS_16pc_25Nov = _growthRunIDs(event_counters_16pc_25Nov)


NOGROWTH_RUNS_14pc_24Sep = _noGrowthRunIDs(event_counters_14pc_24Sep)
NOGROWTH_RUNS_15pc_24Sep = _noGrowthRunIDs(event_counters_15pc_24Sep)
NOGROWTH_RUNS_16pc_24Sep = _noGrowthRunIDs(event_counters_16pc_24Sep)

NOGROWTH_RUNS_14pc_09Oct = _noGrowthRunIDs(event_counters_14pc_09Oct)
NOGROWTH_RUNS_15pc_09Oct = _noGrowthRunIDs(event_counters_15pc_09Oct)
NOGROWTH_RUNS_16pc_09Oct = _noGrowthRunIDs(event_counters_16pc_09Oct)

NOGROWTH_RUNS_14pc_14Oct = _noGrowthRunIDs(event_counters_14pc_14Oct)
NOGROWTH_RUNS_15pc_14Oct = _noGrowthRunIDs(event_counters_15pc_14Oct)
NOGROWTH_RUNS_16pc_14Oct = _noGrowthRunIDs(event_counters_16pc_14Oct)

NOGROWTH_RUNS_14pc_06Nov = _noGrowthRunIDs(event_counters_14pc_06Nov)
NOGROWTH_RUNS_15pc_06Nov = _noGrowthRunIDs(event_counters_15pc_06Nov)
NOGROWTH_RUNS_16pc_06Nov = _noGrowthRunIDs(event_counters_16pc_06Nov)

NOGROWTH_RUNS_14pc_16Nov = _noGrowthRunIDs(event_counters_14pc_16Nov)
NOGROWTH_RUNS_15pc_16Nov = _noGrowthRunIDs(event_counters_15pc_16Nov)
NOGROWTH_RUNS_16pc_16Nov = _noGrowthRunIDs(event_counters_16pc_16Nov)

#NOGROWTH_RUNS_14pc_25Nov = _noGrowthRunIDs(event_counters_14pc_25Nov)
NOGROWTH_RUNS_15pc_25Nov = _noGrowthRunIDs(event_counters_15pc_25Nov)
NOGROWTH_RUNS_16pc_25Nov = _noGrowthRunIDs(event_counters_16pc_25Nov)



In [28]:
#the GROWTH runs
#each job is (runID_list, directoryPath, pc_res_val, date)
#the jobs which are commented out are not processed when this cell is run
_growth_jobs = [
    # (GROWTH_RUNS_14pc_24Sep, pc_res_14_24_Sep_filepath, pc_res[0], dates[0]),
    # (GROWTH_RUNS_15pc_24Sep, pc_res_15_24_Sep_filepath, pc_res[1], dates[0]),
    # (GROWTH_RUNS_16pc_24Sep, pc_res_16_24_Sep_filepath, pc_res[2], dates[0]),

    # (GROWTH_RUNS_14pc_09Oct, pc_res_14_09_Oct_filepath, pc_res[0], dates[1]),
    # (GROWTH_RUNS_15pc_09Oct, pc_res_15_09_Oct_filepath, pc_res[1], dates[1]),
    # (GROWTH_RUNS_16pc_09Oct, pc_res_16_09_Oct_filepath, pc_res[2], dates[1]),

    # (GROWTH_RUNS_14pc_14Oct, pc_res_14_14_Oct_filepath, pc_res[0], dates[2]),
    # (GROWTH_RUNS_15pc_14Oct, pc_res_15_14_Oct_filepath, pc_res[1], dates[2]),
    # (GROWTH_RUNS_16pc_14Oct, pc_res_16_14_Oct_filepath, pc_res[2], dates[2]),

    # (GROWTH_RUNS_14pc_06Nov, pc_res_14_06_Nov_filepath, pc_res[0], dates[3]),
    (GROWTH_RUNS_15pc_06Nov, pc_res_15_06_Nov_filepath, pc_res[1], dates[3]),
    # (GROWTH_RUNS_16pc_06Nov, pc_res_16_06_Nov_filepath, pc_res[2], dates[3]),

    (GROWTH_RUNS_14pc_16Nov, pc_res_14_16_Nov_filepath, pc_res[0], dates[4]),
    (GROWTH_RUNS_15pc_16Nov, pc_res_15_16_Nov_filepath, pc_res[1], dates[4]),
    (GROWTH_RUNS_16pc_16Nov, pc_res_16_16_Nov_filepath, pc_res[2], dates[4]),

    # (GROWTH_RUNS_14pc_25Nov, pc_res_14_25_Nov_filepath, pc_res[0], dates[5]),
    (GROWTH_RUNS_15pc_25Nov, pc_res_15_25_Nov_filepath, pc_res[1], dates[5]),
    (GROWTH_RUNS_16pc_25Nov, pc_res_16_25_Nov_filepath, pc_res[2], dates[5]),
]

for _job_runIDs, _job_directory, _job_pc_res, _job_date in _growth_jobs:
    shannonProcessNRuns(shannon_function=readGenoDistbAndProcessShannonData, runID_list=_job_runIDs, directoryPath=_job_directory,
                        pc_res_val=_job_pc_res, date=_job_date, phase_val_str=phase2_str, growth_val_str="GROWTH")




15
GROWTH
runID_251
runID_268
runID_269
runID_281
runID_293
runID_294
runID_299

14
GROWTH
runID_402
runID_413
runID_418
runID_451

15
GROWTH
runID_301
runID_310
runID_311

16
GROWTH
runID_178
runID_185
runID_186
runID_187
runID_191
runID_192

15
GROWTH
runID_325
runID_332
runID_340
runID_348

16
GROWTH
runID_202
runID_206
runID_208
runID_209
runID_218



In [29]:
#the EDGE GROWTH runs
#each job is (runID_list, directoryPath, pc_res_val, date)
#the jobs which are commented out are not processed when this cell is run
_edge_growth_jobs = [
    # (GROWTH_RUNS_14pc_24Sep, pc_res_14_24_Sep_filepath, pc_res[0], dates[0]),
    # (GROWTH_RUNS_15pc_24Sep, pc_res_15_24_Sep_filepath, pc_res[1], dates[0]),
    # (GROWTH_RUNS_16pc_24Sep, pc_res_16_24_Sep_filepath, pc_res[2], dates[0]),

    # (GROWTH_RUNS_14pc_09Oct, pc_res_14_09_Oct_filepath, pc_res[0], dates[1]),
    # (GROWTH_RUNS_15pc_09Oct, pc_res_15_09_Oct_filepath, pc_res[1], dates[1]),
    # (GROWTH_RUNS_16pc_09Oct, pc_res_16_09_Oct_filepath, pc_res[2], dates[1]),

    # (GROWTH_RUNS_14pc_14Oct, pc_res_14_14_Oct_filepath, pc_res[0], dates[2]),
    # (GROWTH_RUNS_15pc_14Oct, pc_res_15_14_Oct_filepath, pc_res[1], dates[2]),
    # (GROWTH_RUNS_16pc_14Oct, pc_res_16_14_Oct_filepath, pc_res[2], dates[2]),

    # (GROWTH_RUNS_14pc_06Nov, pc_res_14_06_Nov_filepath, pc_res[0], dates[3]),
    (GROWTH_RUNS_15pc_06Nov, pc_res_15_06_Nov_filepath, pc_res[1], dates[3]),
    # (GROWTH_RUNS_16pc_06Nov, pc_res_16_06_Nov_filepath, pc_res[2], dates[3]),

    (GROWTH_RUNS_14pc_16Nov, pc_res_14_16_Nov_filepath, pc_res[0], dates[4]),
    (GROWTH_RUNS_15pc_16Nov, pc_res_15_16_Nov_filepath, pc_res[1], dates[4]),
    (GROWTH_RUNS_16pc_16Nov, pc_res_16_16_Nov_filepath, pc_res[2], dates[4]),

    # (GROWTH_RUNS_14pc_25Nov, pc_res_14_25_Nov_filepath, pc_res[0], dates[5]),
    (GROWTH_RUNS_15pc_25Nov, pc_res_15_25_Nov_filepath, pc_res[1], dates[5]),
    (GROWTH_RUNS_16pc_25Nov, pc_res_16_25_Nov_filepath, pc_res[2], dates[5]),
]

for _job_runIDs, _job_directory, _job_pc_res, _job_date in _edge_growth_jobs:
    shannonProcessNRuns(shannon_function=readGenoDistbAndProcessShannonData_EDGE, runID_list=_job_runIDs, directoryPath=_job_directory,
                        pc_res_val=_job_pc_res, date=_job_date, phase_val_str=phase2_str, growth_val_str="GROWTH")


15
GROWTH
runID_251
runID_268
runID_269
runID_281
runID_293
runID_294
runID_299

14
GROWTH
runID_402
runID_413
runID_418
runID_451

15
GROWTH
runID_301
runID_310
runID_311

16
GROWTH
runID_178
runID_185
runID_186
runID_187
runID_191
runID_192

15
GROWTH
runID_325
runID_332
runID_340
runID_348

16
GROWTH
runID_202
runID_206
runID_208
runID_209
runID_218



In [None]:
# The NOGROWTH runs, processed with readGenoDistbAndProcessShannonData.
# Each row is (runID_list, directoryPath, pc_res_val, date). A commented-out row is a
# batch that is currently disabled and is not processed when this cell runs.
_nogrowth_batches = [
    # (NOGROWTH_RUNS_14pc_24Sep, pc_res_14_24_Sep_filepath, pc_res[0], dates[0]),
    # (NOGROWTH_RUNS_15pc_24Sep, pc_res_15_24_Sep_filepath, pc_res[1], dates[0]),
    # (NOGROWTH_RUNS_16pc_24Sep, pc_res_16_24_Sep_filepath, pc_res[2], dates[0]),

    # (NOGROWTH_RUNS_14pc_09Oct, pc_res_14_09_Oct_filepath, pc_res[0], dates[1]),
    # (NOGROWTH_RUNS_15pc_09Oct, pc_res_15_09_Oct_filepath, pc_res[1], dates[1]),
    # (NOGROWTH_RUNS_16pc_09Oct, pc_res_16_09_Oct_filepath, pc_res[2], dates[1]),

    # (NOGROWTH_RUNS_14pc_14Oct, pc_res_14_14_Oct_filepath, pc_res[0], dates[2]),
    # (NOGROWTH_RUNS_15pc_14Oct, pc_res_15_14_Oct_filepath, pc_res[1], dates[2]),
    # (NOGROWTH_RUNS_16pc_14Oct, pc_res_16_14_Oct_filepath, pc_res[2], dates[2]),

    # (NOGROWTH_RUNS_14pc_06Nov, pc_res_14_06_Nov_filepath, pc_res[0], dates[3]),
    (NOGROWTH_RUNS_15pc_06Nov, pc_res_15_06_Nov_filepath, pc_res[1], dates[3]),
    # (NOGROWTH_RUNS_16pc_06Nov, pc_res_16_06_Nov_filepath, pc_res[2], dates[3]),

    (NOGROWTH_RUNS_14pc_16Nov, pc_res_14_16_Nov_filepath, pc_res[0], dates[4]),
    (NOGROWTH_RUNS_15pc_16Nov, pc_res_15_16_Nov_filepath, pc_res[1], dates[4]),
    (NOGROWTH_RUNS_16pc_16Nov, pc_res_16_16_Nov_filepath, pc_res[2], dates[4]),

    # (NOGROWTH_RUNS_14pc_25Nov, pc_res_14_25_Nov_filepath, pc_res[0], dates[5]),
    (NOGROWTH_RUNS_15pc_25Nov, pc_res_15_25_Nov_filepath, pc_res[1], dates[5]),
    (NOGROWTH_RUNS_16pc_25Nov, pc_res_16_25_Nov_filepath, pc_res[2], dates[5]),
]

# Batches are processed in the order listed above, one call per batch.
for _run_ids, _dir_path, _pc_val, _date in _nogrowth_batches:
    shannonProcessNRuns(
        shannon_function=readGenoDistbAndProcessShannonData,
        runID_list=_run_ids,
        directoryPath=_dir_path,
        pc_res_val=_pc_val,
        date=_date,
        phase_val_str=phase2_str,
        growth_val_str="NOGROWTH",
    )


15
NOGROWTH
runID_250
runID_252
runID_253
runID_254
runID_255
runID_256
runID_257
runID_258
runID_259
runID_260
runID_261
runID_262
runID_263
runID_264
runID_265
runID_266
runID_267
runID_270
runID_271
runID_272
runID_273
runID_274
runID_275
runID_276
runID_277
runID_278
runID_279
runID_280
runID_282
runID_283
runID_284
runID_285
runID_286
runID_287
runID_288
runID_289
runID_290
runID_291
runID_292
runID_295
runID_296
runID_297
runID_298

14
NOGROWTH
runID_400
runID_401
runID_403
runID_404
runID_405
runID_406
runID_407
runID_408
runID_409
runID_410
runID_411
runID_412
runID_414
runID_415
runID_416
runID_417
runID_419
runID_420
runID_421
runID_422
runID_423
runID_424
runID_425
runID_426
runID_427
runID_428
runID_429
runID_430
runID_431
runID_432
runID_433
runID_434
runID_435
runID_436
runID_437
runID_438
runID_439
runID_440
runID_441
runID_442
runID_443
runID_444
runID_445
runID_446
runID_447
runID_448
runID_449
runID_450
runID_452
runID_453
runID_454
runID_455
runID_456
runID_457
runID

In [None]:
#all of the runs


In [None]:
# for runID in GROWTH_RUNS_14pc_24Sep:
#     readGenoDistbAndProcessShannonData_EDGE(directoryPath=pc_res_14_24_Sep_filepath, pc_res=pc_res[0], date=dates[0], phase_val=phase2_str, growth_val="GROWTH", runID=runID)

In [None]:
# for runID in GROWTH_RUNS_15pc_24Sep:
#     readGenoDistbAndProcessShannonData_EDGE(directoryPath=pc_res_15_24_Sep_filepath, pc_res=pc_res[1], date=dates[0], phase_val=phase2_str, growth_val="GROWTH", runID=runID)

In [None]:
# Scratch check of tuple packing: bind four values to a single name.
x = (1, 2, 3, 6)

In [None]:
x  # bare expression: the notebook shows the packed tuple as this cell's output

In [None]:
# Unpack the four-element tuple into individual names.
(a, b, c, d) = x

In [None]:
# Copy each unpacked value onto a second set of names, one assignment per value.
j = a
k = b
l = c
m = d

In [None]:
# Re-pack the four values into a single tuple.
res = (j, k, l, m)

In [None]:
# Unpack the re-packed tuple into four fresh names.
(q, w, e, r) = res

In [None]:
q  # bare expression: the notebook shows the first unpacked value as this cell's output