# Plots figure 5 supplementary figures

## Import packages

In [1]:
import os, re, glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize
import scipy.stats
import skimage
import plotnine as pn

import colicycle.tools_GW as tgw
import colicycle.time_mat_operations as tmo
import colicycle.decomposition as dc

import tabulate
from IPython.display import HTML, display

## Function definition

In [1]:
#binning function
def windowing(dataframe, variable, step):
    """Group the rows of ``dataframe`` into bins of width ``step`` along ``variable``.

    A histogram of ``variable`` with bin width ``step`` is used to find the
    well-populated range of the data: only rows lying strictly between the
    left edge of the first and the left edge of the last histogram bin holding
    more than 10 rows are kept. The kept rows are then grouped on a grid of
    width ``step`` anchored at 0.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table containing the column ``variable``.
    variable : str
        Name of the column to bin on.
    step : float
        Bin width, used both for the histogram and for the final grouping.

    Returns
    -------
    list of pandas.DataFrame
        One sub-table per non-empty bin, ordered by increasing bin position.

    Raises
    ------
    IndexError
        If no histogram bin contains more than 10 rows.
    """
    values = dataframe[variable]
    counts, edges = np.histogram(
        values.dropna(), bins=np.arange(values.min(), values.max(), step))

    populated = np.where(counts > 10)[0]
    if populated.size == 0:
        raise IndexError(
            "no bin of '{}' contains more than 10 rows".format(variable))

    # Both limits are left bin edges, so the last populated bin is trimmed.
    minpos = edges[populated[0]]
    maxpos = edges[populated[-1]]
    subdata = dataframe[(values > minpos) & (values < maxpos)]

    # The grouping grid uses the requested step (it used to be fixed at 0.05)
    # and is extended past 10 when the data reach further, so that large
    # values are not all lumped into a single overflow bin.
    grid = np.arange(0, max(10, maxpos + step), step)
    which_bin = np.digitize(subdata[variable], bins=grid)

    # np.unique returns the occupied bin indices already sorted.
    return [subdata.iloc[which_bin == x] for x in np.unique(which_bin)]

#renaming function for plotting purposes
def renaming(name):
    """Translate internal variable names into labels for plot axes.

    Parameters
    ----------
    name : iterable
        Variable names. Entries that are not strings are returned unchanged.

    Returns
    -------
    list
        The entries of ``name`` with every known variable name replaced by its
        label (mathtext where the label contains symbols).
    """
    # (whole_word, variable name, label), applied in this order to every entry.
    # whole_word entries only match when delimited by word boundaries, the
    # others match anywhere in the string. Labels are raw strings so that the
    # backslashes reach the text renderer untouched.
    substitutions = [
        (False, 'tau_fit', r'$\lambda$'),
        (False, 'Li_fit', r'$L_i\ [\mu m]$'),
        (False, 'Ld_fit', r'$L_d\ [\mu m]$'),
        (False, 'Lb_fit', r'$L_b\ [\mu m]$'),
        (True, 'Ti', '$T_{bi}$'),
        (False, 'Td', '$T_{bd}$'),
        (True, 'DLdLi', r'$d\Lambda_{ib}\ [\mu m]$'),
        # Matched as a whole word: the previous pattern r'\DLi\b' matched any
        # non-digit followed by "Li" and corrupted names such as final_DLdLi.
        (True, 'DLi', r'$d\Lambda_{if}\ [\mu m]$'),
        (True, 'numori_born', 'Num. origins at birth'),
        (False, 'DeltaTid', '$T_{id}$'),
        (False, 'DeltaL', r'$dL\ [\mu m]$'),
        (False, 'rLdLb', '$R_{db}$'),
    ]

    def relabel(entry):
        if not isinstance(entry, str):
            return entry
        for whole_word, variable, label in substitutions:
            if whole_word:
                # A callable replacement inserts the label verbatim, without
                # re.sub interpreting its backslashes as escapes.
                entry = re.sub(r'\b' + re.escape(variable) + r'\b',
                               lambda match, label=label: label, entry)
            else:
                entry = entry.replace(variable, label)
        return entry

    return [relabel(x) for x in name]

In [3]:
# Folder that contains the PreProcessed folder.
main_folder = '../'

# One dictionary entry per condition to analyse. Each row of the table below
# gives, in order: the entry key, the data location (relative to main_folder),
# the period to consider, the condition name used in legends, the simulation
# type, the name of the final figure, and the reference name of the condition
# (used when producing simulation data).
datasource = {
    key: {
        'datafile': main_folder + location,
        'period': period,
        'condition': legend_name,
        'sim_type': simulation_type,
        'fig_names': figure_name,
        'condid': reference_name,
    }
    for key, location, period, legend_name, simulation_type, figure_name, reference_name in (
        ('glycerolauto', 'PreProcessed/20170327_GW339_temp/colidata.pkl',
         1, 'Glycerol (auto)', 'standard', 'fig5_sup1.pdf', 'glycerolauto'),
        ('glycerol', 'PreProcessed/20180706_GW296_glycerol37_1_MMStack/colidata.pkl',
         0, 'Glycerol', 'standard', 'fig5_sup2.pdf', 'glycerol'),
        ('glucose', 'PreProcessed/20180711_GW296_glucose37_1_MMStack/colidata.pkl',
         0, 'Glucose', 'standard', 'fig5_sup3.pdf', 'glucose'),
        ('glucose8aa', 'PreProcessed/20180709_GW296_glucose8aa37_1_MMStack/colidata.pkl',
         0, 'Glucose 8aa', 'standard', 'fig5_sup4.pdf', 'glucose8aa'),
        ('glycerolauto2', 'PreProcessed/20170327_GW339_temp/colidata.pkl',
         1, 'Glycerol (auto)', 'classicadder', 'fig_app2_2.pdf', 'glycerolauto'),
    )
}

# Folder containing the simulation data.
simulation_source = main_folder + 'DataSimulations/'

# Folder where the plots are saved.
tosave_folder = main_folder + 'Plots/'


## Plotting

This large cell creates each individual panel of the supplementary figures to Figure 5 and, at the end, assembles them into a single large figure.

do_scatter, do_histo etc. allow one to choose which plots to (re-)create. Then the loop goes through the dictionary defined above and creates a figure per entry (plus the individual figures).

In [None]:
# Horizontal legend position and text size shared by all panels.
leg_xpos = 0.68
fontsize = 25

# Switches selecting which groups of panels are (re-)created.
do_scatter = True
do_histo = True
do_ori = True
do_assembly = True

# Experimental scales.
expinfo = {}
expinfo['size_scale'] = 65#nm/px
expinfo['time_scale'] = 3#in min

# Length columns converted to microns. Each column is listed exactly once.
length_columns = ['DLi','Lb_fit','Ld_fit','Ld','Lb','Li','Li_fit','Li_old']

# Individual panels are stored here before assembly; create the folder so that
# saving does not fail on a fresh checkout.
panel_folder = tosave_folder+'fig5sup/'
os.makedirs(panel_folder, exist_ok=True)


def _panel_file(cond, simtype, index):
    """Return the path of the PNG file holding panel ``index`` of one condition."""
    return panel_folder+cond+'_'+simtype+'_'+str(index)+'.png'


def _with_panel_theme(plot):
    """Add the black-and-white theme and the in-panel legend styling to ``plot``."""
    return (plot
            + pn.theme_bw(base_size = 20)
            + pn.theme(legend_position=(leg_xpos, 0.8),
                       legend_key = pn.element_rect(colour = [1,1,1,1], fill = "white"),
                       legend_background = pn.element_rect(fill = [1,0,0,0]),
                       text=pn.element_text(size=fontsize)))


def _binned_summary(groups, xvar, yvar, datatype):
    """Summarise binned data: per-bin weight, means and mean -/+ standard error of ``yvar``."""
    sizes = np.array([len(g) for g in groups])
    centre = [g[yvar].mean() for g in groups]
    spread = [g[yvar].sem() for g in groups]
    return pd.DataFrame({'len': sizes/np.sum(sizes),
                         xvar: [g[xvar].mean() for g in groups],
                         yvar: centre,
                         yvar+'_lower': [m-s for m, s in zip(centre, spread)],
                         yvar+'_upper': [m+s for m, s in zip(centre, spread)],
                         'datatype': datatype})


#go through all conditions/data types
for d in datasource:
    cond = datasource[d]['condid']
    simtype = datasource[d]['sim_type']
    figname = datasource[d]['fig_names']

    #######load and format data######
    datafile = datasource[d]['datafile']

    #load experimental data
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    colidata = pd.read_pickle(datafile)
    # .copy() so that the column assignments below act on an independent table
    colidata = colidata[colidata.period==datasource[d]['period']].copy()

    #scale lengths in microns
    colidata[length_columns] = colidata[length_columns]*expinfo['size_scale']/1000

    colidata['DeltaL'] = colidata.Ld_fit-colidata.Lb_fit

    #keep only good data
    colidata = colidata[(colidata.pearson_log>0.95)&(colidata.tau_fit>0)].copy()

    colidata['DLdLi'] = colidata.Ld_fit-colidata.Li_fit
    colidata['final_DLdLi']  = colidata['DLdLi']
    colidata['numori_born'] = colidata.Ti.apply(lambda x: 1 if x>=0 else 2)

    #load simulation data and complete dataframe
    simul_file = simulation_source+cond+'_'+simtype+'.pkl'
    colisimul = pd.read_pickle(simul_file)

    colisimul['DeltaL'] = colisimul.Ld_fit-colisimul.Lb_fit
    colisimul['DLdLi'] = colisimul.Ld_fit-colisimul.Li_fit

    #create average binned data
    group_Lb_simul = windowing(colisimul,'Lb_fit',0.05)
    group_Lb_exp = windowing(colidata,'Lb_fit',0.05)

    group_Li_simul = windowing(colisimul,'Li_fit',0.05)
    group_Li_exp = windowing(colidata,'Li_fit',0.05)

    #list of used variables
    varlist = ['Lb_fit','Ld_fit','DLi','DLdLi','DeltaL']

    #list of pairs of variables used in correlation plots
    varpairs = [['Lb_fit','Li_fit',group_Lb_exp, group_Lb_simul],
                ['Li_fit','DLdLi', group_Li_exp, group_Li_simul],
                ['Lb_fit','DeltaL', group_Lb_exp, group_Lb_simul]]

    #Assigning legend name for data types
    colisimul['datatype'] = 'Simulations'
    colidata['datatype'] = 'Experiments'

    #assembling experimental and simulation data
    coli = pd.concat([colisimul,colidata],sort=False)

    # Index of the next panel; it must advance even when a group of panels is
    # skipped so that file names stay aligned with the assembly step.
    plot_count = 0

    ########plot binned scatter plots###########
    if do_scatter:
        for xvar, yvar, groups_exp, groups_simul in varpairs:
            df = pd.concat([_binned_summary(groups_exp, xvar, yvar, 'Experiments'),
                            _binned_summary(groups_simul, xvar, yvar, 'Simulations')])

            p = (pn.ggplot()
                 + pn.geom_point(pn.aes(x=xvar, y=yvar, alpha ='len', color = 'datatype'),
                                 data= df, size = 3)
                 + pn.geom_errorbar(pn.aes(x=xvar, ymin = yvar+'_lower', ymax = yvar+'_upper',
                                           alpha ='len', color = 'datatype'),
                                    data= df, width = 0.02)
                 + pn.xlab(renaming([xvar])[0])
                 + pn.ylab(renaming([yvar])[0])
                 + pn.coord_cartesian(ylim = (np.min(df[yvar])-0.5,np.max(df[yvar])+1))
                 + pn.scale_colour_manual(values=['red','blue'],name=' ')
                 + pn.guides(alpha=False)
                )
            p = _with_panel_theme(p)
            print(p)
            p.save(_panel_file(cond, simtype, plot_count),dpi=600)
            plot_count += 1
    else:
        plot_count += len(varpairs)

    ########plot histograms###########
    if do_histo:
        for vval in varlist:
            padding = 1.5 if vval in ('DLi','DeltaL') else 0.5
            p = (pn.ggplot(coli,pn.aes(x=vval,fill = 'datatype'))
                 + pn.geom_histogram(pn.aes(y='stat(density)'),position = 'identity',alpha = 0.5)
                 + pn.coord_cartesian(xlim = (np.min(coli[vval])-0.5,np.max(coli[vval])+padding))
                 + pn.scale_fill_manual(values=['red','blue'],name=' ')
                 + pn.guides(alpha=False)
                 + pn.xlab(renaming([vval])[0])
                 + pn.ylab(' ')
                )
            p = _with_panel_theme(p)
            print(p)
            p.save(_panel_file(cond, simtype, plot_count),dpi=600)
            plot_count += 1
    else:
        plot_count += len(varlist)

    ###########plot origins distributions############
    if do_ori:
        # fraction of cells per number of origins at birth, for each data type
        or_exp = coli[coli.datatype == 'Experiments'].groupby('numori_born').size()
        or_simul = coli[coli.datatype == 'Simulations'].groupby('numori_born').size()

        or_exp = or_exp/np.sum(or_exp)
        or_simul = or_simul/np.sum(or_simul)

        origins = pd.DataFrame({'Simulations':or_simul,'Experiments':or_exp})
        origins = origins.reset_index()
        origins = pd.melt(origins, value_vars=['Simulations','Experiments'],id_vars='numori_born')

        p = (pn.ggplot(origins, pn.aes(x = 'numori_born',y='value', fill='variable'))
             + pn.geom_col(position="dodge", width = 0.3)
             + pn.scale_fill_manual(values=['red','blue'],name=' ')
             + pn.coord_cartesian(xlim = (0.5,4.5))
             + pn.guides(alpha=False)
             + pn.xlab('Num. origins at birth')
             + pn.ylab(' '))
        p = _with_panel_theme(p)
        print(p)
        p.save(_panel_file(cond, simtype, plot_count),dpi=600)

    ############assemble plots#################
    if do_assembly:
        # 3 scatter panels + 5 histograms + 1 origins panel fill a 3x3 grid,
        # row by row in the order the panels were created.
        fig, ax  = plt.subplots(3,3,figsize=(20,15))
        for index, axis in enumerate(ax.ravel()):
            myim = skimage.io.imread(_panel_file(cond, simtype, index))
            axis.imshow(myim)
            axis.set_axis_off()
        plt.tight_layout(h_pad=-0,w_pad=-10)
        # Save before showing (some backends release the figure once shown),
        # then close it so figures do not accumulate over the conditions.
        fig.savefig(tosave_folder+figname,dpi=300)
        plt.show()
        plt.close(fig)

