# Parsing data from Si et al. Curr. Biol. 2019

This notebook converts the data of Si et al. into the same format as the datasets used in this article, so that the same methodology can be applied to them.

In [5]:
import os, re, requests
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
import scipy.optimize
import scipy.stats

import colicycle.tools_GW as tgw
import colicycle.time_mat_operations as tmo

import tabulate
from IPython.display import HTML, display

In [10]:
# The data file can be found in the supplementary materials of Si et al. Curr. Biol. 2019.
# We cannot host the file in this repository (as it is not even freely available on the journal
# website). However, if you execute this notebook interactively, you can download the file here.
filename = 'si_et_al.xls'

myfile = requests.get('https://ars.els-cdn.com/content/image/1-s2.0-S0960982219304919-mmc3.xlsx', allow_redirects=True)
# Fail loudly on an HTTP error instead of saving an error page and parsing it as a spreadsheet.
myfile.raise_for_status()

# Write through a context manager so the handle is flushed and closed before pandas reads the file.
with open(filename, 'wb') as fh:
    fh.write(myfile.content)

# Only the 'MG1655 M9 acetate' sheet is used in this notebook.
acetate = pd.read_excel(filename,sheet_name='MG1655 M9 acetate')


In [11]:
def fit_exp(ydata, T):
    """Fit ``tgw.fun_expgrowht2`` to a trace and report the fit and two correlations.

    Parameters
    ----------
    ydata : sequence of float
        Trace to fit. Sample ``k`` is paired with abscissa ``k``
        (``0 .. len(ydata) - 1``).
    T : number
        Used twice: as the initial guess for the second fit parameter, and as
        the abscissa at which the fitted curve is evaluated.

    Returns
    -------
    tuple
        ``(Lig_fit, Lig2_fit, tau_g, pearson_lin, pearson_log)`` where
        ``Lig_fit`` and ``tau_g`` are the first and second fitted parameters,
        ``Lig2_fit`` is the fitted curve evaluated at ``T``, and the last two
        entries are the Pearson coefficients of the abscissa against ``ydata``
        and against ``log(ydata)``.
    """
    steps = range(len(ydata))

    # Initial guess: 15 for the first parameter, T for the second one.
    (amplitude, time_const), _ = scipy.optimize.curve_fit(
        tgw.fun_expgrowht2, steps, ydata, p0=[15, T])

    # Value of the fitted curve at abscissa T.
    fitted_at_T = tgw.fun_expgrowht2(T, amplitude, time_const)

    # Correlation of the raw and of the log-transformed trace with the abscissa.
    r_linear = scipy.stats.pearsonr(steps, ydata)[0]
    r_log = scipy.stats.pearsonr(steps, np.log(ydata))[0]

    return amplitude, fitted_at_T, time_const, r_linear, r_log

def Ti_mother(row, standard):
    """Return ``row.Ti``, recomputed from the mother's data when it is negative.

    Parameters
    ----------
    row : object with ``Ti``, ``mother_id`` and ``Li`` attributes
        Typically one row of ``standard``.
    standard : pandas.DataFrame
        Table indexed by cell id with columns ``tau_fit``, ``Lb`` and ``Td``.

    Returns
    -------
    float
        * ``row.Ti`` unchanged when it is not negative (this includes NaN);
        * NaN when ``row.Ti`` is negative and ``row.mother_id`` is not > 0;
        * otherwise ``tau_fit_m * log(2 * row.Li / Lb_m) - Td_m``, with the
          ``_m`` values read from the mother's row.
          NOTE(review): presumably this places the event inside the mother's
          cycle, expressed relative to the daughter's birth - confirm.
    """
    if not row.Ti < 0:
        return row.Ti

    if not row.mother_id > 0:
        return np.nan

    mother = row.mother_id
    # Time measured from the mother's birth at which an exponentially growing
    # mother (time constant tau_fit) reaches length 2 * row.Li.
    time_in_mother = standard.at[mother, 'tau_fit'] * np.log(2*row.Li/standard.at[mother, 'Lb'])
    # Shift by the mother's Td; the result is the negated remaining time.
    return -(standard.at[mother, 'Td'] - time_in_mother)

def DLi(row, standard):
    """Return ``row.Li`` minus half of the mother's ``Li``.

    ``standard`` is a table indexed by cell id with an ``Li`` column. NaN is
    returned when ``row.mother_id`` is not > 0.
    """
    if not row.mother_id > 0:
        return np.nan
    half_mother_Li = 0.5*standard.at[row.mother_id, 'Li']
    return row.Li - half_mother_Li

def Liold(row, standard):
    """Return half of the mother's ``Li``, or NaN when there is no mother.

    ``standard`` is a table indexed by cell id with an ``Li`` column; a row is
    treated as having a mother when ``row.mother_id`` is > 0.
    """
    has_mother = row.mother_id > 0
    return 0.5*standard.at[row.mother_id, 'Li'] if has_mother else np.nan


def _store_cross_generation_fit(colidata, x, crossgen_len, n_mother_points):
    """Record the trace length of cell ``x`` and, if the trace is long enough, its fit.

    Writes ``Tg`` (number of points in ``crossgen_len``) and ``Tbg``
    (``n_mother_points``) into ``colidata``. When the trace has at least 3
    points, ``fit_exp`` is run on it and its five outputs are stored in
    ``Lig_fit``, ``Lig2_fit``, ``tau_g``, ``pearsonlin_g`` and ``pearsonlog_g``.
    Shorter traces keep whatever value those columns already hold.
    """
    colidata.loc[x, 'Tg'] = len(crossgen_len)
    colidata.loc[x, 'Tbg'] = n_mother_points

    # A two-parameter fit on fewer than 3 points is meaningless, and curve_fit
    # raises outright when there are fewer points than parameters.
    if len(crossgen_len) < 3:
        return

    Lig_fit, Lig2_fit, tau_g, pearson_lin, pearson_log = fit_exp(crossgen_len, colidata.at[x, 'Tg'])
    colidata.at[x, 'Lig_fit'] = Lig_fit
    colidata.at[x, 'Lig2_fit'] = Lig2_fit
    colidata.at[x, 'tau_g'] = tau_g
    colidata.at[x, 'pearsonlin_g'] = pearson_lin
    colidata.at[x, 'pearsonlog_g'] = pearson_log


def cross_generation_construct(colidata, datatype, time_scale):
    """Build mother-to-daughter length traces, fit them, and add derived columns.

    For every cell with ``mother_id > 0`` whose own ``Ti`` is not NaN and whose
    mother has ``Ti > 0``, a trace is assembled from the ``length`` arrays:

    * daughter ``Ti > 0``: the mother's trace from index ``int(Ti_m / time_scale)``
      to its end, scaled by ``rfact``, followed by the daughter's trace up to
      index ``int(Ti / time_scale)``;
    * daughter ``Ti <= 0``: half of the mother's trace from
      ``int(Ti_m / time_scale)`` up to ``len(mother trace) + int(Ti / time_scale)``.

    The trace is stored in ``cross_gen_len``, its length in ``Tg``, the number
    of mother points after the mother's ``Ti`` in ``Tbg``, and, for traces with
    at least 3 points, the results of ``fit_exp`` in ``Lig_fit``, ``Lig2_fit``,
    ``tau_g``, ``pearsonlin_g`` and ``pearsonlog_g``.

    Parameters
    ----------
    colidata : pandas.DataFrame
        Table indexed by cell id. Reads ``mother_id``, ``Ti``, ``Td``,
        ``length``, ``Lb_fit``, ``Ld_fit``, ``Li_fit``, plus ``rfact`` (when
        ``datatype != 'exp'``), ``born`` (when ``datatype == 'simul'``) or
        ``period``, ``pearson_log`` and ``tau_fit`` (otherwise).
        **Modified in place**: the trace/fit columns listed above are added to
        (or reset on) the frame that is passed in.
    datatype : str
        ``'exp'`` derives ``rfact`` from the two sister cells' ``Lb_fit`` and
        skips cells that do not have exactly one sister; any other value reads
        the ``rfact`` column. ``'simul'`` keeps rows with ``born > 1000``; any
        other value keeps rows with ``period == 1``, ``pearson_log > 0.95`` and
        ``tau_fit > 0``.
    time_scale : number
        Divisor that converts ``Ti`` into an index into the ``length`` arrays.

    Returns
    -------
    pandas.DataFrame
        An independent, filtered copy of ``colidata`` with the additional
        columns ``DeltaL``, ``DeltaLi``, ``DeltaLid``, ``DeltaLib``,
        ``DeltaTid``, ``rLdLb``, ``rLig``, ``DeltaLgi`` and ``DeltaLigb``.
    """
    # NaN defaults; 'Tbg' is created here too so that the column exists even
    # when no cell qualifies for a cross-generation trace.
    for column in ('Tg', 'Lig_fit', 'Lig2_fit', 'tau_g', 'pearsonlog_g', 'pearsonlin_g', 'Tbg'):
        colidata[column] = np.nan

    # One entry per row; rows without a trace keep an empty list.
    cross_gen_list = [[] for _ in colidata.index]

    for ind, x in enumerate(colidata.index):
        mother_id = colidata.at[x, 'mother_id']
        if not mother_id > 0:
            continue

        daughter_Ti = colidata.at[x, 'Ti']/time_scale
        mother_Ti = colidata.at[mother_id, 'Ti']/time_scale

        # A trace needs a defined daughter Ti and a positive mother Ti.
        if np.isnan(daughter_Ti) or not mother_Ti > 0:
            continue

        start = int(mother_Ti)

        if daughter_Ti > 0:
            if datatype == 'exp':
                # Share of the mother inherited by this cell, from the two sisters' Lb_fit.
                sisters = colidata[colidata.mother_id == mother_id]
                if len(sisters) != 2:
                    continue
                first, second = sisters.iloc[0], sisters.iloc[1]
                own = first if first.name == x else second
                rfact = own.Lb_fit/(first.Lb_fit + second.Lb_fit)
            else:
                rfact = colidata.at[x, 'rfact']

            mother_len = colidata.at[mother_id, 'length']
            daughter_len = colidata.at[x, 'length']
            crossgen_len = np.concatenate((rfact*mother_len[start:], daughter_len[0:int(daughter_Ti)]))
        else:
            # Daughter Ti <= 0: the trace ends inside the mother's cycle,
            # int(daughter_Ti) points before the end of the mother's trace.
            mother_len = colidata.at[mother_id, 'length']
            crossgen_len = 0.5*mother_len[start:len(mother_len)+int(daughter_Ti)]

        cross_gen_list[ind] = crossgen_len
        _store_cross_generation_fit(colidata, x, crossgen_len, len(mother_len[start:]))

    colidata['cross_gen_len'] = pd.Series(cross_gen_list, index=colidata.index)

    if datatype == 'simul':
        selected = colidata[colidata.born > 1000]
    else:
        selected = colidata[colidata.period == 1]
        selected = selected[selected.pearson_log > 0.95]
        selected = selected[selected.tau_fit > 0]

    # Work on an explicit copy so the column assignments below do not target a
    # slice of the input frame (avoids SettingWithCopyWarning).
    colidata = selected.copy()

    colidata['DeltaL'] = colidata['Ld_fit']-colidata['Lb_fit']
    colidata['DeltaLi'] = colidata['Li_fit']-colidata['Lb_fit']
    colidata['DeltaLid'] = colidata['Ld_fit']-colidata['Li_fit']
    colidata['DeltaLib'] = colidata['Li_fit']-colidata['Lb_fit']
    colidata['DeltaTid'] = colidata['Td']-colidata['Ti']
    colidata['rLdLb'] = colidata['Ld_fit']/colidata['Lb_fit']

    colidata['rLig'] = colidata['Lig2_fit']/colidata['Lig_fit']
    colidata['DeltaLgi'] = colidata['Lig2_fit']-colidata['Lig_fit']
    # Half of the value tmo.mother_var returns for 'DeltaLid', looked up in the filtered table.
    colidata['DeltaLigb'] = 0.5*colidata.apply(lambda row: tmo.mother_var(row, colidata, 'DeltaLid'), axis=1)

    return colidata


In [12]:
# Time constant per cell: 60 / elongation rate (the source column is labelled 1/hour).
tau_values = 60/acetate['elongation rate (1/hour)'].values
standard = pd.DataFrame(tau_values, columns=['tau_fit'])

# Mother lookup: the mother of a cell is the row whose 'daughter ID' equals
# that cell's 'cell ID'. Cells without such a row get -1.
cell_ids = acetate['cell ID']
daughter_ids = acetate['daughter ID']
mother_ids = np.full(len(acetate), -1.0)
for pos in range(len(acetate)):
    candidates = cell_ids[daughter_ids == cell_ids.iloc[pos]].values
    if len(candidates) > 0:
        # Slice assignment fails if more than one candidate mother is found.
        mother_ids[pos:pos+1] = candidates
standard['mother_id'] = mother_ids.astype(np.int32)

# Constant columns. cross_generation_construct(..., 'simul', ...) keeps only
# rows with born > 1000, so 2000 lets every row through that filter.
for column, value in (('born', 2000), ('full_cellcycle', True), ('pearson_log', 1)):
    standard[column] = value

# Each size is stored twice: under its plain name and under the '_fit' name.
size_sources = (
    ('Lb', 'Lb_fit', 'newborn size (micron)'),
    ('Ld', 'Ld_fit', 'division size (micron)'),
    ('Li', 'Li_fit', 'initiation size per ori (micron)'),
)
for plain, fitted, source in size_sources:
    standard[plain] = acetate[source]
    standard[fitted] = acetate[source]

# Time at which an exponentially growing cell goes from Lb to Li.
standard['Ti'] = standard.tau_fit * np.log(standard.Li/standard.Lb)
standard['Td'] = acetate['generation time (minute)']

# From here on rows are addressed by cell id.
standard['cellID'] = acetate['cell ID']
standard = standard.set_index('cellID')

# Mother's Ld, or NaN for cells without a mother.
standard['mLd'] = standard.apply(
    lambda cell: standard.at[int(cell.mother_id), 'Ld'] if cell.mother_id>0 else np.nan, axis=1)
# Synthetic exponential length trace, sampled every 3 time units from 0 up to int(Td).
standard['length'] = standard.apply(
    lambda cell: cell.Lb*np.exp(np.arange(0,int(cell['Td']),3)/cell.tau_fit), axis=1)

# Negative Ti values are recomputed from the mother's row (uses the unrounded Td).
standard['Ti'] = standard.apply(Ti_mother, axis=1, args=(standard,))

standard['DLi'] = standard.apply(DLi, axis=1, args=(standard,))
standard['Li_old'] = standard.apply(Liold, axis=1, args=(standard,))

# Snap both times to the nearest multiple of 3.
for column in ('Ti', 'Td'):
    standard[column] = 3*np.round(standard[column]/3)

standard['rfact'] = standard['Lb']/standard['mLd']

In [13]:
# Build the cross-generation traces and fits. 'simul' selects the per-row 'rfact' column and the
# born > 1000 filter inside cross_generation_construct; 3 is the divisor that turns Ti into an
# index into the 'length' arrays (those arrays are sampled with a step of 3).
# NOTE: `standard` is overwritten with the filtered result, so re-running this cell alone
# operates on the already-processed table.
standard = cross_generation_construct(standard, 'simul',3)

In [14]:
# Save the processed table as 'colidata.pkl' (path is relative to the current working directory).
standard.to_pickle('colidata.pkl')