# Importing experimental data from Si & Le Treut (2019)

In [1]:
# Load the autoreload extension and set it to mode 2, so that edits to the
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Imports

In [2]:
import os, copy, pickle
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import sys
# Extend the module search path with the local 'code' directory before
# importing the project helpers below (the import presumably resolves from there).
sys.path.append('code')
from functions_response import process_fsglt, fit_normal_fsglt, fit_lognormal_fsglt

# Extend the module search path with a sibling 'colicycle' checkout; the path is
# relative to the working directory, so the notebook must be run from its own folder.
# NOTE(review): exp_parameters is expected to live in that directory -- confirm.
sys.path.append(os.path.join('..','colicycle','colicycle'))
import exp_parameters as ep

## Parameters

In [4]:
# Directory holding the raw CSV tables, and root directory for everything
# this notebook writes (one sub-directory per experiment).
dirdata = os.path.join('data', '2019FSGLT')
direxp = os.path.join('.', 'experiments')
os.makedirs(direxp, exist_ok=True)

# experiment name (also used as output sub-directory) -> CSV file in `dirdata`
exp_map = {
    'mg1655_acetate': 'MG1655 M9 acetate.csv',
    'mg1655_glucose': 'MG1655 MOPS glucose.csv',
    'mg1655_glycerol11aa': 'MG1655 MOPS glycerol 11aa.csv',
    'ncm3722_arginine': 'NCM3722 MOPS arginine.csv',
    'ncm3722_glucose': 'NCM3722 MOPS glucose.csv',
    'ncm3722_glucose12aa': 'NCM3722 MOPS glucose 12aa.csv',
}

## Load data

In [5]:
# Columns kept from each raw CSV, in the order they appear in the saved frame.
# (Loop-invariant, so defined once instead of once per experiment.)
raw_columns = [
    'elongation rate (1/hour)',
    'initiation size per ori (micron)',
    'division size (micron)', 'newborn size (micron)',
    'added size (micron)', 'cell width (micron)',
    'generation time (minute)', 'tau_cyc (minute)', 'septum position',
    'cell ID', 'daughter ID',
]

# raw CSV column name -> short name used in the rest of the analysis.
# Columns not listed here ('cell ID', 'daughter ID') keep their raw name.
# The original literal listed 'tau_cyc (minute)' twice ('taucyc', then
# 'tau_cyc'); only the last entry ever took effect, so only it is kept.
raw_col_mapping = {
    'elongation rate (1/hour)': 'lambda',
    'initiation size per ori (micron)': 'Lambda_i',
    'division size (micron)': 'Sd',
    'newborn size (micron)': 'Sb',
    'added size (micron)': 'Delta_bd',
    'cell width (micron)': 'width',
    'generation time (minute)': 'tau',
    'tau_cyc (minute)': 'tau_cyc',
    'septum position': 'phi',
}

for name, filename in exp_map.items():
    # read the raw table of this experiment
    fpath = os.path.join(dirdata, filename)
    with open(fpath, 'r') as fin:
        df = pd.read_csv(fin, header=0)

    # keep/reorder the columns of interest, then rename them
    df = df[raw_columns].rename(columns=raw_col_mapping)

    # convert growth rate from per hour to per minute, and derive the
    # corresponding doubling time ln(2)/lambda
    df['lambda'] = df['lambda'] / 60.
    df['tau_eff'] = np.log(2.) / df['lambda']

    # process the data frame to add attributes; the return value is not used,
    # so the helper is relied upon to modify `df` in place
    process_fsglt(df)

    # choose a method for the computation of delta_id
    # see source code for details
    df['delta_id'] = df['delta_id_m1']

    # save the dataframe in the experiment's own output directory
    outputdir = os.path.join(direxp, name)
    os.makedirs(outputdir, exist_ok=True)
    fpath = os.path.join(outputdir, 'colidata.pkl')
    df.to_pickle(fpath)
    print(fpath)

Calculation of the number of oric is wrong for cell ID 6172143... ignoring it
Calculation of the number of oric is wrong for cell ID 7136083... ignoring it
./experiments/mg1655_acetate/colidata.pkl
./experiments/mg1655_glucose/colidata.pkl
Calculation of the number of oric is wrong for cell ID 1025219... ignoring it
Calculation of the number of oric is wrong for cell ID 1047095... ignoring it
Calculation of the number of oric is wrong for cell ID 1055120... ignoring it
Calculation of the number of oric is wrong for cell ID 1055138... ignoring it
Calculation of the number of oric is wrong for cell ID 1055188... ignoring it
Calculation of the number of oric is wrong for cell ID 1069075... ignoring it
Calculation of the number of oric is wrong for cell ID 1135131... ignoring it
Calculation of the number of oric is wrong for cell ID 1150178... ignoring it
Calculation of the number of oric is wrong for cell ID 1150197... ignoring it
Calculation of the number of oric is wrong for cell ID 118

Calculation of the number of oric is wrong for cell ID 7084087... ignoring it
Calculation of the number of oric is wrong for cell ID 7084203... ignoring it
Calculation of the number of oric is wrong for cell ID 7106177... ignoring it
Calculation of the number of oric is wrong for cell ID 7164201... ignoring it
Calculation of the number of oric is wrong for cell ID 7171086... ignoring it
Calculation of the number of oric is wrong for cell ID 7178207... ignoring it
Calculation of the number of oric is wrong for cell ID 7208116... ignoring it
Calculation of the number of oric is wrong for cell ID 7208135... ignoring it
Calculation of the number of oric is wrong for cell ID 7222151... ignoring it
Calculation of the number of oric is wrong for cell ID 7244150... ignoring it
Calculation of the number of oric is wrong for cell ID 7252097... ignoring it
Calculation of the number of oric is wrong for cell ID 8004168... ignoring it
Calculation of the number of oric is wrong for cell ID 8018143..

## Save parameters for simulations

In [6]:
# Column name in the saved colidata frame -> name used when fitting the
# simulation parameters. Entry order matters: the values, in this order, are
# the columns selected from each frame in the next cell.
col_mapping = {
    'tau_eff':   'tau_fit',
    'Sb':        'Lb_fit',
    'Sd':        'Ld_fit',
    'Lambda_i':  'Li_fit',
    'delta_ii':  'DLi',
    'delta_id':  'DLdLi',
    'Delta_bd':  'dL',
    'phi':       'phi',
    'mother ID': 'mother_id',
    'cell ID':   'cell_id',
}

In [7]:
for name in exp_map.keys():
    # One directory per experiment: it holds the input pickle written by the
    # load step and receives the parameter file and the figure written below.
    expdir = os.path.join(direxp, name)
    colidata = pd.read_pickle(os.path.join(expdir, 'colidata.pkl'))
    print("Loaded {:s}".format(name))

    # rename and select useful columns, then index the rows by cell id
    sel = list(col_mapping.values())
    colidata = colidata.rename(columns=col_mapping)[sel].set_index('cell_id')

    # compute parameters: each fit returns bin positions, binned values and a
    # result object whose `.x` attribute holds the fitted parameters
    tau_corr = ep.calculate_tau_correlation(colidata,'tau_fit')
    bin_pos_tau, valbins_tau, res_fit_tau = fit_lognormal_fsglt(colidata['tau_fit'],np.arange(2,6,0.1))
    bin_pos_DLi, valbins_DLi, res_fit_DLi = fit_normal_fsglt(colidata['DLi'],np.arange(0,2,0.1))
    bin_pos_DLdLi, valbins_DLdLi, res_fit_DLdLi = fit_lognormal_fsglt(colidata['DLdLi'],np.arange(-1,2,0.1))
    bin_pos_Lb, valbins_Lb, res_fit_Lb = fit_normal_fsglt(colidata['Lb_fit'],np.arange(0,5,0.1))
    bin_pos_Lblog, valbins_Lblog, res_fit_Lblog = fit_lognormal_fsglt(colidata['Lb_fit'],np.arange(-1,2,0.1))
    bin_pos_dL, valbins_dL, res_fit_dL = fit_normal_fsglt(colidata['dL'],np.arange(0,5,0.1))

    # same method won't work because only mother cells are retained here
    # no 2 cells with same mother.
    # divR_std = ep.calculate_div_ratio(colidata)
    # Instead, turn the septum position phi into the ratio phi/(1-phi), pool it
    # with its reciprocal, and take the NaN-ignoring standard deviation.
    phi = colidata['phi'].to_numpy()
    divR = phi / (1.-phi)
    divR = np.concatenate([divR, 1/divR])
    divR_std = np.nanstd(divR)

    # store the parameters
    param_storage = {
        'tau_corr': tau_corr,
        'fit_logtau': res_fit_tau.x,
        'fit_DLi': res_fit_DLi.x,
        'fit_logDLdLi': res_fit_DLdLi.x,
        'fit_Lb': res_fit_Lb.x,
        'fit_logLb': res_fit_Lblog.x,
        'fit_dL': res_fit_dL.x,
        'divR_std': divR_std,
    }

    fpath = os.path.join(expdir, 'simul_params.pkl')
    with open(fpath, 'wb') as f:
        pickle.dump(param_storage, f, pickle.HIGHEST_PROTOCOL)

    # plots: one panel per fit, binned data as markers with the fitted curve
    # on top. Panels are listed in row-major order of the 2x3 grid.
    panels = [
        ('tau_fit', bin_pos_tau, valbins_tau, res_fit_tau),
        ('DLi', bin_pos_DLi, valbins_DLi, res_fit_DLi),
        ('DLdLi', bin_pos_DLdLi, valbins_DLdLi, res_fit_DLdLi),
        ('Lb', bin_pos_Lb, valbins_Lb, res_fit_Lb),
        ('Lb_log', bin_pos_Lblog, valbins_Lblog, res_fit_Lblog),
        ('dL', bin_pos_dL, valbins_dL, res_fit_dL),
    ]
    fig, axes = plt.subplots(2, 3, figsize=(15, 5))
    for ax, (title, bin_pos, valbins, res_fit) in zip(axes.flat, panels):
        ax.plot(bin_pos, valbins, 'o')
        ax.plot(bin_pos, ep.fun_single_gauss(bin_pos, *res_fit.x))
        ax.set_title(title)

    fpath = os.path.join(expdir, 'fit_figure')
    fig.savefig(fpath + '.png', dpi=300, bbox_inches='tight', pad_inches=0)
    # close this figure so figures do not accumulate across experiments and
    # nothing is rendered inline at the end of the cell
    plt.close(fig)

Loaded mg1655_acetate
Loaded mg1655_glucose
Loaded mg1655_glycerol11aa
Loaded ncm3722_arginine
Loaded ncm3722_glucose
Loaded ncm3722_glucose12aa
