## Steady-state Single Variable Analysis

In [None]:
import trenchripper.trenchripper as tr

import dask
import warnings
import random
import anndata

import numpy as np
import pandas as pd
import dask.dataframe as dd

from matplotlib import pyplot as plt

# Seed the stdlib and NumPy global RNGs with the same fixed value.
_rng_seed = 42
random.seed(_rng_seed)
np.random.seed(_rng_seed)

# Apply the "once" action to UserWarning so repeated warnings do not flood the output.
warnings.filterwarnings("once", category=UserWarning)

# Dask scheduler settings, applied in a single call:
# start the active memory manager, set the worker TTL to 5 minutes and
# allow up to 100 failures before the scheduler gives up on a task.
dask.config.set(
    {
        "distributed.scheduler.active-memory-manager.start": True,
        "distributed.scheduler.worker-ttl": "5m",
        "distributed.scheduler.allowed-failures": 100,
    }
);

# Directory passed to the dask controller as its working directory.
dask_wd = "/home/de64/scratch/de64/dask"

### Initial Data Processing

#### Start Dask

In [3]:
# Options for the dask cluster used during initial data processing.
_cluster_options = {
    "walltime": "01:00:00",
    "local": False,
    "n_workers": 25,
    "n_workers_min": 25,
    "memory": "16GB",
    "working_directory": dask_wd,
}

# Build the controller from those options and bring the cluster up.
dask_controller = tr.trcluster.dask_controller(**_cluster_options)
dask_controller.startdask()

50m
01:00:00


Task exception was never retrieved
future: <Task finished name='Task-2118768' coro=<Client._gather.<locals>.wait() done, defined at /home/de64/micromamba/envs/crispri/lib/python3.11/site-packages/distributed/client.py:2208> exception=AllExit()>
Traceback (most recent call last):
  File "/home/de64/micromamba/envs/crispri/lib/python3.11/site-packages/distributed/client.py", line 2217, in wait
    raise AllExit()
distributed.client.AllExit


In [5]:
# Last expression of the cell: renders the controller's dask client summary as the cell output.
dask_controller.daskclient

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://10.120.16.233:8787/status,

0,1
Dashboard: http://10.120.16.233:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.120.16.233:35272,Workers: 0
Dashboard: http://10.120.16.233:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Cell Cycle Analysis

### Import Necessary Dataframes

In [6]:
def _read_merged_parquet(path):
    """Lazily open a parquet dataset with the pyarrow engine, computing index divisions."""
    return dd.read_parquet(path, engine="pyarrow", calculate_divisions=True)


# Per-cell-cycle, per-observation and per-growth-measurement tables for this experiment.
cell_cycle_df = _read_merged_parquet(
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-10-31_Lineage_Cell_Cycle_Merged"
)
timepoints_df = _read_merged_parquet(
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-10-31_Lineage_Observations_Merged"
)
growth_df = _read_merged_parquet(
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-10-31_Lineage_Growth_Observations_Merged"
)

#### Select Parameters to Analyze

In [7]:
# Columns retained from the cell-cycle table (identifiers first, then measurements).
cell_cycle_columns = [
    'Multi-Experiment Global CellID',
    'Multi-Experiment FOV',
    'Global CellID',
    'trenchid',
    'File Parquet Index',
    'fov',
    'row',
    'trench',
    'initial timepoints',
    'Mother',
    'CellID',
    'Mother CellID',
    'Daughter CellID 1',
    'Daughter CellID 2',
    'Sister CellID',
    'Experiment #',
    'Birth: Length',
    'Division: Length',
    'Delta: Length',
    'Septum Displacement Length Normalized',
    'Birth: Volume',
    'Division: Volume',
    'Delta: Volume',
    'Final time (s)',
    'Initial time (s)',
    'Delta time (s)',
    'Growth Rate: Length',
    'Growth Rate: Volume',
]

# Columns retained from the per-observation (timepoints) table.
timepoints_columns = [
    'Global CellID-Cell Cycle timepoints',
    'trenchid',
    'Global CellID',
    'Observation time (s)',
    'Length',
    'Width',
    'mCherry mean_intensity',
    'Experiment #',
    'Multi-Experiment Global CellID',
]

# Columns retained from the instantaneous growth-rate table.
growth_columns = [
    'Global CellID-Growth Rate Measurement Index',
    'trenchid',
    'Global CellID',
    'Measurement time (s)',
    'Instantaneous Growth Rate: Length',
    'Instantaneous Growth Rate: Volume',
    'Experiment #',
    'Multi-Experiment Global CellID',
]

# Restrict each dataframe to its selected columns (the index is left untouched).
growth_df = growth_df[growth_columns]
timepoints_df = timepoints_df[timepoints_columns]
cell_cycle_df = cell_cycle_df[cell_cycle_columns]

### Classify proliferating cells (DISABLED)
- Classify trenches as proliferating or non-proliferating and exclude from subsequent analysis where appropriate
- Proliferation criterion:
    - Median instantaneous growth rate for the trench must correspond to at least one doubling every 3 hours, that is $\lambda > \frac{\ln(2)}{3}\ \mathrm{hr}^{-1}$
    - Median doubling time (for reported cell cycles) must be no longer than 3 hours
    - The number of reported cell cycles (of the mother cell only) must also average at least one every 3 hours

- Note (11/1/2024) - Was done for large library comparisons to ensure non-sparse cell cycle data (i.e. interdivision time and septum error).

In [8]:
# Experiment timeline, expressed in seconds.
_seconds_per_hour = 3600
induction_time = 2 * _seconds_per_hour
steady_state_time = 7 * _seconds_per_hour
final_time = 10 * _seconds_per_hour
# Length of the window between steady-state onset and the end of the run.
steady_state_interval = final_time - steady_state_time

# Proliferation thresholds derived from a 3-hour doubling criterion:
# the growth-rate threshold is ln(2) per doubling_threshold hours,
# the doubling-time threshold is the same criterion in seconds.
doubling_threshold = 3
growth_rate_threshold = np.log(2) / doubling_threshold
doubling_time_threshold = doubling_threshold * _seconds_per_hour

#### Add sgRNA info for real data

In [17]:
# Condition label -> list of "Multi-Experiment FOV" values imaged under that condition.
condition_dict = {
    "EV": list(range(0, 30)),
    "rplL": list(range(30, 60)),
    "infA": list(range(60, 90)),
    "pheT": list(range(90, 120)),
    "0uM AHL": list(range(100000, 100060)),
    "0.03uM AHL": list(range(100060, 100120)),
    "0.05uM AHL": list(range(100120, 100180)),
    "0.07uM AHL": list(range(100180, 100240)),
    "1uM AHL": list(range(100240, 100300)),
    "0.2uM AHL": list(range(200000, 200030)),
    "0.16uM AHL": list(range(200030, 200060)),
    "0.13uM AHL": list(range(200060, 200090)),
    "0.1uM AHL": list(range(200090, 200120)),
}

# Inverse lookup: FOV value -> condition label.
inv_condition_dict = {}
for _condition_label, _fov_values in condition_dict.items():
    for _fov_value in _fov_values:
        inv_condition_dict[_fov_value] = _condition_label

# Label every cell-cycle row by its FOV; an FOV missing from the lookup raises KeyError.
# NOTE(review): `meta=str` differs from the ("name", dtype) tuples used for "Condition Index" below - confirm it is intended.
_fov_column = cell_cycle_df["Multi-Experiment FOV"]
cell_cycle_df["Condition"] = _fov_column.apply(lambda fov: inv_condition_dict[fov], meta=str)

# Take the first condition seen for each trench and materialise it as a pandas Series.
_condition_by_trench = cell_cycle_df.groupby("Multi-Experiment Phenotype Trenchid")["Condition"]
condition_series = _condition_by_trench.first().compute()

# Attach the trench-level condition to the other two tables via an index join.
timepoints_df = timepoints_df.join(condition_series)
growth_df = growth_df.join(condition_series)



In [18]:
def _with_n_observations(ddf):
    """Count unique index values per condition and join the count back onto ``ddf``.

    Returns a ``(joined_ddf, counts_df)`` pair, where ``counts_df`` is a pandas
    DataFrame indexed by "Condition" with a single "N Observations" column.
    """
    per_condition = ddf.groupby("Condition", sort=False)
    counts = per_condition.apply(lambda grp: len(grp.index.unique()), meta=int).compute()
    counts_df = pd.DataFrame(counts).rename({0: "N Observations"}, axis=1).sort_index()
    return ddf.join(counts_df, on="Condition"), counts_df


# Same computation for each of the three tables; the count tables stay available under their own names.
cell_cycle_df, n_obs_cell_cycle_df = _with_n_observations(cell_cycle_df)

timepoints_df, n_obs_timepoints = _with_n_observations(timepoints_df)

growth_df, n_obs_growth = _with_n_observations(growth_df)



### Determine when cells reach steady state and save intermediate df as a checkpoint

In [19]:
## add condition index
# Condition label -> integer index used as the variant index in the estimator/bootstrap calls.
condition_idx_dict = {
    "0uM AHL": 0,
    "0.03uM AHL": 1,
    "0.05uM AHL": 2,
    "0.07uM AHL": 3,
    "1uM AHL": 4,
    "0.2uM AHL": 5,
    "0.16uM AHL": 6,
    "0.13uM AHL": 7,
    "0.1uM AHL": 8,
    "EV": 9,
    "rplL": 10,
    "infA": 11,
    "pheT": 12,
}

cell_cycle_df["Condition Index"] = cell_cycle_df["Condition"].apply(lambda x: condition_idx_dict[x], meta=("Condition Index", "int64"))
timepoints_df["Condition Index"] = timepoints_df["Condition"].apply(lambda x: condition_idx_dict[x], meta=("Condition Index", "int64"))
growth_df["Condition Index"] = growth_df["Condition"].apply(lambda x: condition_idx_dict[x], meta=("Condition Index", "int64"))

## add categories
# The control condition is "EV"; look its index up instead of hard-coding 9 so the
# category stays correct if condition_idx_dict is ever renumbered.
control_condition_idx = condition_idx_dict["EV"]


def _category_for(condition_idx):
    """Return "Control" for the EV condition index and "Not Control" for any other."""
    return "Control" if condition_idx == control_condition_idx else "Not Control"


# Explicit meta: the result is a string (object) column, so dask does not have to run the
# function on dummy data to guess the output type (which also emits a warning per call).
cell_cycle_df['Category'] = cell_cycle_df["Condition Index"].apply(_category_for, meta=("Category", "object"))
timepoints_df['Category'] = timepoints_df["Condition Index"].apply(_category_for, meta=("Category", "object"))
growth_df['Category'] = growth_df["Condition Index"].apply(_category_for, meta=("Category", "object"))

You did not provide metadata, so Dask is running your function on a small dataset to guess output types. It is possible that Dask will guess incorrectly.
To provide an explicit output types or to silence this message, please provide the `meta=` keyword, as described in the map or apply function that you are using.
  Before: .apply(func)
  After:  .apply(func, meta=('Condition Index', 'object'))

You did not provide metadata, so Dask is running your function on a small dataset to guess output types. It is possible that Dask will guess incorrectly.
To provide an explicit output types or to silence this message, please provide the `meta=` keyword, as described in the map or apply function that you are using.
  Before: .apply(func)
  After:  .apply(func, meta=('Condition Index', 'object'))

You did not provide metadata, so Dask is running your function on a small dataset to guess output types. It is possible that Dask will guess incorrectly.
To provide an explicit output types or to silenc

In [20]:
def _checkpoint_time_windows(ddf, ss_time_col, pre_time_col, ss_path, pre_path):
    """Split ``ddf`` into steady-state / pre-induction subsets and write both to parquet.

    Rows with ``ss_time_col >= steady_state_time`` form the steady-state subset and rows
    with ``pre_time_col <= induction_time`` form the pre-induction subset.

    Returns ``(steady_state_ddf, preinduction_ddf, reindexed_ddf)``, where
    ``reindexed_ddf`` is ``ddf`` re-indexed on "Multi-Experiment Phenotype Trenchid"
    and repartitioned into 400 partitions.

    NOTE(review): the re-indexed frame is built after the two subsets and is only
    returned, so the parquet checkpoints are written from ``ddf`` as passed in -
    confirm whether the repartitioning was meant to apply to the checkpoints.
    """
    steady_state_ddf = ddf[ddf[ss_time_col] >= steady_state_time]
    preinduction_ddf = ddf[ddf[pre_time_col] <= induction_time]
    reindexed_ddf = (
        ddf.reset_index()
        .set_index("Multi-Experiment Phenotype Trenchid", sorted=True)
        .repartition(npartitions=400)
    )
    steady_state_ddf.to_parquet(ss_path, engine="pyarrow", overwrite=True)
    preinduction_ddf.to_parquet(pre_path, engine="pyarrow", overwrite=True)
    return steady_state_ddf, preinduction_ddf, reindexed_ddf


# Cell cycles: steady state is judged on the start time, pre-induction on the end time.
cell_cycle_df_ss, cell_cycle_df_preinduction, cell_cycle_df = _checkpoint_time_windows(
    cell_cycle_df,
    "Initial time (s)",
    "Final time (s)",
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Cell_Cycle_df/",
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Cell_Cycle_df/",
)

# Observations: both windows use the observation time.
timepoints_df_ss, timepoints_df_preinduction, timepoints_df = _checkpoint_time_windows(
    timepoints_df,
    "Observation time (s)",
    "Observation time (s)",
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Timepoints_df/",
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Timepoints_df/",
)

# Growth measurements: both windows use the measurement time.
growth_df_ss, growth_df_preinduction, growth_df = _checkpoint_time_windows(
    growth_df,
    "Measurement time (s)",
    "Measurement time (s)",
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Growth_df/",
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Growth_df/",
)

# Ask the controller to reset worker memory once all checkpoints are written.
dask_controller.reset_worker_memory()

Done.


## Computing Trench-wise Estimator Distributions

- These distributions can be used to construct confidence intervals (this will help with clustering)
- They can also be re-used to compute p-values (with some modification)

### Estimators of Interest

- Mean
- Median
- CV
- MAD CV
- Pearson R
- Qn (Pearson R)

### Bootstraps of Interest

- Naive Bootstrap
- Tapered Block Bootstrap
    - Using for ACFs
- Stationary Bootstrap
    - Using for location, scale and pearson correlation (single cell cycle)

## 1) Computing Estimators
- Actually computes estimators for different steady state values
- Will be merged with p-values, CIs and variances from bootstrapping

In [4]:
# dask_controller = tr.trcluster.dask_controller(
#     walltime="03:00:00",
#     local=False,
#     n_workers=200,
#     n_workers_min=100,
#     memory="16GB",
#     working_directory=dask_wd,
# )
# dask_controller.startdask()

170m
03:00:00


In [None]:
# dask_controller.daskclient

### Reload Dataframes

In [21]:
# Reload the timepoints and growth checkpoints only (the cell-cycle checkpoints are excluded).
_checkpoint_read_options = {"engine": "pyarrow", "calculate_divisions": True}

timepoints_df_ss = dd.read_parquet(
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Timepoints_df/",
    **_checkpoint_read_options,
)
timepoints_df_preinduction = dd.read_parquet(
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Timepoints_df/",
    **_checkpoint_read_options,
)

growth_df_ss = dd.read_parquet(
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Growth_df/",
    **_checkpoint_read_options,
)
growth_df_preinduction = dd.read_parquet(
    "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Growth_df/",
    **_checkpoint_read_options,
)

### 1a) Calculate Location, Scale and Correlation Estimates

In [22]:
# Timepoints checkpoints: estimate single-variable statistics for size and fluorescence.
steady_state_df_path = "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Timepoints_df"
preinduction_df_path = "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Timepoints_df"
single_variable_list = ['Length', 'Width', 'mCherry mean_intensity']
bivariate_variable_list = []  # no variable pairs requested for this run

tr.export_estimator_df(
    steady_state_df_path,
    preinduction_df_path,
    single_variable_list,
    bivariate_variable_list,
    filter_proliferating=False,
    variant_index="Condition Index",
    final_columns=['Category', 'Condition Index', 'Condition', 'N Observations'],
    control_categories=['Control'],
)

  Before: .apply(func)
  After:  .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result
  or:     .apply(func, meta=('x', 'f8'))            for series result
  trench_estimator_df = trench_groupby.apply(lambda x: get_estimator_df_single_trench(x,param_groups,estimator,\
  Before: .apply(func)
  After:  .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result
  or:     .apply(func, meta=('x', 'f8'))            for series result
  trench_estimator_df = trench_groupby.apply(lambda x: get_estimator_df_single_trench(x,param_groups,estimator,\
  Before: .apply(func)
  After:  .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result
  or:     .apply(func, meta=('x', 'f8'))            for series result
  pre_post_mask = df.groupby(trench_key).apply(lambda x: sorted(x["Induction"].unique())==["Post","Pre"]).compute()


In [23]:
# Growth checkpoints: estimate single-variable statistics for the volumetric growth rate.
steady_state_df_path = "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Growth_df"
preinduction_df_path = "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Growth_df"
single_variable_list = ['Instantaneous Growth Rate: Volume']
bivariate_variable_list = []  # no variable pairs requested for this run

tr.export_estimator_df(
    steady_state_df_path,
    preinduction_df_path,
    single_variable_list,
    bivariate_variable_list,
    filter_proliferating=False,
    variant_index="Condition Index",
    final_columns=['Category', 'Condition Index', 'Condition', 'N Observations'],
    control_categories=['Control'],
)

  Before: .apply(func)
  After:  .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result
  or:     .apply(func, meta=('x', 'f8'))            for series result
  trench_estimator_df = trench_groupby.apply(lambda x: get_estimator_df_single_trench(x,param_groups,estimator,\
  Before: .apply(func)
  After:  .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result
  or:     .apply(func, meta=('x', 'f8'))            for series result
  trench_estimator_df = trench_groupby.apply(lambda x: get_estimator_df_single_trench(x,param_groups,estimator,\
  Before: .apply(func)
  After:  .apply(func, meta={'x': 'f8', 'y': 'f8'}) for dataframe result
  or:     .apply(func, meta=('x', 'f8'))            for series result
  pre_post_mask = df.groupby(trench_key).apply(lambda x: sorted(x["Induction"].unique())==["Post","Pre"]).compute()


## 2) Bootstrapping Estimators
- Computing estimator bootstraps
- Using this, find: $\mathrm{Var}(\hat\theta)$, percentile confidence intervals, and p-values

### 2a)  Bootstrapping Procedure


- First, saving a repartitioned version of the sgRNA sorted dataframe
- Then, running the aggregate bootstrapper on the trenches individually and saving the output
- Finally aggregating on the whole dataset (for the parameters and estimators of interest) and outputting the samples for later manipulation
- Of particular interest is comparing different estimators for robust measures of location, scale and the pearson correlation

#### On Bootstrapping Choices

- For the location, scale and correlation estimation, I am using a stationary bootstrap since it could be defined for entire trenches of data (i.e., mother and first daughter lineages)
- For the ACF estimation, it is necessary to use the tapered block bootstrap since the stationary bootstrap adds biases to the ACF estimation
- A drawback of this approach is that I may only use mother cells in the tapered case

### Exports Trench-wise Bootstrap (For Location, Scale and Correlation Estimators)
- Using a standard bootstrap for everything in this notebook because the timeseries is too short for the other methods

### Start a Dask For Each (since it crashes)

In [3]:
# dask_controller = tr.trcluster.dask_controller(
#     walltime="06:00:00",
#     local=False,
#     n_workers=200,
#     n_workers_min=200,
#     memory="16GB",
#     working_directory=dask_wd,
# )
# dask_controller.startdask()

350m
06:00:00


In [4]:
# dask_controller.daskclient

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://10.120.16.236:8787/status,

0,1
Dashboard: http://10.120.16.236:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.120.16.236:36496,Workers: 0
Dashboard: http://10.120.16.236:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [24]:
# Bootstrap sizes for the trench-wise and the aggregate stage, and the overwrite flag.
n_bootstraps_trench_density = 100
n_bootstraps_trench_aggregate = 1000
overwrite = True

# Trench-level estimator settings (one entry per estimator).
estimators = [np.nanmean]
estimator_names = ["Mean"]
bivariate_variable_list = [False]

# Aggregation settings: the "Mean" estimator is aggregated with both nanmedian and nanmean.
estimator_names_to_agg = ["Mean", "Mean"]
unpaired_aggregators = [np.nanmedian, np.nanmean]
paired_aggregators = []
agg_names = ["Mean (Robust)", "Mean (True)"]

# Variables to bootstrap; no Pearson-correlation pairs are requested.
single_variable_list = ['Length', 'Width', 'mCherry mean_intensity']
pearsonr_variable_list = []

steady_state_df_path = "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Timepoints_df"
preinduction_df_path = "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Timepoints_df"

tr.trenchwise_bootstrap_main(
    dask_controller,
    steady_state_df_path,
    preinduction_df_path,
    tr.bootstrap_density,
    estimators,
    estimator_names,
    bivariate_variable_list,
    single_variable_list,
    pearsonr_variable_list,
    filter_proliferating=False,
    variant_index='Condition Index',
    n_bootstraps_trench_density=n_bootstraps_trench_density,
    overwrite=overwrite,
)

0
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Timepoints_df_temp_filtered
Starting Run.
Done.
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Timepoints_df_temp_filtered
Starting Run.
Done.
1
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Timepoints_df_Trench_Estimator_Bootstrap_temp
Starting Run.
Done.
2
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Timepoints_df_Trench_Estimator_Bootstrap_temp
Starting Run.
Done.
3
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Timepoints_df_Trench_Estimator_Bootstrap
Starting Run.
Done.
4
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Timepoints_df_Trench_Estimator_Bootstrap_temp_2
Starting Run.
Done.
Done!


In [25]:
# Aggregate the trench-wise bootstrap for the steady-state path currently held in
# steady_state_df_path, reusing the settings defined in the previous cell.
tr.trench_aggregate_bootstrap_main(
    dask_controller,
    steady_state_df_path,
    estimator_names,
    bivariate_variable_list,
    estimator_names_to_agg,
    unpaired_aggregators,
    paired_aggregators,
    agg_names,
    single_variable_list,
    pearsonr_variable_list,
    n_bootstraps_trench_aggregate=n_bootstraps_trench_aggregate,
    variant_index='Condition Index',
    overwrite=overwrite,
)

0
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Timepoints_df_Variant_Estimator_Bootstrap_temp
Starting Run.
Done.
1
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Timepoints_df_Variant_Estimator_Bootstrap
Starting Run.
Done.
2
Done!


In [26]:
# Switch to the growth-rate checkpoints; estimator and bootstrap settings from the
# timepoints run above are reused unchanged.
single_variable_list = ['Instantaneous Growth Rate: Volume']
pearsonr_variable_list = []

steady_state_df_path = "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Growth_df"
preinduction_df_path = "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Growth_df"

tr.trenchwise_bootstrap_main(
    dask_controller,
    steady_state_df_path,
    preinduction_df_path,
    tr.bootstrap_density,
    estimators,
    estimator_names,
    bivariate_variable_list,
    single_variable_list,
    pearsonr_variable_list,
    filter_proliferating=False,
    variant_index='Condition Index',
    n_bootstraps_trench_density=n_bootstraps_trench_density,
    overwrite=overwrite,
)

0
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Growth_df_temp_filtered
Starting Run.
Done.
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Growth_df_temp_filtered
Starting Run.
Done.
1
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Growth_df_Trench_Estimator_Bootstrap_temp
Starting Run.
Done.
2
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Preinduction_Growth_df_Trench_Estimator_Bootstrap_temp
Starting Run.
Done.
3
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Growth_df_Trench_Estimator_Bootstrap
Starting Run.
Done.
4
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Growth_df_Trench_Estimator_Bootstrap_temp_2
Starting Run.
Done.
Done!


In [27]:
# Aggregate the trench-wise bootstrap for the growth-rate path set in the previous cell
# (steady_state_df_path and the variable lists were reassigned there).
tr.trench_aggregate_bootstrap_main(
    dask_controller,
    steady_state_df_path,
    estimator_names,
    bivariate_variable_list,
    estimator_names_to_agg,
    unpaired_aggregators,
    paired_aggregators,
    agg_names,
    single_variable_list,
    pearsonr_variable_list,
    n_bootstraps_trench_aggregate=n_bootstraps_trench_aggregate,
    variant_index='Condition Index',
    overwrite=overwrite,
)

0
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Growth_df_Variant_Estimator_Bootstrap_temp
Starting Run.
Done.
1
/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Growth_df_Variant_Estimator_Bootstrap
Starting Run.
Done.
2
Done!


In [56]:
# dask_controller = tr.trcluster.dask_controller(
#     walltime="06:00:00",
#     local=False,
#     n_workers=100,
#     n_workers_min=100,
#     memory="16GB",
#     working_directory=dask_wd,
# )
# dask_controller.startdask()

350m
06:00:00


In [57]:
# dask_controller.daskclient

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.SLURMCluster
Dashboard: http://10.120.16.171:8787/status,

0,1
Dashboard: http://10.120.16.171:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.120.16.171:43556,Workers: 0
Dashboard: http://10.120.16.171:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## 3) Combining Estimates and Bootstraps
- Combine the information from the last two steps to get $\mathrm{Var}(\hat\theta)$, confidence intervals and p-values
- Part 1: Computing $\mathrm{Var}(\hat\theta)$ and Confidence Intervals from Bootstraps
- Part 2: Computing P-values from Control Bootstraps
- Part 3: Assembling variances, CIs and P-values into single variant dataframe for export

In [29]:
# dask_controller = tr.trcluster.dask_controller(
#     walltime="03:00:00",
#     local=False,
#     n_workers=50,
#     n_workers_min=50,
#     memory="8GB",
#     working_directory=dask_wd,
# )
# dask_controller.startdask()

In [30]:
# dask_controller.daskclient

In [31]:
# Steady-state checkpoint roots for the two measurement families.
steady_state_timepoints_df_path = "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Timepoints_df"
steady_state_growth_df_path = "/home/de64/scratch/de64/sync_folder/2024-10-25_DE828_AHL_Titration/2024-11-01_Steady_State_Growth_df"

# Pickles read back below; each sits next to its checkpoint with a fixed suffix.
estimator_timepoints_stats_path = f"{steady_state_timepoints_df_path}_Estimators_wStats.pkl"
estimator_cell_growth_path = f"{steady_state_growth_df_path}_Estimators_wStats.pkl"

# Destinations for the combined table (pickle and CSV).
steady_state_estimator_path = "/home/de64/group/de64/CRISPRi_Libraries/dev_notebooks/2024-11-23_Figure_Notebooks/Data/Translation_Knockdowns/Mother_Machine_Imaging/2024-11-01_Steady_State_df_Estimators_wStats.pkl"
steady_state_estimator_path_csv = "/home/de64/group/de64/CRISPRi_Libraries/dev_notebooks/2024-11-23_Figure_Notebooks/Data/Translation_Knockdowns/Mother_Machine_Imaging/2024-11-01_Steady_State_df_Estimators_wStats.csv"

# Produce the final bootstrap output for each family (timepoints first, then growth).
for _checkpoint_path in (steady_state_timepoints_df_path, steady_state_growth_df_path):
    tr.get_final_bootstrap_output(
        _checkpoint_path,
        include_ccf=False,
        filter_proliferating=False,
        include_pvals=False,
        variant_index='Condition Index',
    )

# Load both per-family tables, stack them, sort by index, and export.
timepoints_estimator_df = pd.read_pickle(estimator_timepoints_stats_path)
growth_estimator_df = pd.read_pickle(estimator_cell_growth_path)

_stacked_estimators = pd.concat([timepoints_estimator_df, growth_estimator_df])
estimator_df = _stacked_estimators.sort_index()
estimator_df.to_pickle(steady_state_estimator_path)
estimator_df.to_csv(steady_state_estimator_path_csv)

In [35]:
# Shut the dask controller down now that all outputs have been written.
dask_controller.shutdown()

Done.


2024-11-14 14:51:18,754 - distributed.deploy.adaptive_core - INFO - Adaptive stop
