### Plots for Validation Dataset
- Excluding GlyQ due to insufficient sampling in lDE20
- Revisit if I re-run the original dataset with more FOV retention in notebook 2


In [1]:
import trenchripper as tr
import numpy as np
import pandas as pd
import seaborn as sns
import scipy as sp
import scipy.stats
from scipy.stats import ttest_ind

import sklearn as skl
import dask.dataframe as dd
import dask.array as da
import dask
import warnings
import copy
import random

from matplotlib import pyplot as plt
import matplotlib as mpl

# Scratch directory handed to the dask controller as its working directory.
dask_wd = "/home/de64/scratch/de64/dask"

# Seed both the NumPy global RNG and the stdlib RNG with the same value so
# that any stochastic step in this notebook is repeatable across runs.
np.random.seed(42)
random.seed(42)

### Normalize Against WT
- Changes in optical configurations resulted in a change in mCherry intensity
- Normalize the other variants against initial WT values
- The results are mediocre; consider including only the second run (compare the two runs by hand, e.g. in terms of focus)

### Endpoint Steady State Distributions

In [2]:
# Settings for the trenchripper dask controller, collected in one place.
# NOTE(review): the meaning of each option is defined by trenchripper and is
# not visible in this notebook; the comments below only restate the values.
cluster_options = dict(
    walltime="04:00:00",  # walltime string passed through to the controller
    local=False,
    n_workers=25,
    n_workers_min=25,  # same value as n_workers
    memory="16GB",
    working_directory=dask_wd,  # scratch path defined in the imports cell
)

# Build the controller from those settings and start it.
dask_controller = tr.trcluster.dask_controller(**cluster_options)
dask_controller.startdask()

230m
04:00:00


In [4]:
# Ask the controller started above to display its dask dashboard.
# NOTE(review): how the dashboard is rendered is decided by trenchripper's
# `displaydashboard` helper and is not visible in this notebook.
dask_controller.displaydashboard()

In [5]:
# Steady-state tables from the merged lDE20 analysis, loaded lazily with dask.
# `calculate_divisions=True` asks dask to compute index divisions while reading.
cell_cycle_df, timepoints_df, growth_df = (
    dd.read_parquet(parquet_path, calculate_divisions=True)
    for parquet_path in (
        "/home/de64/scratch/de64/sync_folder/2023-01-18_lDE20_Merged_Analysis/2024-01-25_lDE20_Steady_State_Cell_Cycle_df",
        "/home/de64/scratch/de64/sync_folder/2023-01-18_lDE20_Merged_Analysis/2024-01-25_lDE20_Steady_State_Timepoints_df",
        "/home/de64/scratch/de64/sync_folder/2023-01-18_lDE20_Merged_Analysis/2024-01-25_lDE20_Steady_State_Growth_df",
    )
)

# The same three tables from the separate validation run (2021-11-12_lDE20_Validation_1).
control_cell_cycle_df, control_timepoints_df, control_growth_df = (
    dd.read_parquet(parquet_path, calculate_divisions=True)
    for parquet_path in (
        "/home/de64/scratch/de64/sync_folder/2021-11-12_lDE20_Validation_1/2024-03-24_Steady_State_Cell_Cycle_df",
        "/home/de64/scratch/de64/sync_folder/2021-11-12_lDE20_Validation_1/2024-03-24_Steady_State_Timepoints_df",
        "/home/de64/scratch/de64/sync_folder/2021-11-12_lDE20_Validation_1/2024-03-24_Steady_State_Growth_df",
    )
)

In [6]:
# NOTE(review): this cell rebinds the lazy dask frames to in-memory pandas
# frames, so it is meant to run exactly once after the loading cell; running
# it a second time would fail at the first `.compute()` on a pandas object.

# Add the duration column in hours (3600 s per hour). The frames are still
# lazy dask DataFrames at this point.
for lazy_cell_cycle in (cell_cycle_df, control_cell_cycle_df):
    lazy_cell_cycle["Delta time (hr)"] = lazy_cell_cycle["Delta time (s)"] / 3600

# Divide the volumetric growth rate by ln(2).
# NOTE(review): presumably this converts a natural-log rate into doublings
# per unit time -- confirm the units of the stored column.
for lazy_growth in (growth_df, control_growth_df):
    lazy_growth["Instantaneous Growth Rate: Volume"] = (
        lazy_growth["Instantaneous Growth Rate: Volume"] / np.log(2)
    )

# Lookup table: one row per oDEPool7_id present in the validation run,
# holding the first Gene value recorded for that id; index cast to int.
variant_lookup_df = (
    control_cell_cycle_df
    .groupby("oDEPool7_id")[["Gene"]]
    .first()
    .compute()
)
variant_lookup_df.index = variant_lookup_df.index.astype(int)

# Keep only the library rows whose variant id also occurs in the validation
# run, then materialize each table as a pandas DataFrame.
validated_ids = variant_lookup_df.index
cell_cycle_df = cell_cycle_df[cell_cycle_df["oDEPool7_id"].isin(validated_ids)].compute()
timepoints_df = timepoints_df[timepoints_df["oDEPool7_id"].isin(validated_ids)].compute()
growth_df = growth_df[growth_df["oDEPool7_id"].isin(validated_ids)].compute()

# The validation tables are materialized unfiltered.
control_cell_cycle_df = control_cell_cycle_df.compute()
control_timepoints_df = control_timepoints_df.compute()
control_growth_df = control_growth_df.compute()

# Label every row with the experiment it came from.
for library_frame, isolate_frame in (
    (cell_cycle_df, control_cell_cycle_df),
    (timepoints_df, control_timepoints_df),
    (growth_df, control_growth_df),
):
    library_frame["Experiment"] = "lDE20"
    isolate_frame["Experiment"] = "Isolate"

# Library rows with no Gene annotation are labelled "EV".
# NOTE(review): only the lDE20 frames are filled; the isolate frames are left
# as loaded -- confirm they contain no missing Gene values.
for library_frame in (cell_cycle_df, timepoints_df, growth_df):
    library_frame.loc[library_frame["Gene"].isna(), "Gene"] = "EV"

### Save Checkpoints

In [8]:
# Write each prepared table to its CSV checkpoint: the three lDE20 tables
# first, then the three isolate tables.
for checkpoint_frame, checkpoint_path in (
    (cell_cycle_df, "/home/de64/group/de64/CRISPRi_Libraries/dev_notebooks/2024-11-23_Figure_Notebooks/Data/Validation_Imaging/lDE20_Cell_Cycle_df.csv"),
    (timepoints_df, "/home/de64/group/de64/CRISPRi_Libraries/dev_notebooks/2024-11-23_Figure_Notebooks/Data/Validation_Imaging/lDE20_Timepoints_df.csv"),
    (growth_df, "/home/de64/group/de64/CRISPRi_Libraries/dev_notebooks/2024-11-23_Figure_Notebooks/Data/Validation_Imaging/lDE20_Growth_df.csv"),
    (control_cell_cycle_df, "/home/de64/group/de64/CRISPRi_Libraries/dev_notebooks/2024-11-23_Figure_Notebooks/Data/Validation_Imaging/Isolate_Cell_Cycle_df.csv"),
    (control_timepoints_df, "/home/de64/group/de64/CRISPRi_Libraries/dev_notebooks/2024-11-23_Figure_Notebooks/Data/Validation_Imaging/Isolate_Timepoints_df.csv"),
    (control_growth_df, "/home/de64/group/de64/CRISPRi_Libraries/dev_notebooks/2024-11-23_Figure_Notebooks/Data/Validation_Imaging/Isolate_Growth_df.csv"),
):
    checkpoint_frame.to_csv(checkpoint_path)

In [18]:
# Shut the dask controller down; the checkpoints have been written and no
# later cell in this notebook uses the cluster.
dask_controller.shutdown()

Done.


2024-11-24 14:08:41,545 - distributed.deploy.adaptive_core - INFO - Adaptive stop
