In [1]:
import os
import pandas as pd
from cell_cycle_gating import run_cell_cycle_gating as rccg
from datarail.experimental_design import merge_data_metadata as mdm
import gr50
from datarail.experimental_design import plot_gr_dose_response as plot
from datarail.experimental_design import plot_cell_cycle_fractions as p
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Embed fonts in saved PDFs as TrueType (Type 42) instead of matplotlib's
# default Type 3, so figure text stays selectable/editable downstream
plt.rcParams['pdf.fonttype'] = 42

In [2]:
# Combine data from all plates into a single dataframe
# exist_ok=True: os.mkdir raised FileExistsError whenever the output folder
# was already present (e.g. on a second run of the notebook)
os.makedirs('gr_results', exist_ok=True)
# NOTE: all later '../...' paths are relative to results/. This chdir is
# relative too, so run this cell only once per kernel session.
os.chdir("results/")
# Per-plate files are named summary_<barcode>.csv. Sorting makes the row order
# of the combined table independent of the directory listing order.
prefix, suffix = 'summary_', '.csv'
csv_files = sorted(s for s in os.listdir('.')
                   if s.startswith(prefix) and s.endswith(suffix))
if not csv_files:
    # Fail with a clear message rather than pd.concat's "No objects to concatenate"
    raise FileNotFoundError(
        "No %s*%s files found in %s" % (prefix, suffix, os.getcwd()))
plate_frames = []
for obj in csv_files:
    df = pd.read_csv(obj)
    # The barcode is the file name with the prefix and extension removed
    df['barcode'] = obj[len(prefix):-len(suffix)]
    plate_frames.append(df)
dfc = pd.concat(plate_frames)

In [3]:
# Import metadata if it wasn't included during cell cycle gating
#dfm = pd.read_csv('../au565_metadata.csv')

In [4]:
# Merge well level cell cycle data with well level metadata
# Only needs to be run if cell cycle gating was done without metadata
# Set indices to barcode + well
#dfc.index = ["%s_%s" % (b, w) for b,w in zip(dfc.barcode, dfc.well)]
#dfm.index = ["%s_%s" % (b, w) for b,w in zip(dfm.barcode, dfm.well)]
#dfpw = pd.concat([dfc, dfm], axis=1) 
# Remove wells for which there isn't any data and duplicate columns
#dfpw = dfpw.dropna(subset=['cell_line']) 
#dfpw = dfpw.loc[:, ~dfpw.columns.duplicated()]

In [5]:
# Add corpse data if separate
dfd = pd.read_csv('../corpse_counts_au565.csv')
# Key both tables by "<barcode>_<well>" so that rows line up when joined
dfc.index = ['{}_{}'.format(bc, wl) for bc, wl in zip(dfc['barcode'], dfc['well'])]
dfd.index = ['{}_{}'.format(bc, wl) for bc, wl in zip(dfd['barcode'], dfd['Well Name'])]
# Join column-wise on the shared index, drop wells that have no barcode,
# then keep only the first occurrence of any repeated column name
dfpw = pd.concat([dfc, dfd], axis=1).dropna(subset=['barcode'])
is_repeat = dfpw.columns.duplicated()
dfpw = dfpw.loc[:, ~is_repeat]

In [6]:
# Sanity check: list the distinct plate barcodes present after the merge
dfpw['barcode'].unique()

array(['190412_39I_368', '190412_39I_369', '190412_39I_367',
       '190412_39I_366', '190412_39I_365'], dtype=object)

In [7]:
# Generate counts table for GR calculations
# generate_GRinput comes from datarail's merge_data_metadata module; the
# cell_count__time0 / cell_count__ctrl columns read in the next cell are
# presumably produced here (TODO confirm against datarail)
df_counts = mdm.generate_GRinput(dfpw)
# Save counts (path is relative to results/, the working directory set above).
# Note this file is written before 'timepoint_days' is added further down,
# so it does not contain that column.
df_counts.to_csv('../gr_results/AU565_counts.csv')

In [8]:
# Quick check to see if cells grew over the duration of the assay:
# compare the mean time-0 count with the mean control count for each plate.
# The two columns are selected *before* aggregating so that non-numeric
# columns (e.g. 'agent') never reach .mean(); pandas >= 2.0 raises TypeError
# on them instead of silently dropping them as older versions did.
df_plates = (
    df_counts
    .groupby(['cell_line', 'barcode'])[['cell_count__time0', 'cell_count__ctrl']]
    .mean()
)
# Option to save it - use to compute baseline growth rate
#df_plates.to_csv('../gr_results/AU565_control_data.csv')
df_plates

Unnamed: 0_level_0,Unnamed: 1_level_0,cell_count__time0,cell_count__ctrl
cell_line,barcode,Unnamed: 2_level_1,Unnamed: 3_level_1
AU565,190412_39I_366,969.952,2289.8
AU565,190412_39I_367,969.952,2676.0
AU565,190412_39I_368,969.952,2572.875
AU565,190412_39I_369,969.952,2576.733333


In [9]:
# GR calculations - values and metrics
# Compute GR values from the counts table (presumably adds the 'GRvalue'
# column that the dose-response plot further down reads - TODO confirm)
grv = gr50.compute_gr(df_counts)
# Compute GR metrics from the GR values (see the gr50 documentation for the
# exact set of summary metrics returned)
grm = gr50.gr_metrics(grv)

In [10]:
# GR static and GR toxic calculations
# These rates need time expressed in days, so derive a day-based column
# (timepoint / 24 - presumably timepoint is recorded in hours)
df_counts['timepoint_days'] = df_counts['timepoint'] / 24
grvst = gr50.compute_gr_static_toxic(df_counts, time_col='timepoint_days')

198 wells or conditions have 5% fewer cells than time0 control, estimate of dead_count has been increased to compensate.
14 wells or conditions have too many cells relative to untreated control, estimate of dead_count has been reduced to compensate.


In [11]:
# mean GR toxic and static values
# GR_toxic has to be a float column to be included in the averages
grvst['GR_toxic'] = grvst['GR_toxic'].astype(float)
# numeric_only=True: average only numeric columns. pandas >= 2.0 raises
# TypeError on leftover string columns, whereas older versions dropped them
# silently, so this keeps the earlier result.
grvstmean = (
    grvst
    .groupby(['cell_line', 'agent', 'concentration', 'timepoint'], as_index=False)
    .mean(numeric_only=True)
)
grvstmean.to_csv('../gr_results/grvalues_static_toxic_mean.csv')

In [13]:
# Save GR values and metrics
# Each result table is written to its own CSV in the gr_results folder
for table, destination in (
    (grv, '../gr_results/grvalues.csv'),
    (grm, '../gr_results/grmetrics.csv'),
    (grvst, '../gr_results/grvalues_static_toxic.csv'),
):
    table.to_csv(destination)

In [None]:
# Plot GR dose response data
# GR value curves, with 'sd' error bars, saved as a PDF
plot.plot_dose_response(
    grv,
    gr_value='GRvalue',
    errbar='sd',
    figname='../gr_results/gr_plots.pdf'
)
# Same layout for the increase in fraction of dead cells
plot.plot_fraction_dead(
    grv,
    y_col='increase_fraction_dead',
    errbar='sd',
    figname='../gr_results/inc_frac_dead.pdf'
)

In [None]:
# Calculate and save mean GR values
# numeric_only=True: average only numeric columns. pandas >= 2.0 raises
# TypeError on leftover string columns, whereas older versions dropped them
# silently, so this keeps the earlier result.
grmean = (
    grv
    .groupby(['cell_line', 'agent', 'concentration', 'timepoint'], as_index=False)
    .mean(numeric_only=True)
)
grmean.to_csv('../gr_results/grvalues_mean.csv')

In [None]:
# Plot summary cell cycle stacked bar graphs
# `p` is datarail's plot_cell_cycle_fractions module (imported in the first
# cell); the figure is presumably written to the given figname path as a PDF
p.plot(grv, figname = '../gr_results/cell_cycle_bar_plots.pdf')