In [12]:
import os
import anndata
import pandas as pd
import anndata as ad
from FlowCytometryTools import FCMeasurement
import re

### Create functions to load and concatenate data

In [32]:
def load_data_fcs(path: str, condition_name: str):
    """Load one FCS file into an AnnData object and annotate its observations.

    The ``.data`` attribute of ``FCMeasurement(ID="Sample", datafile=path)``
    is passed to ``ad.AnnData``. Two annotations are then written to ``obs``:

    * ``obs['drug']`` -- always set to ``condition_name``.
    * ``obs['cell_type']`` -- the last ``\\w+`` token of a file name ending in
      ``_<something> - <cell_type>.fcs``; only set when the path matches.

    Parameters
    ----------
    path : str
        Path to the ``.fcs`` file to read.
    condition_name : str
        Label stored in ``obs['drug']`` for every row of the loaded data.

    Returns
    -------
    The annotated AnnData object.

    Warns
    -----
    UserWarning
        If no cell type can be parsed from ``path``; in that case the
        ``cell_type`` column is left out.
    """
    import warnings

    measurement = FCMeasurement(ID="Sample", datafile=path)
    # Named `adata` (not `anndata`) so the imported module is not shadowed.
    adata = ad.AnnData(measurement.data)

    # The condition label does not depend on the file name, so it is written
    # unconditionally instead of only when the regex below matches.
    adata.obs['drug'] = condition_name

    pattern = r"_(\w+)\s*-\s*(\w+)\.fcs$"
    match_stim = re.search(pattern, path)
    if match_stim:
        # Group 1 (the sample/stimulation part) is currently unused.
        adata.obs['cell_type'] = match_stim.group(2)
    else:
        warnings.warn(
            f"Could not parse a cell type from {path!r}; "
            "'cell_type' annotation was not set."
        )
    return adata

In [None]:
def concatenate_2conditions_data(path_stim, stim_name, path_unstim, unstim_name, outdir_path):
    """Merge one stimulated and one unstimulated FCS file and save the result.

    Both files are loaded with ``load_data_fcs`` (each tagged with its own
    condition name) and concatenated with the stimulated data first. The
    concatenation is asked to record the origin of each row under the
    ``"condition"`` batch key using the categories ``"stim"`` and
    ``"control"``. The merged object is written to ``outdir_path``.

    Parameters
    ----------
    path_stim, path_unstim : str
        Paths of the stimulated and unstimulated ``.fcs`` files.
    stim_name, unstim_name : str
        Condition labels forwarded to ``load_data_fcs``.
    outdir_path : str
        Destination passed to the merged object's ``write`` method.

    Returns
    -------
    None
    """
    # The control sample is loaded first, then the stimulated one.
    control = load_data_fcs(path_unstim, unstim_name)
    treated = load_data_fcs(path_stim, stim_name)

    batch_options = {
        "batch_key": "condition",
        "batch_categories": ["stim", "control"],
    }
    merged = treated.concatenate(control, **batch_options)
    merged.write(outdir_path)

In [33]:
def _load_and_concatenate(path_list, condition_name):
    """Load every FCS file in ``path_list`` and fold them into one object, in order."""
    combined = load_data_fcs(path_list[0], condition_name)
    for path in path_list[1:]:
        combined = combined.concatenate(load_data_fcs(path, condition_name))
    return combined


def concatenate_2conditions_mutliple_data(path_stim_list,stim_name,path_unstim_list,unstim_name,outdir_path):
    """Merge several stimulated and several unstimulated FCS files.

    Each list is loaded and concatenated on its own, so the two lists may
    have different lengths. (Iterating over ``zip`` of both lists would
    silently drop the surplus files of the longer list.) The two per-condition
    objects are then concatenated with the stimulated data first, using the
    ``"condition"`` batch key and the categories ``"stim"`` and ``"control"``.

    Parameters
    ----------
    path_stim_list, path_unstim_list : list of str
        Paths of the stimulated / unstimulated ``.fcs`` files. Each list must
        contain at least one path.
    stim_name, unstim_name : str
        Condition labels forwarded to ``load_data_fcs``.
    outdir_path : str
        Currently unused: nothing is written to disk. The parameter is kept
        so existing calls keep working; write the returned object yourself
        if a file is needed.

    Returns
    -------
    The combined object produced by the final ``concatenate`` call.

    Raises
    ------
    IndexError
        If either path list is empty.
    """
    comb_unstim_anndata = _load_and_concatenate(path_unstim_list, unstim_name)
    comb_stim_anndata = _load_and_concatenate(path_stim_list, stim_name)

    return comb_stim_anndata.concatenate(
        comb_unstim_anndata,
        batch_key="condition",
        batch_categories=["stim", "control"],
    )

### Examples

In [24]:
# Example: combine one unstimulated and one stimulated B-cell sample into a
# single .h5ad file, then load that file back.
path_unstim = '/Users/MacBook/stanford/cellot/datasets/atest_data/controls_1_Surge_3_HCAA_BL_Unstim - Bcells.fcs'
path_stim = "/Users/MacBook/stanford/cellot/datasets/atest_data/controls_1_Surge_3_IC1_IC1_Stim_1 - Bcells.fcs"
outdir_path = "/Users/MacBook/stanford/cellot/datasets/atest_data/combined_Bcell3.h5ad"
concatenate_2conditions_data(path_stim, 'Stim_1_Bcell', path_unstim, 'Unstim_Bcell', outdir_path)
# Reload from the path that was just written (no duplicated literal).
# `ad.read_h5ad` is used because `ad.read` is a deprecated alias for it.
combined_anndata = ad.read_h5ad(outdir_path)

... storing 'drug' as categorical
... storing 'cell_type' as categorical


In [None]:
# Example: convert a single unstimulated B-cell sample to .h5ad.
path_unstim_to_predict='/Users/MacBook/stanford/cellot/datasets/surge_prehab_controls - Gated Populations/controls_1_Surge_3_HCBB_BL_Unstim - Bcells.fcs'
# load_data_fcs requires a condition label; calling it with the path alone
# raises TypeError.
# NOTE(review): 'Unstim_Bcell' mirrors the control label used for the combined
# B-cell example earlier in this notebook -- confirm this is the intended label.
unstim_anndata_to_predict = load_data_fcs(path_unstim_to_predict, 'Unstim_Bcell')
unstim_anndata_to_predict.write("/Users/MacBook/stanford/cellot/datasets/atest_data/unstim_Bcell_to_predict.h5ad")

... storing 'drug' as categorical


In [None]:
# Example: multi-file helper called with one file per condition.
# NOTE(review): the stim path is relative (resolved against the current working
# directory) while the unstim path is absolute, and the two file names refer to
# different populations (granulocytes vs NK) -- confirm this is intended.
path_unstim_list = [
    '/Users/MacBook/stanford/cellot/datasets/surge_prehab_controls - Gated Populations/controls_1_Surge_3_HCKK_IDX_Unstim - NK.fcs',
]
path_stim_list = [
    'datasets/surge_prehab_controls - Gated Populations/controls_1_Surge_3_IC1_IC1_Stim_1 - granulocytes.fcs',
]
res = concatenate_2conditions_mutliple_data(
    path_stim_list,
    'Stim_1',
    path_unstim_list,
    'Unstim',
    "/Users/MacBook/stanford/cellot/datasets/atest_data/combined_multiple.h5ad",
)

### Pipeline for evaluation

In [None]:
# Read the evaluation results CSV into a DataFrame.
evals_csv_path = "/Users/MacBook/stanford/cellot/results/4i/drug-cisplatin/model-cellot/evals_iid_data_space/evals.csv"
eval_4i = pd.read_csv(evals_csv_path)

In [10]:
# List the distinct values of the `metric` column.
available_metrics = eval_4i['metric'].unique()
print('eval metric available:', available_metrics)

eval metric available: ['l2-means' 'l2-stds' 'r2-means' 'r2-stds' 'r2-pairwise_feat_corrs'
 'l2-pairwise_feat_corrs' 'mmd' 'enrichment-k50' 'enrichment-k100']


In [12]:
# Distinct values of the `ncells` column.
eval_4i['ncells'].unique()

array([100, 250, 500])