### Import libraries

In [26]:
import os
import anndata
import pandas as pd
import anndata as ad
from FlowCytometryTools import FCMeasurement
import re
import numpy as np
from list_paths import create_list_of_paths,create_list_of_paths_spec_patients
from plot_pred_vs_true import plot_result

### Create functions to load and concatenate data

In [20]:
def arcsinh_transform(X, cofactor=5):
    """Scale ``X`` by ``cofactor`` and apply the inverse hyperbolic sine.

    Parameters
    ----------
    X : array-like supporting division by a scalar
        Values to transform.
    cofactor : scalar, default 5
        Divisor applied to ``X`` before ``np.arcsinh``.

    Returns
    -------
    The element-wise result of ``np.arcsinh(X / cofactor)``.
    """
    scaled = X / cofactor
    return np.arcsinh(scaled)

In [21]:
def load_data_fcs(path,condition_name):
    """Load one FCS file into an AnnData object and arcsinh-transform its values.

    Parameters
    ----------
    path : str
        Path to the ``.fcs`` file. The cell type is parsed from the end of the
        path, which is expected to look like ``..._<stim> - <cell_type>.fcs``.
    condition_name : str
        Label stored in ``obs['drug']`` for every row of the file.

    Returns
    -------
    The AnnData built from ``FCMeasurement(...).data`` with ``X`` passed
    through ``arcsinh_transform``. ``obs['drug']`` is always set;
    ``obs['cell_type']`` is only set when the path matches the expected
    pattern (otherwise a message is printed).
    """
    sample = FCMeasurement(ID="Sample", datafile=path)
    # Named `adata` so the imported `anndata` module is not shadowed locally.
    adata = ad.AnnData(sample.data)

    # The condition label comes from the caller, not from the file name, so it
    # is attached even when the file name cannot be parsed.
    adata.obs['drug'] = condition_name

    pattern = r"_(\w+)\s*-\s*(\w+)\.fcs$"
    match_stim = re.search(pattern, path)
    if match_stim:
        # Group 1 is the stimulation token (unused); group 2 is the cell type.
        adata.obs['cell_type'] = match_stim.group(2)
    else:
        print('No cell_type found, bad format')

    adata.X = arcsinh_transform(adata.X)
    return adata

In [22]:
def concatenate_2conditions_data(path_stim,stim_name,path_unstim,unstim_name,outdir_path):
    """Merge one stimulated and one unstimulated FCS file and write the result.

    Both files are read with ``load_data_fcs`` (unstimulated first). The
    stimulated object is concatenated with the unstimulated one using
    ``batch_key="condition"`` and the categories ``["stim", "control"]``,
    and the merged object is written to ``outdir_path``.

    Returns
    -------
    None
    """
    control = load_data_fcs(path_unstim, unstim_name)
    stimulated = load_data_fcs(path_stim, stim_name)

    merged = stimulated.concatenate(
        control,
        batch_key="condition",
        batch_categories=["stim", "control"],
    )
    merged.write(outdir_path)

In [93]:
def concatenate_2conditions_multiple_data(path_stim_list,stim_name,path_unstim_list,unstim_name,outdir_path):
    """Merge several stimulated and unstimulated FCS files and write the result.

    Each list is loaded file by file with ``load_data_fcs`` and folded into a
    single object by repeated pairwise ``concatenate`` calls. The two
    per-condition objects are then concatenated with
    ``batch_key="condition"`` and the categories ``["stim", "control"]``,
    and the result is written to ``outdir_path``.

    Both lists must contain at least one path, since element 0 is read
    unconditionally.

    Returns
    -------
    tuple
        ``(combined, unstimulated_only, stimulated_only)``.
    """
    control_merged = load_data_fcs(path_unstim_list[0], unstim_name)
    stim_merged = load_data_fcs(path_stim_list[0], stim_name)

    # Fold the remaining stimulated files into the running stimulated object.
    for extra_stim_path in path_stim_list[1:]:
        stim_merged = stim_merged.concatenate(
            load_data_fcs(extra_stim_path, stim_name)
        )

    # Same for the remaining unstimulated files.
    for extra_unstim_path in path_unstim_list[1:]:
        control_merged = control_merged.concatenate(
            load_data_fcs(extra_unstim_path, unstim_name)
        )

    merged = stim_merged.concatenate(
        control_merged,
        batch_key="condition",
        batch_categories=["stim", "control"],
    )
    merged.write(outdir_path)
    return merged, control_merged, stim_merged

In [24]:
def concatenate_1condition_multiple_data(path_list,condition_name,outdir_path):
    """Merge several FCS files of a single condition and write the result.

    Every path in ``path_list`` is loaded with ``load_data_fcs`` under the
    label ``condition_name`` and folded into one object by repeated pairwise
    ``concatenate`` calls; the result is written to ``outdir_path``.

    ``path_list`` must contain at least one path, since element 0 is read
    unconditionally.

    Returns
    -------
    None
    """
    merged = load_data_fcs(path_list[0], condition_name)

    for extra_path in path_list[1:]:
        merged = merged.concatenate(load_data_fcs(extra_path, condition_name))

    merged.write(outdir_path)

###  Examples

In [97]:
# Build the LPS-vs-Unstim cMC dataset from the gated-populations folder.
# NOTE(review): patient_excluded='HCAA' presumably holds that patient out — confirm in list_paths.
gated_populations_dir = '/Users/MacBook/stanford/cellot/datasets/surge_prehab_controls - Gated Populations'
path_unstim_list = create_list_of_paths(
    directory=gated_populations_dir,
    stimulation='Unstim',
    cell_type='cMC',
    patient_excluded='HCAA',
)
path_stim_list = create_list_of_paths(
    directory=gated_populations_dir,
    stimulation='LPS',
    cell_type='cMC',
    patient_excluded='HCAA',
)
# `res` is the (combined, unstim, stim) tuple returned by the helper.
res = concatenate_2conditions_multiple_data(
    path_stim_list,
    'LPS',
    path_unstim_list,
    'Unstim',
    "datasets/sherlock_training_data/combined_LPS_cMC.h5ad",
)


... storing 'drug' as categorical
... storing 'cell_type' as categorical


### Pipeline for evaluation

In [12]:
# Evaluation table written for the 4i / cisplatin / cellot run.
evals_csv_path = "/Users/MacBook/stanford/cellot/results/4i/drug-cisplatin/model-cellot/evals_iid_data_space/evals.csv"
eval_4i = pd.read_csv(evals_csv_path)

In [13]:
# List the distinct values of the 'metric' column.
available_metrics = eval_4i['metric'].unique()
print('eval metric available:', available_metrics)

eval metric available: ['l2-means' 'l2-stds' 'r2-means' 'r2-stds' 'r2-pairwise_feat_corrs'
 'l2-pairwise_feat_corrs' 'mmd' 'enrichment-k50' 'enrichment-k100']


In [14]:
# Distinct values of the 'ncells' column.
eval_4i['ncells'].unique()

array([100, 250, 500])

In [16]:
# Marker columns passed one by one to plot_result in the evaluation loop.
EVALUATION_MARKERS = [
    '151Eu_pp38',
    '155Gd_pS6',
    '149Sm_pCREB',
    '159Tb_pMAPKAPK2',
    '166Er_pNFkB',
    '167Er_pERK12',
]

Train on cMC, pp38, pS6, LPS

In [107]:
# Call plot_result once per evaluation marker, with one output PNG path per marker.
prediction_csv = 'results/LPS_cMC/model-cellot/unseen_data/prediction_HCAA2.csv'
observed_h5ad = '/Users/MacBook/stanford/cellot/results/LPS_cMC/model-cellot/unseen_data/combined_LPS_cMC_HCAA2.h5ad'
for marker in EVALUATION_MARKERS:
    figure_path = f"results/LPS_cMC/model-cellot/unseen_data/LPS_cMC_{marker}.png"
    plot_result(prediction_csv, observed_h5ad, marker, figure_path)

In [64]:
# Load the combined LPS / HCAA dataset. `ad.read` is a deprecated alias in
# anndata; `ad.read_h5ad` is the explicit reader for .h5ad files.
combined_LPS_HCAA = ad.read_h5ad('results/test_1/model-cellot/unseen_data/combined_LPS_HCAA.h5ad')

In [70]:
# `res` is assigned above from concatenate_2conditions_multiple_data, which
# returns a (combined, unstim, stim) tuple; the 'condition' column lives on the
# combined object, so select it before filtering. A bare AnnData is still
# accepted so the cell also works if `res` was produced another way.
combined_res = res[0] if isinstance(res, tuple) else res
stim_mean = np.mean(combined_res[combined_res.obs['condition'] == 'stim'].X)
control_mean = np.mean(combined_res[combined_res.obs['condition'] == 'control'].X)
# Difference between the overall mean of stimulated and control values.
print('spread', stim_mean - control_mean)

spread 1.918738


In [58]:
from plot_pred_vs_true import plot_result

In [63]:
# Run plot_result for the pp38 marker and keep what it returns for inspection.
pp38_prediction_csv = 'results/test_1/model-cellot/unseen_data/prediction_HCAA.csv'
pp38_figure_path = "results/LPS_cMC/model-cellot/unseen_data/LPS_cMC_pp38.png"
pp38 = plot_result(pp38_prediction_csv, 'res.h5ad', '151Eu_pp38', pp38_figure_path)

In [31]:
# Display the object returned by plot_result for the '151Eu_pp38' marker.
pp38

Unnamed: 0,Stim True,Stim Pred,Unstim
0,0.000000,-0.001274,0.000000
1,0.277723,0.023110,0.000000
2,0.000000,-0.018605,0.000000
3,0.720187,0.013127,0.000000
4,0.022050,0.523236,0.396276
...,...,...,...
11968,0.611037,0.111098,0.079069
11969,1.245900,-0.005991,0.000000
11970,0.000000,0.424030,0.337948
11971,0.192864,0.078857,0.064347
