### Import libraries

In [26]:
import os
import anndata
import pandas as pd
import anndata as ad
from FlowCytometryTools import FCMeasurement
import re
import numpy as np
from list_paths import create_list_of_paths,create_list_of_paths_spec_patients
from plot_pred_vs_true import plot_result

### Create functions to load and concatenate data

In [20]:
def arcsinh_transform(X,cofactor=5):
    """Apply the inverse hyperbolic sine transform ``arcsinh(X / cofactor)``.

    Parameters
    ----------
    X : array-like
        Values to transform. Plain lists and tuples are converted to a NumPy
        array first; any other object is divided by ``cofactor`` as-is.
    cofactor : scalar or array-like, default 5
        Divisor applied before ``arcsinh``. A scalar cofactor must be non-zero.

    Returns
    -------
    The result of ``np.arcsinh(X / cofactor)``.

    Raises
    ------
    ValueError
        If ``cofactor`` is a scalar equal to zero (the division would only
        produce inf/NaN values).
    """
    # Only scalars are checked so that array-valued cofactors keep working.
    if np.isscalar(cofactor) and cofactor == 0:
        raise ValueError("cofactor must be non-zero")
    # Python sequences do not support division by a scalar; arrays do.
    if isinstance(X, (list, tuple)):
        X = np.asarray(X)
    return np.arcsinh(X / cofactor)

In [21]:
def load_data_fcs(path,condition_name):
    """Load one FCS file into an AnnData object and arcsinh-transform it.

    Parameters
    ----------
    path : str
        Path to the ``.fcs`` file. The cell type is parsed from a file name
        ending in ``_<stim> - <cell_type>.fcs``.
    condition_name : str
        Label stored in ``obs['drug']`` for every row of the file.

    Returns
    -------
    AnnData
        Built from the measurement's ``data`` table, with ``obs['drug']``
        always set, ``obs['cell_type']`` set when the file name matches the
        expected pattern, and ``X`` passed through ``arcsinh_transform``
        with its default cofactor.
    """
    measurement = FCMeasurement(ID="Sample", datafile=path)
    # Named `adata` (not `anndata`) so the imported `anndata` module is not shadowed.
    adata = ad.AnnData(measurement.data)

    # The condition label comes from the caller, not from the file name, so it
    # is recorded even when the file name cannot be parsed.
    adata.obs['drug']= condition_name

    pattern = r"_(\w+)\s*-\s*(\w+)\.fcs$"
    match_stim = re.search(pattern, path)
    if match_stim:
        # Group 1 (the stimulation token) is not used; group 2 is the cell type.
        adata.obs['cell_type']= match_stim.group(2)
    else:
        print('No cell_type found, bad format')
    adata.X=arcsinh_transform(adata.X)
    return adata

In [22]:
def concatenate_2conditions_data(path_stim,stim_name,path_unstim,unstim_name,outdir_path):
    """Combine one stimulated and one unstimulated FCS file and write the result.

    Both files are loaded with ``load_data_fcs`` and stacked; the new
    ``obs['condition']`` column is ``"stim"`` for rows from ``path_stim`` and
    ``"control"`` for rows from ``path_unstim``.

    Parameters
    ----------
    path_stim, path_unstim : str
        Paths of the stimulated / unstimulated ``.fcs`` files.
    stim_name, unstim_name : str
        Condition labels forwarded to ``load_data_fcs``.
    outdir_path : str
        Passed directly to ``AnnData.write``, i.e. it is the output file path.

    Returns
    -------
    None
    """
    control_cells = load_data_fcs(path_unstim, unstim_name)
    treated_cells = load_data_fcs(path_stim, stim_name)

    merged = treated_cells.concatenate(
        control_cells,
        batch_key="condition",
        batch_categories=["stim", "control"],
    )
    merged.write(outdir_path)
    return None

In [None]:
def concatenate_2conditions_multiple_data(path_stim_list,stim_name,path_unstim_list,unstim_name,outdir_path):
    """Combine several stimulated and unstimulated FCS files and write the result.

    Files of each condition are loaded with ``load_data_fcs`` and concatenated
    one at a time in list order. The two per-condition objects are then stacked
    with an ``obs['condition']`` column that is ``"stim"`` for rows from
    ``path_stim_list`` and ``"control"`` for rows from ``path_unstim_list``.

    Parameters
    ----------
    path_stim_list, path_unstim_list : sequence of str
        Paths of the stimulated / unstimulated ``.fcs`` files. Both must be
        non-empty.
    stim_name, unstim_name : str
        Condition labels forwarded to ``load_data_fcs``.
    outdir_path : str
        Passed directly to ``AnnData.write``, i.e. it is the output file path.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If either path list is empty.
    """
    # Fail before reading any file, with a message that names the real problem
    # instead of an IndexError on `[0]`.
    if len(path_stim_list) == 0 or len(path_unstim_list) == 0:
        raise ValueError(
            "path_stim_list and path_unstim_list must each contain at least one path"
        )

    comb_unstim_anndata = load_data_fcs(path_unstim_list[0],unstim_name)
    comb_stim_anndata = load_data_fcs(path_stim_list[0],stim_name)

    for path_stim in path_stim_list[1:]:
        stim_anndata = load_data_fcs(path_stim,stim_name)
        comb_stim_anndata = comb_stim_anndata.concatenate(stim_anndata)

    for path_unstim in path_unstim_list[1:]:
        unstim_anndata = load_data_fcs(path_unstim,unstim_name)
        comb_unstim_anndata = comb_unstim_anndata.concatenate(unstim_anndata)

    combined_anndata = comb_stim_anndata.concatenate(
        comb_unstim_anndata,
        batch_key="condition",
        batch_categories=["stim", "control"],
    )

    combined_anndata.write(outdir_path)
    return

In [24]:
def concatenate_1condition_multiple_data(path_list,condition_name,outdir_path):
    """Combine several FCS files of one condition and write the result.

    Each file is loaded with ``load_data_fcs`` and concatenated onto the
    running result one at a time, in list order.

    Parameters
    ----------
    path_list : sequence of str
        Paths of the ``.fcs`` files. Must be non-empty.
    condition_name : str
        Condition label forwarded to ``load_data_fcs`` for every file.
    outdir_path : str
        Passed directly to ``AnnData.write``, i.e. it is the output file path.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``path_list`` is empty.
    """
    # Fail before reading any file, with a message that names the real problem
    # instead of an IndexError on `[0]`.
    if len(path_list) == 0:
        raise ValueError("path_list must contain at least one path")

    comb_anndata = load_data_fcs(path_list[0],condition_name)

    for path in path_list[1:]:
        new_anndata = load_data_fcs(path,condition_name)
        comb_anndata = comb_anndata.concatenate(new_anndata)
    comb_anndata.write(outdir_path)
    return

###  Examples

In [143]:
# Build the combined LPS / Unstim dataset for the cMC cell type.
gated_populations_dir = 'datasets/PTB drug screen complete - Gated Populations'
path_unstim_list = create_list_of_paths(
    directory=gated_populations_dir,
    stimulation='Unstim',
    cell_type='cMC',
)
path_stim_list = create_list_of_paths(
    directory=gated_populations_dir,
    stimulation='LPS',
    cell_type='cMC',
)
# The call writes the .h5ad file; the function has a bare `return`, so `res` is None.
res = concatenate_2conditions_multiple_data(
    path_stim_list,
    'LPS',
    path_unstim_list,
    'Unstim',
    "datasets/PTB_training/combined_LPS_cMC.h5ad",
)


... storing 'drug' as categorical
... storing 'cell_type' as categorical


### Pipeline for evaluation

In [116]:
# Evaluation table stored under the LPS_cMC / model-cellot results folder.
evals_csv_path = "results/LPS_cMC/model-cellot/evals_iid_data_space/evals.csv"
eval_4i = pd.read_csv(evals_csv_path)

In [117]:
# Distinct values of the `metric` column in the evaluation table.
available_metrics = eval_4i['metric'].unique()
print('eval metric available:', available_metrics)

eval metric available: ['l2-means' 'l2-stds' 'r2-means' 'r2-stds' 'r2-pairwise_feat_corrs'
 'l2-pairwise_feat_corrs' 'mmd' 'enrichment-k50' 'enrichment-k100']


In [118]:
# Distinct values of the `ncells` column in the evaluation table.
eval_4i['ncells'].unique()

array([ 100,  250,  500, 1000, 1500])

In [16]:
# Marker (channel) names used for evaluation.
EVALUATION_MARKERS = [
    '151Eu_pp38',
    '155Gd_pS6',
    '149Sm_pCREB',
    '159Tb_pMAPKAPK2',
    '166Er_pNFkB',
    '167Er_pERK12',
]

In [145]:
# Marker (channel) names as spelled in the PTB dataset; iterated by the plotting loop.
PTB_EVALUATION_MARKERS = [
    '151Eu_pP38',
    '155Gd_pS6',
    '149Sm_pCREB',
    '159Tb_pMK2',
    '166Er_pNFkB',
    '167Er_pERK',
]

train on CMC, pp38, pS6, LPS

In [148]:
# Load the combined LPS / cMC dataset and keep only the 151Eu_pP38 column.
original_path = "datasets/PTB_training/combined_LPS_cMC.h5ad"
# `ad.read` is a deprecated alias; `read_h5ad` is the explicit reader for .h5ad files.
target = ad.read_h5ad(original_path)
# `.copy()` turns the single-marker view into an independent AnnData object.
target1 = target[:, '151Eu_pP38'].copy()

In [154]:
# Values of `target1` for the rows whose condition is 'control', flattened to 1-D.
is_control = target1.obs['condition'] == 'control'
control_cells = target1[is_control]
control_cells.X.flatten()

ArrayView([2.0405424, 1.9779768, 0.6029839, ..., 0.8639047, 0.902362 ,
           1.681758 ], dtype=float32)

In [157]:
# Call plot_result once per PTB marker with a marker-specific .png path.
# NOTE(review): the arguments look like prediction CSV, reference .h5ad, marker
# and output image; confirm against plot_pred_vs_true.plot_result.
prediction_csv = 'results/LPS_cMC/model-cellot/PTB/prediction.csv'
reference_h5ad = 'datasets/PTB_training/combined_LPS_cMC.h5ad'
for marker in PTB_EVALUATION_MARKERS:
    figure_path = f"results/LPS_cMC/model-cellot/PTB/LPS_cMC_{marker}.png"
    plot_result(prediction_csv, reference_h5ad, marker, figure_path)

In [135]:
# Evaluation table for the single marker 151Eu_pp38.
marker_evals_csv_path = "results/LPS_cMC/model-cellot/evals_iid_data_space/evals_151Eu_pp38.csv"
ev = pd.read_csv(marker_evals_csv_path)

In [142]:
# Keep only the rows of the per-marker table whose metric is 'ks-pval'.
is_ks_pval = ev['metric'] == 'ks-pval'
ev.loc[is_ks_pval]

Unnamed: 0,ncells,nfeatures,metric,value
5,100,all,ks-pval,0.5830091
14,100,all,ks-pval,0.702057
23,100,all,ks-pval,0.1548387
32,100,all,ks-pval,0.8154147
41,100,all,ks-pval,0.2111701
50,100,all,ks-pval,0.2111701
59,100,all,ks-pval,0.03638429
68,100,all,ks-pval,0.3681878
77,100,all,ks-pval,0.2111701
86,100,all,ks-pval,0.1548387
