### Import libraries

In [1]:
import os
import anndata
import pandas as pd
import anndata as ad
from FlowCytometryTools import FCMeasurement
import re
import numpy as np
#from list_paths import create_list_of_paths,create_list_of_paths_spec_patients
from plot_pred_vs_true import plot_result

### Create functions to load and concatenate data

In [4]:
def arcsinh_transform(X,cofactor=5):
    """Scale ``X`` by ``cofactor`` and apply the inverse hyperbolic sine.

    Parameters
    ----------
    X : scalar or numpy array
        Values to transform; must support division by ``cofactor``.
    cofactor : number, default 5
        Divisor applied before ``np.arcsinh``.

    Returns
    -------
    Same shape as ``X``: ``arcsinh(X / cofactor)``.
    """
    scaled = X / cofactor
    return np.arcsinh(scaled)

In [5]:
def load_data_fcs(path,condition_name):
    """Load one FCS file into an AnnData object and arcsinh-transform its matrix.

    The cell type is parsed from the end of ``path``, which is expected to
    look like ``..._<stim> - <cell_type>.fcs``.

    Parameters
    ----------
    path : str
        Location of the ``.fcs`` file.
    condition_name : str
        Label written to ``obs['drug']`` for every row of the file.

    Returns
    -------
    AnnData
        Built from the measurement's data table, with ``X`` passed through
        ``arcsinh_transform``. ``obs['drug']`` is always set;
        ``obs['cell_type']`` is set only when the file name could be parsed.
    """
    sample = FCMeasurement(ID="Sample", datafile=path)
    # Named `adata` so the local does not shadow the imported `anndata` module.
    adata = ad.AnnData(sample.data)

    # The condition label comes from the caller, not from the file name, so it
    # is recorded even when the name below cannot be parsed.
    adata.obs['drug'] = condition_name

    pattern = r"_(\w+)\s*-\s*(\w+)\.fcs$"
    match_stim = re.search(pattern, path)
    if match_stim:
        # group(1) is the stimulation token; only the cell type is kept.
        adata.obs['cell_type'] = match_stim.group(2)
    else:
        # Best-effort: keep going without a cell_type column.
        print('No cell_type found, bad format')

    adata.X = arcsinh_transform(adata.X)
    return adata

In [6]:
def concatenate_2conditions_data(path_stim,stim_name,path_unstim,unstim_name,outdir_path):
    """Merge one stimulated and one unstimulated FCS file and write the result.

    Both files are loaded with ``load_data_fcs``. The merged object gets a
    ``condition`` column whose value is ``"stim"`` for the stimulated file and
    ``"control"`` for the unstimulated one, and is written to ``outdir_path``.

    Returns
    -------
    None
    """
    # Control is loaded first, then the stimulated sample.
    control = load_data_fcs(path_unstim, unstim_name)
    stimulated = load_data_fcs(path_stim, stim_name)

    merged = stimulated.concatenate(
        control,
        batch_key="condition",
        batch_categories=["stim", "control"],
    )
    merged.write(outdir_path)
    return None

In [6]:
def concatenate_2conditions_multiple_data(path_stim_list,stim_name,path_unstim_list,unstim_name,outdir_path):
    """Merge several stimulated and several unstimulated FCS files and write the result.

    Every path is loaded with ``load_data_fcs``. Files of the same condition
    are concatenated one after another, then the two accumulated objects are
    joined with a ``condition`` column (``"stim"`` / ``"control"``) and written
    to ``outdir_path``.

    Both lists must contain at least one path (the first element is indexed
    directly).

    Returns
    -------
    None
    """
    # Seed both accumulators with the first file of each list (control first).
    control_merged = load_data_fcs(path_unstim_list[0], unstim_name)
    stim_merged = load_data_fcs(path_stim_list[0], stim_name)

    # NOTE(review): files are appended pairwise, so the accumulated object is
    # rebuilt on every iteration — confirm this is acceptable for long lists.
    for extra_stim_path in path_stim_list[1:]:
        stim_merged = stim_merged.concatenate(
            load_data_fcs(extra_stim_path, stim_name)
        )

    for extra_control_path in path_unstim_list[1:]:
        control_merged = control_merged.concatenate(
            load_data_fcs(extra_control_path, unstim_name)
        )

    merged = stim_merged.concatenate(
        control_merged,
        batch_key="condition",
        batch_categories=["stim", "control"],
    )
    merged.write(outdir_path)
    return None

In [6]:
def concatenate_1condition_multiple_data(path_list,condition_name,outdir_path):
    """Merge several FCS files of a single condition and write the result.

    Every path is loaded with ``load_data_fcs`` using ``condition_name`` and
    appended to the running object; the final object is written to
    ``outdir_path``. ``path_list`` must contain at least one path.

    Returns
    -------
    None
    """
    merged = load_data_fcs(path_list[0], condition_name)

    for extra_path in path_list[1:]:
        merged = merged.concatenate(load_data_fcs(extra_path, condition_name))

    merged.write(outdir_path)
    return None

###  Examples

In [7]:
from list_paths import create_list_of_paths2,create_list_of_paths_spec_patients

In [8]:
# Build the Unstim / LPS path lists for the cMC population; the patients
# 'HCAA' and 'IC1' are passed as `patient_excluded`.
path_unstim_list = create_list_of_paths2(
    directory='datasets/surge_prehab_controls - Gated Populations',
    stimulation='Unstim',
    cell_type='cMC',
    patient_excluded=['HCAA','IC1'],
)
path_stim_list = create_list_of_paths2(
    directory='datasets/surge_prehab_controls - Gated Populations',
    stimulation='LPS',
    cell_type='cMC',
    patient_excluded=['HCAA','IC1'],
)
# The helper ends with a bare `return`, so `res` is None; the useful effect is
# the .h5ad file written to the last argument.
res = concatenate_2conditions_multiple_data(
    path_stim_list,
    'LPS',
    path_unstim_list,
    'Unstim',
    "datasets/sherlock_training_data/combined_LPS_cMC_train.h5ad",
)


... storing 'drug' as categorical
... storing 'cell_type' as categorical


In [9]:
# Same merge as above, restricted to patient 'HV01' of the PTB drug screen.
path_unstim_pred = create_list_of_paths_spec_patients(
    directory='datasets/PTB drug screen complete - Gated Populations',
    stimulation='Unstim',
    cell_type='cMC',
    patient='HV01',
)
path_stim_pred = create_list_of_paths_spec_patients(
    directory='datasets/PTB drug screen complete - Gated Populations',
    stimulation='LPS',
    cell_type='cMC',
    patient='HV01',
)
# `res` is None (the helper returns nothing); the output is the written file.
res = concatenate_2conditions_multiple_data(
    path_stim_pred,
    'LPS',
    path_unstim_pred,
    'Unstim',
    "datasets/PTB_training/LPS_cMC_HV01.h5ad",
)

... storing 'drug' as categorical
... storing 'cell_type' as categorical


### Pipeline for evaluation

In [116]:
# Evaluation table read from the model-cellot results folder (evals_iid_data_space).
eval_4i=pd.read_csv("results/LPS_cMC/model-cellot/evals_iid_data_space/evals.csv")

In [117]:
# List the distinct metric names found in the 'metric' column.
print('eval metric available:',eval_4i['metric'].unique())

eval metric available: ['l2-means' 'l2-stds' 'r2-means' 'r2-stds' 'r2-pairwise_feat_corrs'
 'l2-pairwise_feat_corrs' 'mmd' 'enrichment-k50' 'enrichment-k100']


In [118]:
# Distinct values of the 'ncells' column.
eval_4i.ncells.unique()

array([ 100,  250,  500, 1000, 1500])

In [2]:
# Marker (channel) names used for evaluation.
# NOTE(review): spellings differ from PTB_EVALUATION_MARKERS (e.g. 'pp38' vs
# 'pP38') — presumably the two datasets name their channels differently; confirm.
EVALUATION_MARKERS = [
    '151Eu_pp38',
    '155Gd_pS6',
    '149Sm_pCREB',
    '159Tb_pMAPKAPK2',
    '166Er_pNFkB',
    '167Er_pERK12',
]

In [None]:
# Marker (channel) names iterated over when plotting the PTB predictions.
PTB_EVALUATION_MARKERS = [
    '151Eu_pP38',
    '155Gd_pS6',
    '149Sm_pCREB',
    '159Tb_pMK2',
    '166Er_pNFkB',
    '167Er_pERK',
]

Train on cMC, pp38, pS6, LPS

In [148]:
# NOTE(review): this file is not written by any cell visible in this notebook;
# confirm it exists before running from a fresh kernel.
original_path = "datasets/PTB_training/combined_LPS_cMC.h5ad"
# `ad.read` is a deprecated alias; `read_h5ad` is the explicit .h5ad reader.
target = ad.read_h5ad(original_path)
# Keep only the '151Eu_pP38' variable; .copy() detaches the slice from `target`.
target1 = target[:, '151Eu_pP38'].copy()

In [154]:
# Values of the selected marker for rows whose 'condition' is 'control', flattened to 1-D.
target1[target1.obs['condition'] == 'control'].X.flatten()

ArrayView([2.0405424, 1.9779768, 0.6029839, ..., 0.8639047, 0.902362 ,
           1.681758 ], dtype=float32)

In [None]:
# One call to plot_result per PTB marker; each call gets its own output .png path.
for marker in PTB_EVALUATION_MARKERS:
    plot_result(
        'results/PTB/LPS_cMC/model-cellot/pred.csv',
        'datasets/PTB_training/LPS_cMC_HV01.h5ad',
        marker,
        f"results/PTB/LPS_cMC/model-cellot/unseen_plot/LPS_cMC_HV01_{marker}.png",
    )