In [12]:
import os
import anndata
import pandas as pd
import anndata as ad
from FlowCytometryTools import FCMeasurement
import re

### Create functions to load and concatenate data

In [32]:
def load_data_fcs(path,condition_name):
    """Load one FCS file into an AnnData object and label its cells.

    Parameters
    ----------
    path : str
        Path to the ``.fcs`` file. The cell type is parsed from a filename
        ending in ``_<something> - <cell_type>.fcs``.
    condition_name : str
        Value written to ``obs['drug']`` for every cell of this file.

    Returns
    -------
    AnnData
        Object built from the measurement's ``.data`` table, with
        ``obs['drug']`` always set and ``obs['cell_type']`` set when the
        filename matches the expected pattern.
    """
    import warnings  # local import: only needed for the non-matching-filename case

    measurement = FCMeasurement(ID="Sample", datafile=path)
    # Named `adata` so the imported `anndata` module is not shadowed.
    adata = ad.AnnData(measurement.data)

    # The condition label does not depend on the filename, so set it
    # unconditionally; otherwise a non-matching file would come back unlabelled
    # and produce missing values after concatenation.
    adata.obs['drug'] = condition_name

    pattern = r"_(\w+)\s*-\s*(\w+)\.fcs$"
    match_stim = re.search(pattern, path)
    if match_stim:
        # group(1) is the text before the dash (unused); group(2) is the cell type.
        adata.obs['cell_type'] = match_stim.group(2)
    else:
        warnings.warn(
            f"Could not parse a cell type from {path!r}; 'cell_type' was not set.",
            stacklevel=2,
        )
    return adata

In [None]:
def concatenate_2conditions_data(path_stim,stim_name,path_unstim,unstim_name,outdir_path):
    """Merge one stimulated and one unstimulated FCS file and save the result.

    Both files are loaded with ``load_data_fcs`` using their respective
    condition labels. The merged object gets an ``obs['condition']`` column
    with the value ``"stim"`` for cells from ``path_stim`` and ``"control"``
    for cells from ``path_unstim``, and is written to ``outdir_path`` (a file
    path, despite the name). Nothing is returned.
    """
    control_cells = load_data_fcs(path_unstim, unstim_name)
    treated_cells = load_data_fcs(path_stim, stim_name)

    # Stimulated cells go first so they pair with the first batch category.
    merged = treated_cells.concatenate(
        control_cells,
        batch_key="condition",
        batch_categories=["stim", "control"],
    )
    merged.write(outdir_path)
    return None

In [None]:
def concatenate_2conditions_multiple_data(path_stim_list,stim_name,path_unstim_list,unstim_name,outdir_path):
    """Merge several stimulated and several unstimulated FCS files and save them.

    Every file in ``path_stim_list`` is loaded with the label ``stim_name`` and
    every file in ``path_unstim_list`` with ``unstim_name``. Each list is merged
    on its own, so the two lists may have different lengths without any file
    being dropped. The two merged sets are then concatenated with an
    ``obs['condition']`` column (``"stim"`` / ``"control"``) and written to
    ``outdir_path`` (a file path).

    Raises
    ------
    ValueError
        If either list of paths is empty.
    """
    if len(path_stim_list) == 0 or len(path_unstim_list) == 0:
        raise ValueError(
            "path_stim_list and path_unstim_list must each contain at least one file"
        )

    def _merge_files(paths, label):
        # Load every file of one condition and fold them into a single object.
        merged = load_data_fcs(paths[0], label)
        for path in paths[1:]:
            merged = merged.concatenate(load_data_fcs(path, label))
        return merged

    comb_unstim_anndata = _merge_files(path_unstim_list, unstim_name)
    comb_stim_anndata = _merge_files(path_stim_list, stim_name)

    # Stimulated cells first so they pair with the "stim" category.
    combined_anndata = comb_stim_anndata.concatenate(
        comb_unstim_anndata,
        batch_key="condition",
        batch_categories=["stim", "control"],
    )

    combined_anndata.write(outdir_path)
    return

In [None]:
def concatenate_1condition_multiple_data(path_list,condition_name,outdir_path):
    """Merge several FCS files of the same condition and save the result.

    Each file in ``path_list`` is loaded with ``load_data_fcs`` using
    ``condition_name`` as its label; the files are concatenated in list order
    and the merged object is written to ``outdir_path`` (a file path).

    Raises
    ------
    ValueError
        If ``path_list`` is empty (for example when a directory scan matched
        no files).
    """
    if len(path_list) == 0:
        raise ValueError("path_list must contain at least one file")

    comb_anndata = load_data_fcs(path_list[0], condition_name)
    for path in path_list[1:]:
        comb_anndata = comb_anndata.concatenate(load_data_fcs(path, condition_name))

    comb_anndata.write(outdir_path)
    return

In [48]:
def create_list_of_paths(directory,stimulation,cell_type=None):
    """List the ``.fcs`` files in ``directory`` that match a stimulation and cell type.

    Parameters
    ----------
    directory : str
        Folder to scan (not recursive).
    stimulation : str
        Substring that must appear in the filename (case-sensitive).
    cell_type : str, optional
        Second substring that must also appear in the filename; ``None``
        disables this filter.

    Returns
    -------
    list of str
        Matching paths (``directory`` joined with the filename), sorted by
        filename so the order is the same on every machine and every run.
    """
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    return [
        os.path.join(directory, filename)
        for filename in sorted(os.listdir(directory))
        if filename.endswith(".fcs")
        and stimulation in filename
        and (cell_type is None or cell_type in filename)
    ]

### Examples

In [None]:
# Example: combine one unstimulated and one stimulated B-cell file.
path_unstim='datasets/atest_data/controls_1_Surge_3_HCAA_BL_Unstim - Bcells.fcs'
path_stim = "datasets/atest_data/controls_1_Surge_3_IC1_IC1_Stim_1 - Bcells.fcs"
outdir_path = "datasets/atest_data/combined_Bcell3.h5ad"
concatenate_2conditions_data(path_stim,'Stim_1_Bcell', path_unstim,'Unstim_Bcell', outdir_path)
# Read back the file that was just written; reusing `outdir_path` keeps the two
# paths from drifting apart, and `read_h5ad` replaces the deprecated `ad.read` alias.
combined_anndata=ad.read_h5ad(outdir_path)

... storing 'drug' as categorical
... storing 'cell_type' as categorical


In [None]:
# Example: export a single unstimulated B-cell file to use as prediction input.
path_unstim_to_predict='datasets/surge_prehab_controls - Gated Populations/controls_1_Surge_3_HCBB_BL_Unstim - Bcells.fcs'
# load_data_fcs requires a condition label; 'Unstim_Bcell' matches the label used
# for the unstimulated B cells in the two-condition example. Adjust if the
# downstream model expects a different 'drug' value.
unstim_anndata_to_predict = load_data_fcs(path_unstim_to_predict,'Unstim_Bcell')
unstim_anndata_to_predict.write("datasets/atest_data/unstim_Bcell_to_predict.h5ad")

... storing 'drug' as categorical


In [None]:
# Example: combine lists of unstimulated and stimulated files (one file each here).
# NOTE(review): the unstimulated file is the NK population while the stimulated
# file is granulocytes - confirm that mixing cell types here is intended.
path_unstim_list=['datasets/surge_prehab_controls - Gated Populations/controls_1_Surge_3_HCKK_IDX_Unstim - NK.fcs']
path_stim_list=['datasets/surge_prehab_controls - Gated Populations/controls_1_Surge_3_IC1_IC1_Stim_1 - granulocytes.fcs']
# NOTE(review): the function as defined in this notebook ends with a bare `return`,
# so `res` is None; the combined data only exists in the written .h5ad file.
res=concatenate_2conditions_multiple_data(path_stim_list,'Stim_1',path_unstim_list,'Unstim',"datasets/atest_data/combined_multiple.h5ad")

### Pipeline for evaluation

In [None]:
# Load an evaluation table (evals.csv) into a DataFrame; presumably produced by a
# cellot evaluation run, judging by the path - TODO confirm.
# NOTE(review): absolute, machine-specific path - this cell will not run elsewhere.
eval_4i=pd.read_csv("/Users/MacBook/stanford/cellot/results/4i/drug-cisplatin/model-cellot/evals_iid_data_space/evals.csv")

In [10]:
# Show the distinct values found in the 'metric' column of the evaluation table.
print('eval metric available:',eval_4i['metric'].unique())

eval metric available: ['l2-means' 'l2-stds' 'r2-means' 'r2-stds' 'r2-pairwise_feat_corrs'
 'l2-pairwise_feat_corrs' 'mmd' 'enrichment-k50' 'enrichment-k100']


In [12]:
# Distinct values of the table's `ncells` column.
eval_4i.ncells.unique()

array([100, 250, 500])

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
def create_density_plots(dist_data, out_file, title_suffix=""):
    """
    Create density plots for Unstim, Stim True, and Stim Pred distributions.

    Parameters
    ----------
    dist_data : dict
        Maps a patient identifier to a dict with the keys ``"Unstim"``,
        ``"Stim True"`` and ``"Stim Pred"``, each holding a 1-D array-like of
        numeric values. A missing key, an empty array or an array with only
        NaNs is skipped for that panel.
    out_file : str
        Path of the image file to write.
    title_suffix : str, optional
        Text appended to the figure title.

    Notes
    -----
    One panel is drawn per patient, three panels per row, in sorted patient
    order. NaN values are removed before plotting so the ``n=`` shown in the
    legend is the number of values actually used.
    """
    sns.set_theme(style="whitegrid")
    pts_sorted = sorted(dist_data.keys())

    n_cols = 3
    # At least one row so the figure keeps a valid size when dist_data is empty.
    rows = max(1, int(np.ceil(len(pts_sorted) / n_cols)))

    # Height scales with the number of rows (three rows give the 18x18 canvas).
    fig = plt.figure(figsize=(18, 6 * rows))
    fig.suptitle(f"Density Plots {title_suffix}", fontsize=16)

    cat_labels = ["Unstim", "Stim True", "Stim Pred"]
    cat_colors = ["blue", "red", "green"]

    for i, pt in enumerate(pts_sorted, start=1):
        ax = fig.add_subplot(rows, n_cols, i)
        for label, color in zip(cat_labels, cat_colors):
            values = dist_data[pt].get(label)
            if values is None:
                continue
            arr = np.asarray(values, dtype=float).ravel()
            # Drop NaN padding so it neither reaches the KDE nor inflates n.
            arr = arr[~np.isnan(arr)]
            if arr.size > 0:
                sns.kdeplot(arr, ax=ax, label=f"{label} (n={arr.size})", color=color, fill=False)
        ax.set_title(f"Patient: {pt}")
        # Only add a legend when something was drawn, to avoid an empty-legend warning.
        if ax.get_legend_handles_labels()[0]:
            ax.legend(fontsize=8)

    # Operate on this figure explicitly rather than on pyplot's "current" figure.
    fig.tight_layout(rect=[0, 0.03, 1, 0.95])
    fig.savefig(out_file, dpi=150)
    plt.close(fig)

In [None]:
# Scratch cell: pull one marker out of the combined B-cell file and split it by condition.
# `read_h5ad` replaces the deprecated `ad.read` alias.
target=ad.read_h5ad('./datasets/atest_data/combined_Bcell.h5ad')
target1=target[:,'150Nd_pSTAT5'].copy()
# All cells for this marker, then the "stim" and "control" subsets.
t=pd.DataFrame(target1.X)
stim=pd.DataFrame(target1[target1.obs['condition']=='stim'].X)
unstim=pd.DataFrame(target1[target1.obs['condition']=='control'].X)

In [96]:
def plot_result(prediction_path,original_path,marker,outdir_path):
    """Plot true vs. predicted distributions of one marker and return the values.

    Parameters
    ----------
    prediction_path : str
        CSV file of predictions; must contain a column named ``marker``.
    original_path : str
        ``.h5ad`` file whose ``obs['condition']`` column takes the values
        ``"stim"`` and ``"control"``.
    marker : str
        Variable (column) name to compare.
    outdir_path : str
        Path of the image file written by ``create_density_plots``.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Stim True'``, ``'Stim Pred'`` and ``'Unstim'``. The three
        groups can have different numbers of cells; shorter columns are padded
        with NaN. Rows are positional and are not matched cells.

    Raises
    ------
    KeyError
        If ``marker`` is not a column of the prediction file.
    """
    target = ad.read_h5ad(original_path)
    target1 = target[:, marker].copy()
    stim = pd.DataFrame(target1[target1.obs['condition']=='stim'].X).iloc[:, 0]
    unstim = pd.DataFrame(target1[target1.obs['condition']=='control'].X).iloc[:, 0]

    dataf = pd.read_csv(prediction_path)
    if marker not in dataf.columns:
        raise KeyError(f"marker {marker!r} not found in prediction file {prediction_path!r}")
    pred = dataf[marker].reset_index(drop=True)

    # Plot each group with all of its own cells. Assigning the true values into
    # the prediction frame would align on row index and truncate or NaN-pad
    # them to the number of predictions.
    dist_data = {
        "Patient_1": {
            "Stim True": stim.to_numpy(),
            "Stim Pred": pred.to_numpy(),
            "Unstim": unstim.to_numpy(),
        }
    }

    create_density_plots(dist_data, outdir_path, title_suffix="")

    # Outer-align on position so no group loses cells in the returned table.
    return pd.concat(
        [stim.rename('Stim True'), pred.rename('Stim Pred'), unstim.rename('Unstim')],
        axis=1,
    )

In [98]:
# Compare predicted and measured 150Nd_pSTAT5 and save the density plot.
# NOTE(review): the prediction and plot paths are absolute and machine-specific.
plot_result('/Users/MacBook/stanford/cellot/results/test_new/unseen_data_result/prediction.csv','./datasets/atest_data/combined_Bcell.h5ad','150Nd_pSTAT5',"/Users/MacBook/stanford/cellot/plots/density_plot_test1.png")

Unnamed: 0,Stim True,Stim Pred,Unstim
0,0.000000,7.837983,0.00000
1,9.945632,3.335905,0.00000
2,6.893324,14.173313,0.00000
3,21.470419,3.335905,1.05323
4,16.403343,3.335905,0.00000
...,...,...,...
5934,5.409459,14.412180,0.00000
5935,6.349995,8.230350,0.00000
5936,3.295533,3.335905,0.00000
5937,2.568134,3.335905,0.00000
