In [1]:
import scanpy as sc
import anndata as adata
from scipy import sparse

In [2]:
def adata2numpy(adata):
    """Return the expression matrix ``adata.X`` as a dense array.

    Parameters
    ----------
    adata
        Any object exposing an ``X`` attribute (an AnnData in this notebook).

    Returns
    -------
    ``adata.X.toarray()`` when ``X`` is a SciPy sparse container, otherwise
    ``adata.X`` itself (no copy is made in that case).
    """
    matrix = adata.X
    if sparse.issparse(matrix):
        # ``.toarray()`` instead of the ``.A`` shortcut: ``.A`` was deprecated
        # and then removed in recent SciPy releases, while ``.toarray()`` is
        # available on every sparse format.
        return matrix.toarray()
    return matrix

In [3]:
# Cell-type labels handled for each dataset.
names = {
    "pbmc": ['B', 'CD4T', 'CD8T', 'CD14+Mono', 'Dendritic', 'FCGR3A+Mono', 'NK'],
    "hpoly": ['Endocrine', 'Enterocyte', 'Enterocyte.Progenitor', 'Goblet', 'Stem', 'TA', 'TA.Early'],
    "study": ['B', 'CD4T', 'CD8T', 'CD14+Mono', 'Dendritic', 'FCGR3A+Mono', 'NK'],
}

# Per dataset: [0] stimulated condition label, [1] control condition label,
# [2] 'pred' (presumably the label given to predictions — the notebook
# hard-codes that string rather than reading it from here), [3] name of the
# obs column that stores the cell type.
keys = {
    "pbmc": ['stimulated', 'control', 'pred', 'cell_type'],
    "hpoly": ['Hpoly.Day10', 'Control', 'pred', 'cell_label'],
    "study": ['stimulated', 'control', 'pred', 'cell_type'],
}

# One AnnData per dataset, read from the valid_*.h5ad files.
valid = {
    "pbmc": sc.read("../../data/valid_pbmc.h5ad"),
    "hpoly": sc.read("../../data/valid_hpoly.h5ad"),
    "study": sc.read("../../data/valid_study.h5ad"),
}

# Rows of each validation set whose condition equals the stimulated label.
stim = {
    dataset: frame[frame.obs['condition'] == keys[dataset][0]]
    for dataset, frame in valid.items()
}

# Rows of each validation set whose condition equals the control label.
ctrl = {
    dataset: frame[frame.obs['condition'] == keys[dataset][1]]
    for dataset, frame in valid.items()
}


This is where adjacency matrices should go now.
  warn(

This is where adjacency matrices should go now.
  warn(

This is where adjacency matrices should go now.
  warn(

This is where adjacency matrices should go now.
  warn(


In [4]:
def normalize(adata, name = 'pbmc'):
    """Keep only the predicted rows of ``adata`` and append the real cells.

    The rows whose ``condition`` is ``'pred'`` are selected and concatenated
    with ``ctrl[name]`` and ``stim[name]`` (in that order) through
    ``AnnData.concatenate``.

    Parameters
    ----------
    adata
        AnnData whose ``obs`` has a ``condition`` column.
    name
        Key into the module-level ``ctrl`` / ``stim`` dictionaries.

    Returns
    -------
    The concatenated AnnData.
    """
    is_prediction = adata.obs['condition'] == 'pred'
    predicted = adata[is_prediction]
    return predicted.concatenate(ctrl[name], stim[name])

In [5]:
def get_all_data(name):
    """Read ``../{name}/model_{cell_type}/result.h5ad`` for every cell type
    listed in ``names[name]`` and return them joined with ``anndata.concat``.
    """
    per_cell_type = [
        sc.read(f"../{name}/model_{cell_type}/result.h5ad")
        for cell_type in names[name]
    ]
    return adata.concat(per_cell_type)

In [6]:
def view_adata(adata, conditions=('pred', 'stimulated', 'control')):
    """Print the ``condition`` column of ``adata``, then of one subset per label.

    Parameters
    ----------
    adata
        Object with an ``obs['condition']`` column that supports boolean-mask
        indexing (an AnnData in this notebook).
    conditions
        Condition labels to print a subset for, in order.  The default
        reproduces the original hard-coded ``'pred'``, ``'stimulated'``,
        ``'control'`` sequence.  Datasets that use other labels (hpoly uses
        ``'Hpoly.Day10'`` / ``'Control'``) print empty subsets under the
        default, so pass their own labels, e.g. ``keys['hpoly'][:3]``.

    Returns
    -------
    None; everything is written to stdout.
    """
    print(adata.obs['condition'])
    for label in conditions:
        subset = adata[adata.obs['condition'] == label]
        print(subset.obs['condition'])

In [7]:
# Build the combined per-cell-type result for each of the three datasets.
pbmc_all_data = get_all_data("pbmc")
hpoly_all_data = get_all_data("hpoly")
study_all_data = get_all_data("study")
# Only the hpoly result is inspected; the pbmc and study checks are disabled.
# NOTE(review): view_adata filters on 'stimulated' / 'control' by default, but
# hpoly labels its conditions 'Hpoly.Day10' / 'Control', so the last two
# subsets print as empty Series here.
# view_adata(pbmc_all_data)
view_adata(hpoly_all_data)
# view_adata(study_all_data)

  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")


B1_GTACAGTGAGAAGT_Control_Endocrine-0-0    Control
B1_GCCTCATGAAGTAG_Control_Endocrine-0-0    Control
B3_GTTTAAGATCCCGT_Control_Endocrine-0-0    Control
B1_GATATCCTGTCGTA_Control_Endocrine-0-0    Control
B1_TTAGGTCTAGTACC_Control_Endocrine-0-0    Control
                                            ...   
121-2                                         pred
122-2                                         pred
123-2                                         pred
124-2                                         pred
125-2                                         pred
Name: condition, Length: 1360, dtype: category
Categories (3, object): ['Control', 'Hpoly.Day10', 'pred']
0-2      pred
1-2      pred
2-2      pred
3-2      pred
4-2      pred
         ... 
121-2    pred
122-2    pred
123-2    pred
124-2    pred
125-2    pred
Name: condition, Length: 497, dtype: category
Categories (1, object): ['pred']
Series([], Name: condition, dtype: category
Categories (0, object): [])
Series([], Name: condition, 

  utils.warn_names_duplicates("obs")


In [8]:
# Persist the combined results under ../all_data/ with the "scperb_" prefix.
pbmc_all_data.write_h5ad("../all_data/scperb_pbmc_all_data.h5ad")
hpoly_all_data.write_h5ad("../all_data/scperb_hpoly_all_data.h5ad")
study_all_data.write_h5ad("../all_data/scperb_study_all_data.h5ad")

In [9]:
def get_all_scgen_data(name):
    """Read ``../scgen_saved/{name}/model_{cell_type}/result.h5ad`` for every
    cell type listed in ``names[name]`` and return them joined with
    ``anndata.concat``.
    """
    scgen_results = [
        sc.read(f"../scgen_saved/{name}/model_{cell_type}/result.h5ad")
        for cell_type in names[name]
    ]
    return adata.concat(scgen_results)

In [10]:
# Build the combined scgen_saved results for each dataset, then print the
# condition column of each one (whole set plus the per-condition subsets).
pbmc_scgen_all_data = get_all_scgen_data("pbmc")
hpoly_scgen_all_data = get_all_scgen_data("hpoly")
study_scgen_all_data = get_all_scgen_data("study")
view_adata(pbmc_scgen_all_data)
view_adata(hpoly_scgen_all_data)
view_adata(study_scgen_all_data)

index
TCTAGTTGTCTTAC-1-control-0    control
GTAAGCACAGAACA-1-control-0    control
ATCGCGCTATGGTC-1-control-0    control
ACAACCGATCTTCA-1-control-0    control
CAATTCACCTCGCT-1-control-0    control
                               ...   
CGAACATGAAAGCA-1-control-2       pred
GGTATGACGCTTAG-1-control-2       pred
AACGTTCTCGCAAT-1-control-2       pred
GCGAGAGAAGTGCT-1-control-2       pred
GACCTCACAGATCC-1-control-2       pred
Name: condition, Length: 2911, dtype: category
Categories (3, object): ['control', 'pred', 'stimulated']
index
TCTAGTTGTCTTAC-1-control-2    pred
GTAAGCACAGAACA-1-control-2    pred
ATCGCGCTATGGTC-1-control-2    pred
ACAACCGATCTTCA-1-control-2    pred
CAATTCACCTCGCT-1-control-2    pred
                              ... 
CGAACATGAAAGCA-1-control-2    pred
GGTATGACGCTTAG-1-control-2    pred
AACGTTCTCGCAAT-1-control-2    pred
GCGAGAGAAGTGCT-1-control-2    pred
GACCTCACAGATCC-1-control-2    pred
Name: condition, Length: 936, dtype: category
Categories (1, object): ['pred']
i

In [11]:
# Persist the combined results under ../all_data/ with the "scgen_" prefix.
pbmc_scgen_all_data.write_h5ad("../all_data/scgen_pbmc_all_data.h5ad")
hpoly_scgen_all_data.write_h5ad("../all_data/scgen_hpoly_all_data.h5ad")
study_scgen_all_data.write_h5ad("../all_data/scgen_study_all_data.h5ad")

In [12]:
def get_all_CVAE_data(name):
    """Collect the CVAE predictions for every cell type of dataset ``name``.

    For each cell type in ``names[name]`` the file
    ``../CVAE/{name}/CVAE_{cell_type}.h5ad`` is read and its rows whose
    ``condition`` is ``'pred'`` are kept.  Their matrix is wrapped in a new
    AnnData that takes ``obs`` and ``var`` from the control cells of the same
    cell type in ``valid[name]``; the cell-type column and ``condition`` are
    then overwritten with the cell type and ``'pred'``.

    Returns the per-cell-type objects joined with ``anndata.concat``.
    """
    collected = []
    for cell_type in names[name]:
        cvae_result = sc.read(f"../CVAE/{name}/CVAE_{cell_type}.h5ad")
        predicted = cvae_result[cvae_result.obs['condition'] == 'pred']

        # Control cells of this cell type supply the obs/var annotations.
        # NOTE(review): this presumes one predicted row per control cell —
        # confirm against the code that produced the CVAE files.
        reference = valid[name]
        of_this_type = reference.obs[keys[name][3]] == cell_type
        is_control = reference.obs['condition'] == keys[name][1]
        control_cells = reference[of_this_type & is_control]

        relabelled = adata.AnnData(
            adata2numpy(predicted),
            obs=control_cells.obs,
            var=control_cells.var,
        )
        relabelled.obs[keys[name][3]] = cell_type
        relabelled.obs['condition'] = 'pred'
        collected.append(relabelled)
    return adata.concat(collected)

In [13]:
# Gather the CVAE predictions per dataset; normalize() keeps their 'pred' rows
# and appends the matching ctrl[...] and stim[...] cells.
pbmc_CVAE_all_data = normalize(get_all_CVAE_data("pbmc"), "pbmc")
hpoly_CVAE_all_data = normalize(get_all_CVAE_data("hpoly"), "hpoly")
study_CVAE_all_data = normalize(get_all_CVAE_data("study"), "study")
# Only the hpoly result is inspected; the other checks are disabled.
# print(pbmc_CVAE_all_data.obs['cell_type'])
# view_adata(pbmc_CVAE_all_data)
view_adata(hpoly_CVAE_all_data)
# view_adata(study_CVAE_all_data)


See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html

See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html


index
B1_GTACAGTGAGAAGT_Control_Endocrine-0-0                           pred
B1_GCCTCATGAAGTAG_Control_Endocrine-0-0                           pred
B3_GTTTAAGATCCCGT_Control_Endocrine-0-0                           pred
B1_GATATCCTGTCGTA_Control_Endocrine-0-0                           pred
B1_TTAGGTCTAGTACC_Control_Endocrine-0-0                           pred
                                                              ...     
B7_GCAAGACTCTGATG_Hpoly.Day10_TA.Early-1-2                 Hpoly.Day10
B7_CGCAACCTACACTG_Hpoly.Day10_Stem-1-2                     Hpoly.Day10
B8_ACTTAGCTGACTAC_Hpoly.Day10_Endocrine-1-2                Hpoly.Day10
B7_TTCAACACAAAGTG_Hpoly.Day10_Enterocyte.Progenitor-1-2    Hpoly.Day10
B7_GAGTGTTGAGAATG_Hpoly.Day10_Stem-1-2                     Hpoly.Day10
Name: condition, Length: 1389, dtype: object
index
B1_GTACAGTGAGAAGT_Control_Endocrine-0-0    pred
B1_GCCTCATGAAGTAG_Control_Endocrine-0-0    pred
B3_GTTTAAGATCCCGT_Control_Endocrine-0-0    pred
B1_GATATCCTGTCGTA_


See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html


In [14]:
# Persist the combined results under ../all_data/ with the "CVAE_" prefix.
pbmc_CVAE_all_data.write_h5ad("../all_data/CVAE_pbmc_all_data.h5ad")
hpoly_CVAE_all_data.write_h5ad("../all_data/CVAE_hpoly_all_data.h5ad")
study_CVAE_all_data.write_h5ad("../all_data/CVAE_study_all_data.h5ad")

In [15]:
def get_all_STGAN_data(name):
    """Collect the STGAN predictions for every cell type of dataset ``name``.

    For each cell type in ``names[name]`` the file
    ``../STGAN/{name}/STGAN_{cell_type}.h5ad`` is read and its rows whose
    ``condition`` is ``'pred'`` are kept.  Their matrix is wrapped in a new
    AnnData that takes ``obs`` and ``var`` from the control cells of the same
    cell type in ``valid[name]``; the cell-type column and ``condition`` are
    then overwritten with the cell type and ``'pred'``.

    Returns the per-cell-type objects joined with ``anndata.concat``.
    """
    type_column = lambda: keys[name][3]  # looked up lazily, as in each loop pass
    gathered = []
    for cell_type in names[name]:
        stgan_result = sc.read(f"../STGAN/{name}/STGAN_{cell_type}.h5ad")
        pred_rows = stgan_result[stgan_result.obs['condition'] == 'pred']

        # Annotations come from the control cells of the same cell type.
        # NOTE(review): this presumes one predicted row per control cell —
        # confirm against the code that produced the STGAN files.
        same_type = valid[name].obs[type_column()] == cell_type
        control = valid[name].obs['condition'] == keys[name][1]
        annotation_source = valid[name][same_type & control]

        wrapped = adata.AnnData(
            adata2numpy(pred_rows),
            obs=annotation_source.obs,
            var=annotation_source.var,
        )
        wrapped.obs[type_column()] = cell_type
        wrapped.obs['condition'] = 'pred'
        gathered.append(wrapped)
    return adata.concat(gathered)

In [16]:
# Gather the STGAN predictions and append the real ctrl/stim cells via
# normalize(). Only hpoly and study are processed; the pbmc line is disabled
# (the reason is not recorded in this notebook).
# pbmc_STGAN_all_data = get_all_STGAN_data("pbmc")
hpoly_STGAN_all_data = normalize(get_all_STGAN_data("hpoly"), "hpoly")
study_STGAN_all_data = normalize(get_all_STGAN_data("study"), "study")
# Disabled leftovers from earlier experiments, kept for reference:
# view_adata(pbmc_STGAN_all_data)
# view_adata(test)
# test1 = test[test.obs['condition'] == 'pred_stim']
# print(test.obs['cell_type'])
# hpoly_STGAN_all_data = get_all_STGAN_data("hpoly")
# study_STGAN_all_data = get_all_STGAN_data("study")
# view_adata(pbmc_STGAN_all_data)
# Print the condition column of the two results that were built.
view_adata(hpoly_STGAN_all_data)
view_adata(study_STGAN_all_data)


See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html

See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html


index
B1_GTACAGTGAGAAGT_Control_Endocrine-0-0                           pred
B1_GCCTCATGAAGTAG_Control_Endocrine-0-0                           pred
B3_GTTTAAGATCCCGT_Control_Endocrine-0-0                           pred
B1_GATATCCTGTCGTA_Control_Endocrine-0-0                           pred
B1_TTAGGTCTAGTACC_Control_Endocrine-0-0                           pred
                                                              ...     
B7_GCAAGACTCTGATG_Hpoly.Day10_TA.Early-1-2                 Hpoly.Day10
B7_CGCAACCTACACTG_Hpoly.Day10_Stem-1-2                     Hpoly.Day10
B8_ACTTAGCTGACTAC_Hpoly.Day10_Endocrine-1-2                Hpoly.Day10
B7_TTCAACACAAAGTG_Hpoly.Day10_Enterocyte.Progenitor-1-2    Hpoly.Day10
B7_GAGTGTTGAGAATG_Hpoly.Day10_Stem-1-2                     Hpoly.Day10
Name: condition, Length: 1389, dtype: object
index
B1_GTACAGTGAGAAGT_Control_Endocrine-0-0    pred
B1_GCCTCATGAAGTAG_Control_Endocrine-0-0    pred
B3_GTTTAAGATCCCGT_Control_Endocrine-0-0    pred
B1_GATATCCTGTCGTA_

In [17]:
# Persist the combined results under ../all_data/ with the "STGAN_" prefix.
# The pbmc write stays disabled because pbmc_STGAN_all_data is never built.
# pbmc_STGAN_all_data.write_h5ad("../all_data/STGAN_pbmc_all_data.h5ad")
hpoly_STGAN_all_data.write_h5ad("../all_data/STGAN_hpoly_all_data.h5ad")
study_STGAN_all_data.write_h5ad("../all_data/STGAN_study_all_data.h5ad")

In [18]:
def get_all_WGAN_data(name):
    """Read ``../WGAN/{name}/WGAN_{cell_type}.h5ad`` for every cell type listed
    in ``names[name]`` and return them joined with ``anndata.concat``.
    """
    wgan_results = [
        sc.read(f"../WGAN/{name}/WGAN_{cell_type}.h5ad")
        for cell_type in names[name]
    ]
    return adata.concat(wgan_results)

In [19]:
# Gather the WGAN results per dataset; normalize() keeps their 'pred' rows and
# appends the matching ctrl[...] and stim[...] cells.
pbmc_WGAN_all_data = normalize(get_all_WGAN_data("pbmc"), "pbmc")
hpoly_WGAN_all_data = normalize(get_all_WGAN_data("hpoly"), "hpoly")
study_WGAN_all_data = normalize(get_all_WGAN_data("study"), "study")
# Print the condition column of each result (whole set plus per-condition subsets).
view_adata(pbmc_WGAN_all_data)
view_adata(hpoly_WGAN_all_data)
view_adata(study_WGAN_all_data)

  utils.warn_names_duplicates("obs")

See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")
  utils.warn_names_duplicates("obs")

See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html

See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html


0-0                                    pred
1-0                                    pred
2-0                                    pred
3-0                                    pred
4-0                                    pred
                                    ...    
ATTTGCACGAGAGC-1-stimulated-2    stimulated
ACATCACTCAGTCA-1-stimulated-2    stimulated
GAGTGTTGCACACA-1-stimulated-2    stimulated
GCGTAATGGGTAAA-1-stimulated-2    stimulated
TACATAGAGAAGGC-1-stimulated-2    stimulated
Name: condition, Length: 2911, dtype: category
Categories (3, object): ['control', 'pred', 'stimulated']
0-0     pred
1-0     pred
2-0     pred
3-0     pred
4-0     pred
        ... 
49-0    pred
50-0    pred
51-0    pred
52-0    pred
53-0    pred
Name: condition, Length: 936, dtype: category
Categories (1, object): ['pred']
ACCGAAACATTGGC-1-stimulated-2    stimulated
TAGTCTTGAGCACT-1-stimulated-2    stimulated
ATAGATTGTTCACT-1-stimulated-2    stimulated
TATGTGCTTGCAGT-1-stimulated-2    stimulated
GGAACTACTCCTC

In [20]:
# Persist the combined results under ../all_data/ with the "WGAN_" prefix.
pbmc_WGAN_all_data.write_h5ad("../all_data/WGAN_pbmc_all_data.h5ad")
hpoly_WGAN_all_data.write_h5ad("../all_data/WGAN_hpoly_all_data.h5ad")
study_WGAN_all_data.write_h5ad("../all_data/WGAN_study_all_data.h5ad")