# healthy_HCL_MCA_0

+ 整理HCL和MCA各组织的数据，并将同一物种,同一组织的数据进行合并
    + 从json中获取各个数据集的信息
    + 数据下载
    + 合并
    + ~~由`3-1_202311160955.csv`定义`CL`字段~~(`3-1_202311160955.csv`已废弃)
+ 此脚本不再定义`CL`字段,该字段由`healthy_HCL_MCA_2_py_Marker`定义

更新时间:2024年4月7日

```shell
conda activate
cd ~/link/res_publish/run
jupyter nbconvert healthy_0_.ipynb --to python

conda activate publish
nohup python healthy_0_.py &
jobs

conda activate
echo 'finish'
```

[HCL](https://bis.zju.edu.cn/HCL/gallery.html)

[MCA](https://bis.zju.edu.cn/MCA/gallery.html)



In [1]:
import sys
from pathlib import Path
# Project root; it is put on sys.path (once) so that project modules
# living under it can be imported by the following cells.
p_root = Path('~/link/res_publish').expanduser()
if str(p_root) not in sys.path:
    sys.path.append(str(p_root))

In [2]:
from func import *

  from .autonotebook import tqdm as notebook_tqdm
2024-04-26 10:36:08.469534: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.



-------------------------help-------------------------
> parameter
    p_root	[name] res_publish
        p_run, p_plot, p_res, p_cache, p_pdf
    p_df_varmap
    map_sp_reverse
    rng
> run
    run_cross_species_models
    h5ad_to_mtx
    load_adata
    get_path_varmap
    find_path_from_para
    load_normalized_adata

> res
    get_test_result_df
    get_res_obs
    get_adata_umap
    show_umap

> plot
    get_color_map
    show_color_map
    show_color
    plot_umap
    savefig



In [3]:
# Paths belonging to one extracted dataset: the extraction directory
# (`root`), the expression matrix (`dge`), the per-cell annotation table
# (`anno`) and, only for archives that ship them, the `gene` and `markers`
# tables.
File_Paths = namedtuple(
    "File_Paths",
    ["root", "dge", "anno", "gene", "markers"],
    # defaults are aligned to the right-most fields, i.e. gene and markers
    defaults=(None, None),
)

def load_data_from_HCL_or_MCA(p, db, msg=True):
    """Extract one downloaded HCL/MCA archive and load its expression data.

    The archive is extracted next to itself, into a directory named after
    the archive's stem. Extraction is skipped when that directory already
    exists.

    Parameters
    ----------
    p : str or Path
        Path of the downloaded archive (a tar file for HCL, a zip file for
        MCA).
    db : str
        Source database, either ``"HCL"`` or ``"MCA"``.
    msg : bool
        If true, print a notice when the archive was already extracted.
        This flag only controls the notice; it no longer decides whether
        the archive is extracted again.

    Returns
    -------
    The transposed result of ``sc.read_csv`` (presumably a scanpy AnnData,
    cells x genes -- TODO confirm) whose ``obs`` holds the single column
    ``pre_celltype``. For MCA the ``var`` index is additionally replaced by
    the ``x`` column of the archive's gene table.

    Raises
    ------
    AssertionError
        If ``db`` is neither ``"HCL"`` nor ``"MCA"``.
    """

    def extract_once(archive_path, open_archive, done_msg, msg):
        # Shared by untar/unzip: extract `archive_path` into a sibling
        # directory named after its stem, unless that directory exists.
        archive_path = Path(archive_path)
        out_dir = archive_path.parent.joinpath(archive_path.stem)
        if out_dir.exists():
            if msg:
                print(done_msg % str(archive_path))
        else:
            # NOTE(review): the archives are downloaded from a remote
            # server; consider passing an extraction filter
            # (e.g. filter="data" for tarfile) where the Python version
            # supports it.
            with open_archive(archive_path) as archive:
                archive.extractall(path=out_dir)
        return archive_path.stem, out_dir

    # for HCL
    def untar(p, msg=True):
        stem, p_out = extract_once(
            p, tarfile.open, "[msg] has untar\n> %s", msg
        )
        return File_Paths(
            p_out,
            p_out.joinpath(stem + "_dge.txt.gz"),
            p_out.joinpath(stem + "_Anno.csv"),
        )

    # for MCA
    def unzip(p, msg=True):
        stem, p_out = extract_once(p, ZipFile, "[msg] has unzip\n> %s", msg)
        return File_Paths(
            root=p_out,
            dge=p_out.joinpath(stem + "_dge.csv"),
            anno=p_out.joinpath(stem + "_barcodes_anno.csv"),
            gene=p_out.joinpath(stem + "_gene.csv"),
            markers=p_out.joinpath(stem + "_markers.csv"),
        )

    def load_HCL_data(p, msg):
        out = untar(p, msg)
        # The matrix is read and then transposed before the per-cell
        # annotation is joined onto obs.
        adata = sc.read_csv(out.dge, delimiter=",").T
        adata.obs = adata.obs.join(pd.read_csv(out.anno, index_col=0))
        # Keep only the annotation column, under a database-independent name.
        adata.obs = adata.obs.rename(columns={"CT": "pre_celltype"}).loc[
            :, ["pre_celltype"]
        ]
        return adata

    def load_MCA_data(p, msg):
        out = unzip(p, msg)
        adata = sc.read_csv(out.dge).T
        adata.obs = adata.obs.join(pd.read_csv(out.anno, index_col=0))
        adata.obs = adata.obs.rename(
            columns={"Idents.pbmc.": "pre_celltype"}
        ).loc[:, ["pre_celltype"]]
        # The gene table's index is cast to str so it can be joined onto
        # var; its "x" column then becomes the var index.
        adata.var = adata.var.join(
            pd.read_csv(out.gene, index_col=0).rename(index=str)
        )
        adata.var.index = adata.var["x"].values
        return adata

    assert db in ["MCA", "HCL"], "[Error] {}".format(db)
    if db == "HCL":
        return load_HCL_data(p, msg)
    if db == "MCA":
        return load_MCA_data(p, msg)
    return None

# info of HCL and MCA

In [4]:
# Per-database dataset metadata, keyed by database name and filled in by the
# sections below. Per the author's note, `info` ends up as a single
# pd.DataFrame after this section.
info = {}

## [HCL](https://bis.zju.edu.cn/HCL/gallery.html)
> [tissueinfo_HCL](https://bis.zju.edu.cn/HCL/assets/js/tissueinfo-1_0630.js)

In [5]:
tissueinfo_HCL = {
    "Adult-Adipose1": ["Adult-Adipose1", "Adipose", "1372", "Donor20", "36-year-old", "Male", "DCD", "Abdomil", "HCL"],
    "Adult-Adrenal-Gland2": ["Adult-Adrenal-Gland2", "Adrenal-Gland", "8114", "Donor21", "52-year-old", "Male", "DCD", "Partial tissue", "HCL"],
    "Adult-Adrenal-Gland3": ["Adult-Adrenal-Gland3", "Adrenal-Gland", "15083", "Donor22", "23-year-old", "Female", "DCD", "Intact tissue", "HCL"],
    "Adult-Artery1": ["Adult-Artery1", "Artery", "9652", "Donor23", "46-year-old", "Male", "DCD", "Ascending aorta", "HCL"],
    "Adult-Ascending-Colon1": ["Adult-Ascending-Colon1", "Intestine", "2026", "Donor24", "47-year-old", "Female", "DCD", "Partial tissue", "HCL"],
    "Adult-Bladder1": ["Adult-Bladder1", "Bladder", "1267", "Donor25", "52-year-old", "Male", "DCD", "Partial tissue", "HCL"],
    "Adult-Bladder2": ["Adult-Bladder2", "Bladder", "2750", "Donor26", "32-year-old", "Female", "DCD", "Partial tissue", "HCL"],
    "Adult-Bone-Marrow1": ["Adult-Bone-Marrow1", "Bone-Marrow", "2261", "Donor27", "60-year-old", "Female", "Operation", "Posterior superior iliac spine puncture", "HCL"],
    "Adult-Bone-Marrow2": ["Adult-Bone-Marrow2", "Bone-Marrow", "6443", "Donor28", "49-year-old", "Female", "Operation", "Posterior superior iliac spine puncture", "HCL"],
    "Adult-Bone-Marrow-CD34N": ["Adult-Bone-Marrow-CD34N", "Bone-Marrow", "13765", "Donor57/58", "42-year-old/43-year-old", "Male", "", "Mobilized peripheral blood", "HCL"],
    "Adult-Bone-Marrow-CD34P": ["Adult-Bone-Marrow-CD34P", "Bone-Marrow", "44914", "Donor57/58", "42-year-old/43-year-old", "Male", "", "Mobilized peripheral blood", "HCL"],
    "Adult-Brain_Lake": ["Adult-Brain_Lake", "Brain", "8531", "", "20 to 51-year-old", "Female & Male", "article", "", "Lake, B.B., Chen, S., Sos, B.C., Fan, J., Kaeser, G.E., Yung, Y.C., Duong, T.E., Gao, D., Chun, J., Kharchenko, P.V., Zhang, K., 2018. Integrative single-cell alysis of transcriptiol and epigenetic states in the human adult brain. t Biotechnol 36, 70-80."],
    "Adult-Cerebellum1": ["Adult-Cerebellum1", "Brain", "7324", "Donor29", "55-year-old", "Female", "Operation", "Cerebellum", "HCL"],
    "Adult-Cervix1": ["Adult-Cervix1", "Cervix", "8096", "Donor30", "52-year-old", "Female", "Operation", "Posterior wall of uterus", "HCL"],
    "Adult-Duodenum1": ["Adult-Duodenum1", "Intestine", "4681", "Donor25", "52-year-old", "Male", "DCD", "Partial tissue", "HCL"],
    "Adult-Epityphlon": ["Adult-Epityphlon", "Intestine", "4486", "Donor24", "47-year-old", "Female", "DCD", "Partial tissue", "HCL"],
    "Adult-Esophagus1": ["Adult-Esophagus1", "Esophagus", "2696", "Donor31", "45-year-old", "Male", "DCD", "Locus inferior", "HCL"],
    "Adult-Esophagus2": ["Adult-Esophagus2", "Esophagus", "8668", "Donor32", "56-year-old", "Male", "Operation", "Middle esophagus", "HCL"],
    "Adult-Fallopian-Tube1": ["Adult-Fallopian-Tube1", "Fallopian-Tube", "6556", "Donor30", "52-year-old", "Female", "Operation", "Left fallopian tube", "HCL"],
    "Adult-Gall-Bladder1": ["Adult-Gall-Bladder1", "Gall-Bladder", "9769", "Donor31", "45-year-old", "Male", "DCD", "Partial tissue", "HCL"],
    "Adult-Gall-Bladder2": ["Adult-Gall-Bladder2", "Gall-Bladder", "5031", "Donor33", "58-year-old", "Male", "Operation", "Partial tissue", "HCL"],
    "Adult-Heart1": ["Adult-Heart1", "Heart", "1308", "Donor25", "52-year-old", "Male", "DCD", "Left ventricular apex", "HCL"],
    "Adult-Heart2": ["Adult-Heart2", "Heart", "1478", "Donor24", "47-year-old", "Female", "DCD", "Left ventricle", "HCL"],
    "Adult-Ileum2": ["Adult-Ileum2", "Intestine", "3367", "Donor34", "41-year-old", "Male", "DCD", "Partial tissue", "HCL"],
    "Adult-JeJunum2": ["Adult-JeJunum2", "Intestine", "5549", "Donor35", "64-year-old", "Male", "Operation", "Proximal jejunum", "HCL"],
    "Adult-Kidney2": ["Adult-Kidney2", "Kidney", "8877", "Donor36", "66-year-old", "Male", "Operation", "Adjacent normal tissue", "HCL"],
    "Adult-Kidney3": ["Adult-Kidney3", "Kidney", "9966", "Donor34", "41-year-old", "Male", "DCD", "Partial tissue", "HCL"],
    "Adult-Kidney4": ["Adult-Kidney4", "Kidney", "3849", "Donor37", "57-year-old", "Male", "Operation", "left kidney", "HCL"],
    "Adult-Liver1": ["Adult-Liver1", "Liver", "1811", "Donor38", "21-year-old", "Female", "DCD", "Right lobe", "HCL"],
    "Adult-Liver2": ["Adult-Liver2", "Liver", "4377", "Donor39", "52-year-old", "Male", "Operation", "Left lateral lobe", "HCL"],
    "Adult-Liver4": ["Adult-Liver4", "Liver", "4384", "Donor40", "23-year-old", "Female", "Operation", "Left lateral lobe", "HCL"],
    "Adult-Lung1": ["Adult-Lung1", "Lung", "8426", "Donor41", "21-year-old", "Male", "DCD", "The lower lobe of right lung", "HCL"],
    "Adult-Lung2": ["Adult-Lung2", "Lung", "5849", "Donor38", "21-year-old", "Female", "DCD", "The lower lobe of right lung", "HCL"],
    "Adult-Lung3": ["Adult-Lung3", "Lung", "9603", "Donor42", "49-year-old", "Female", "Operation", "The nferior lobe of right lung", "HCL"],
    "Adult-Muscle1": ["Adult-Muscle1", "Muscle", "7775", "Donor43", "63-year-old", "Male", "Operation", "Obliquus externus abdominis", "HCL"],
    "Adult-Omentum1": ["Adult-Omentum1", "Omentum", "1487", "Donor41", "21-year-old", "Male", "DCD", "Partial tissue", "HCL"],
    "Adult-Omentum2": ["Adult-Omentum2", "Omentum", "9971", "Donor34", "41-year-old", "Male", "DCD", "Partial tissue", "HCL"],
    "Adult-Omentum3": ["Adult-Omentum3", "Omentum", "1354", "Donor32", "56-year-old", "Male", "Operation", "Abdomil cavity", "HCL"],
    "Adult-Pancreas_Baron": ["Adult-Pancreas_Baron", "Pancreas", "8569", "", "17 to 59-year-old", "Female & Male", "article", "", "Baron, M., Veres, A., Wolock, S.L., Faust, A.L., Gaujoux, R., Vetere, A., Ryu, J.H., Wagner, B.K., Shen-Orr, S.S., Klein, A.M., Melton, D.A., Yai, I., 2016. A Single-Cell Transcriptomic Map of the Human and Mouse Pancreas Reveals Inter- and Intra-cell Population Structure. Cell systems 3, 346-360 e344."],
    "Adult-Pancreas_Muraro": ["Adult-Pancreas_Muraro", "Pancreas", "3051", "", "23 to 59-year-old", "Female & Male", "article", "", "Muraro, M.J., Dharmadhikari, G., Grun, D., Groen, N., Dielen, T., Jansen, E., van Gurp, L., Engelse, M.A., Carlotti, F., de Koning, E.J., van Oudearden, A., 2016. A Single-Cell Transcriptome Atlas of the Human Pancreas. Cell systems 3, 385-394 e383."],
    "Adult-Pancreas_Segerstolpe": ["Adult-Pancreas_Segerstolpe", "Pancreas", "2207", "", "22 to 57-year-old", "Female & Male", "article", "", "Segerstolpe, A., Palasantza, A., Eliasson, P., Andersson, E.M., Andreasson, A.C., Sun, X., Picelli, S., Sabirsh, A., Clausen, M., Bjursell, M.K., Smith, D.M., Kasper, M., Ammala, C., Sandberg, R., 2016. Single-Cell Transcriptome Profiling of Human Pancreatic Islets in Health and Type 2 Diabetes. Cell Metab 24, 593-607."],
    "Adult-Pancreas1": ["Adult-Pancreas1", "Pancreas", "9727", "Donor44", "43-year-old", "Female", "DCD", "Head of pancreas", "HCL"],
    "Adult-Peripheral-Blood1": ["Adult-Peripheral-Blood1", "Peripheral-Blood", "2719", "Donor45", "34-year-old", "Male", "Healthy donor", "Venous blood", "HCL"],
    "Adult-Peripheral-Blood2": ["Adult-Peripheral-Blood2", "Peripheral-Blood", "5296", "Donor46", "30-year-old", "Male", "Healthy donor", "Venous blood", "HCL"],
    "Adult-Peripheral-Blood3": ["Adult-Peripheral-Blood3", "Peripheral-Blood", "2156", "Donor47", "27-year-old", "Male", "Healthy donor", "Venous blood", "HCL"],
    "Adult-Peripheral-Blood4": ["Adult-Peripheral-Blood4", "Peripheral-Blood", "7160", "Donor48", "25-year-old", "Male", "Healthy donor", "Venous blood", "HCL"],
    "Adult-Pleura1": ["Adult-Pleura1", "Pleura", "19695", "Donor23", "46-year-old", "Male", "DCD", "Left thoracic wall", "HCL"],
    "Adult-Prostate1": ["Adult-Prostate1", "Prostate", "2445", "Donor31", "45-year-old", "Male", "DCD", "Intact tissue", "HCL"],
    "Adult-Rectum1": ["Adult-Rectum1", "Intestine", "5718", "Donor24", "47-year-old", "Female", "DCD", "Partial tissue", "HCL"],
    "Adult-Sigmoid-Colon1": ["Adult-Sigmoid-Colon1", "Intestine", "3281", "Donor24", "47-year-old", "Female", "DCD", "Lower part of the sigmoid colon", "HCL"],
    "Adult-Spleen1": ["Adult-Spleen1", "Spleen", "15806", "Donor49", "51-year-old", "Female", "DCD", "Partial tissue", "HCL"],
    "Adult-Stomach1": ["Adult-Stomach1", "Stomach", "1879", "Donor31", "45-year-old", "Male", "DCD", "Corpus", "HCL"],
    "Adult-Stomach2": ["Adult-Stomach2", "Stomach", "4669", "Donor50", "59-year-old", "Male", "Operation", "Pylorus", "HCL"],
    "Adult-Stomach3": ["Adult-Stomach3", "Stomach", "8005", "Donor51", "62-year-old", "Male", "Operation", "Gastric body", "HCL"],
    "Adult-Temporal-Lobe1": ["Adult-Temporal-Lobe1", "Brain", "9544", "Donor52", "61-year-old", "Female", "Operation", "Temporal lobe", "HCL"],
    "Adult-Thyroid1": ["Adult-Thyroid1", "Thyroid", "6319", "Donor53", "57-year-old", "Female", "Operation", "Thyroid lower left lobe", "HCL"],
    "Adult-Thyroid2": ["Adult-Thyroid2", "Thyroid", "6328", "Donor54", "61-year-old", "Female", "Operation", "Thyroid lower left lobe", "HCL"],
    "Adult-Trachea2": ["Adult-Trachea2", "Trachea", "9949", "Donor23", "46-year-old", "Male", "DCD", "Main trachea", "HCL"],
    "Adult-Transverse-Colon1": ["Adult-Transverse-Colon1", "Intestine", "5765", "Donor49", "51-year-old", "Female", "DCD", "Middle of the transverse colon", "HCL"],
    "Adult-Transverse-Colon2": ["Adult-Transverse-Colon2", "Intestine", "11229", "Donor55", "", "Female", "Operation", "The Lower part of the colon", "HCL"],
    "Adult-Ureter1": ["Adult-Ureter1", "Ureter", "2390", "Donor56", "49-year-old", "Female", "Operation", "Lower ureter", "HCL"],
    "Adult-Uterus1": ["Adult-Uterus1", "Uterus", "7694", "Donor30", "52-year-old", "Female", "Operation", "Posterior uterine wall", "HCL"],
    "Airway-Epithelium_Plasschaert": ["Airway-Epithelium_Plasschaert", "Trachea", "2970", "", "3 to 42-year-old", "Female & Male", "article", "", "Plasschaert, L.W., Zilionis, R., Choo-Wing, R., Savova, V., Knehr, J., Roma, G., Klein, A.M., Jaffe, A.B., 2018. A single-cell atlas of the airway epithelium reveals the CFTR-rich pulmory ionocyte. ture 560, 377-381."],
    "Breast-Epithelium_Nguyen": ["Breast-Epithelium_Nguyen", "Breast-Epithelium_Nguyen", "4116", "", "17 to 37-year-old", "Female", "article", "", "Nguyen, Q.H., Pervolarakis, N., Blake, K., Ma, D., Davis, R.T., James, N., Phung, A.T., Willey, E., Kumar, R., Jabart, E., Driver, I., Rock, J., Goga, A., Khan, S.A., Lawson, D.A., Werb, Z., Kessenbrock, K., 2018. Profiling human breast epithelial cells using single cell R sequencing identifies cell diversity. t Commun 9, 2028."],
    "Chorionic-Villus1": ["Chorionic-Villus1", "Chorionic-Villus", "9898", "Donor3", "13-week-old", "Male", "Abortion", "Partial tissue", "HCL"],
    "Cord-Blood1": ["Cord-Blood1", "Cord-Blood", "2150", "Donor1", "", "", "", "Cord Blood", "HCL"],
    "Cord-Blood2": ["Cord-Blood2", "Cord-Blood", "4444", "Donor2", "", "", "", "Cord Blood", "HCL"],
    "Cord-Blood-CD34P1": ["Cord-Blood-CD34P1", "Cord-Blood", "5607", "Donor1", "", "", "", "Cord Blood", "HCL"],
    "Cord-Blood-CD34P2": ["Cord-Blood-CD34P2", "Cord-Blood", "11297", "Donor2", "", "", "", "Cord Blood", "HCL"],
    "Dendritic-Cell-Monocyte_Villani": ["Dendritic-Cell-Monocyte_Villani", "Dendritic-Cell-Monocyte_Villani", "1077", "", "", "", "article", "", "Nguyen, Q.H., Pervolarakis, N., Blake, K., Ma, D., Davis, R.T., James, N., Phung, A.T., Willey, E., Kumar, R., Jabart, E., Driver, I., Rock, J., Goga, A., Khan, S.A., Lawson, D.A., Werb, Z., Kessenbrock, K., 2018. Profiling human breast epithelial cells using single cell R sequencing identifies cell diversity. t Commun 9, 2028."],
    "Embryonic-Stem-Cell": ["Embryonic-Stem-Cell", "Cultured-Cell-from-ES", "1660", "H9", "0-day", "Female", "", "", "HCL"],
    "ES-to-EB_8Day_Han": ["ES-to-EB_8Day_Han", "Cultured-Cell-from-ES", "2644", "", "", "", "article", "", "Han, X., Chen, H., Huang, D., Chen, H., Fei, L., Cheng, C., Huang, H., Yuan, G.C., Guo, G., 2018. Mapping human pluripotent stem cell differentiation pathways using high throughput single-cell R-sequencing. Genome Biol 19, 47."],
    "ES-to-Hematopoietic-Cell": ["ES-to-Hematopoietic-Cell", "Cultured-Cell-from-ES", "1115", "H9", "9-day", "Female", "", "", "HCL"],
    "Fetal-Adrenal-Gland2": ["Fetal-Adrenal-Gland2", "Adrenal-Gland", "9875", "Donor5", "12-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Adrenal-Gland3": ["Fetal-Adrenal-Gland3", "Adrenal-Gland", "2547", "Donor6", "14-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Adrenal-Gland4": ["Fetal-Adrenal-Gland4", "Adrenal-Gland", "1994", "Donor7", "12-week-old", "Male", "Abortion", "", "HCL"],
    "Fetal-Brain_Zhong": ["Fetal-Brain_Zhong", "Brain", "2298", "", "8–16 gestatiol weeks", "", "article", "", "Zhong, S., Zhang, S., Fan, X., Wu, Q., Yan, L., Dong, J., Zhang, H., Li, L., Sun, L., Pan, N., Xu, X., Tang, F., Zhang, J., Qiao, J., Wang, X., 2018. A single-cell R-seq survey of the developmental landscape of the human prefrontal cortex. ture 555, 524-528."],
    "Fetal-Brain3": ["Fetal-Brain3", "Brain", "2904", "Donor8", "13-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Brain4": ["Fetal-Brain4", "Brain", "3920", "Donor9", "11-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Brain5": ["Fetal-Brain5", "Brain", "5096", "Donor10", "12-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Brain6": ["Fetal-Brain6", "Brain", "1705", "Donor11", "11-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Calvaria1": ["Fetal-Calvaria1", "Calvaria", "5129", "Donor10", "12-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Eyes1": ["Fetal-Eyes1", "Eye", "1880", "Donor11", "11-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Female-Gonad1": ["Fetal-Female-Gonad1", "Fetal-Gonad", "2710", "Donor12", "26-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Female-Gonad2": ["Fetal-Female-Gonad2", "Fetal-Gonad", "4238", "Donor11", "11-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Gonad_Li": ["Fetal-Gonad_Li", "Fetal-Gonad", "2087", "", "4 to 26-week-old", "Female & Male", "article", "", "Li, L., Dong, J., Yan, L., Yong, J., Liu, X., Hu, Y., Fan, X., Wu, X., Guo, H., Wang, X., Zhu, X., Li, R., Yan, J., Wei, Y., Zhao, Y., Wang, W., Ren, Y., Yuan, P., Yan, Z., Hu, B., Guo, F., Wen, L., Tang, F., Qiao, J., 2017. Single-Cell R-Seq Alysis Maps Development of Human Germline Cells and Godal Niche Interactions. Cell Stem Cell 20, 858-873 e854."],
    "Fetal-Heart1": ["Fetal-Heart1", "Heart", "5319", "Donor13", "12-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Heart2": ["Fetal-Heart2", "Heart", "2678", "Donor11", "11-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Intestine1": ["Fetal-Intestine1", "Intestine", "1448", "Donor13", "12-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Intestine2": ["Fetal-Intestine2", "Intestine", "1338", "Donor14", "10-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Intestine3": ["Fetal-Intestine3", "Intestine", "9740", "Donor8", "13-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Intestine4": ["Fetal-Intestine4", "Intestine", "6931", "Donor11", "11-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Intestine5": ["Fetal-Intestine5", "Intestine", "4059", "Donor7", "12-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Kidney3": ["Fetal-Kidney3", "Kidney", "4939", "Donor8", "13-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Kidney4": ["Fetal-Kidney4", "Kidney", "4511", "Donor9", "11-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Kidney5": ["Fetal-Kidney5", "Kidney", "9932", "Donor15", "12-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Kidney6": ["Fetal-Kidney6", "Kidney", "3057", "Donor11", "11-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Liver1": ["Fetal-Liver1", "Liver", "17929", "Donor12", "26-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Lung1": ["Fetal-Lung1", "Lung", "4526", "Donor13", "12-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Lung2": ["Fetal-Lung2", "Lung", "5121", "Donor11", "11-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Male-Gonad1": ["Fetal-Male-Gonad1", "Fetal-Gonad", "3358", "Donor16", "12-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Male-Gonad2": ["Fetal-Male-Gonad2", "Fetal-Gonad", "9853", "Donor9", "11-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Mid-Brain_LaManno": ["Fetal-Mid-Brain_LaManno", "Brain", "1695", "", "6 to 11-week-old", "", "article", "", "La Manno, G., Gyllborg, D., Codeluppi, S., Nishimura, K., Salto, C., Zeisel, A., Borm, L.E., Stott, S.R.W., Toledo, E.M., Villaescusa, J.C., Lonnerberg, P., Ryge, J., Barker, R.A., Ares, E., Linrsson, S., 2016. Molecular Diversity of Midbrain Development in Mouse, Human, and Stem Cells. Cell 167, 566-580 e519."],
    "Fetal-Muscle1": ["Fetal-Muscle1", "Muscle", "18345", "Donor5", "12-week-old", "Male", "Abortion", "Limb", "HCL"],
    "Fetal-Pancreas1": ["Fetal-Pancreas1", "Pancreas", "8977", "Donor12", "26-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Pancreas2": ["Fetal-Pancreas2", "Pancreas", "6939", "Donor9", "11-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Pancreas3": ["Fetal-Pancreas3", "Pancreas", "2830", "Donor11", "11-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Rib2": ["Fetal-Rib2", "Rib", "1432", "Donor14", "10-week-old", "Male", "Abortion", "Partial tissue", "HCL"],
    "Fetal-Rib3": ["Fetal-Rib3", "Rib", "4560", "Donor5", "13-week-old", "Male", "Abortion", "Partial tissue", "HCL"],
    "Fetal-Skin2": ["Fetal-Skin2", "Skin", "5294", "Donor17", "7-week-old", "Female", "Abortion", "Limb", "HCL"],
    "Fetal-Skin3": ["Fetal-Skin3", "Skin", "1697", "Donor11", "11-week-old", "Female", "Abortion", "scalp", "HCL"],
    "Fetal-Spinal-Cord1": ["Fetal-Spinal-Cord1", "Spinal-Cord", "5916", "Donor8", "13-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Stomach1": ["Fetal-Stomach1", "Stomach", "1322", "Donor17", "7-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Stomach2": ["Fetal-Stomach2", "Stomach", "6631", "Donor11", "11-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Thymus1": ["Fetal-Thymus1", "Thymus", "2448", "Donor10", "12-week-old", "Female", "Abortion", "Intact tissue", "HCL"],
    "Fetal-Thymus2": ["Fetal-Thymus2", "Thymus", "2068", "Donor18", "10-week-old", "Male", "Abortion", "Intact tissue", "HCL"],
    "Haematopoietic-Stem-Cell_Velten": ["Haematopoietic-Stem-Cell_Velten", "Haematopoietic-Stem-Cell_Velten", "564", "", "", "", "article", "", "Velten, L., Haas, S.F., Raffel, S., Blaszkiewicz, S., Islam, S., Hennig, B.P., Hirche, C., Lutz, C., Buss, E.C., Nowak, D., Boch, T., Hofmann, W.K., Ho, A.D., Huber, W., Trumpp, A., Essers, M.A., Steinmetz, L.M., 2017. Human haematopoietic stem cell lineage commitment is a continuous process. ture cell biology 19, 271-281."],
    "iPS": ["iPS", "Cultured-Cell-from-iPS", "12242", "A Chinese Han iPS line", "0-day", "Male", "", "", "HCL"],
    "iPS-to-EB_18Day": ["iPS-to-EB_18Day", "Cultured-Cell-from-iPS", "9288", "A Chinese Han iPS line", "18-day", "Male", "", "", "HCL"],
    "iPS-to-EB_20Day": ["iPS-to-EB_20Day", "Cultured-Cell-from-iPS", "9140", "A Chinese Han iPS line", "20-day", "Male", "", "", "HCL"],
    "iPS-to-EB_9Day": ["iPS-to-EB_9Day", "Cultured-Cell-from-iPS", "9158", "A Chinese Han iPS line", "9-day", "Male", "", "", "HCL"],
    "Liver_Camp": ["Liver_Camp", "Liver", "303", "", "Fetal & Adult", "", "article", "", "Camp, J.G., Sekine, K., Gerber, T., Loeffler-Wirth, H., Binder, H., Gac, M., Kanton, S., Kageyama, J., Damm, G., Seehofer, D., Belicova, L., Bickle, M., Barsacchi, R., Okuda, R., Yoshizawa, E., Kimura, M., Ayabe, H., Taniguchi, H., Takebe, T., Treutlein, B., 2017. Multilineage communication regulates human liver bud development from pluripotency. ture 546, 533-538."],
    "Lympho-Myeloid-Progenitor_Paresh": ["Lympho-Myeloid-Progenitor_Paresh", "Lympho-Myeloid-Progenitor_Paresh", "415", "", "", "", "article", "", "Kharchenko, P.V., Zhang, K., 2018. Integrative single-cell alysis of transcriptiol and epigenetic states in the human adult brain. t Biotechnol 36, 70-80."],
    "Neonatal-Adrenal-Gland1": ["Neonatal-Adrenal-Gland1", "Adrenal-Gland", "5863", "Donor19", "6-day", "Female", "DCD", "Intact tissue", "HCL"],
    "Placenta_Tsang": ["Placenta_Tsang", "Placenta", "20518", "", "freshly Cesarean section-delivered placentas", "", "article", "", "Tsang, J.C.H., Vong, J.S.L., Ji, L., Poon, L.C.Y., Jiang, P., Lui, K.O., Ni, Y.B., To, K.F., Cheng, Y.K.Y., Chiu, R.W.K., Lo, Y.M.D., 2017. Integrative single-cell and cell-free plasma R transcriptomics elucidates placental cellular dymics. Proc tl Acad Sci U S A 114, E7786-E7795."],
    "Placenta_VentoTormo": ["Placenta_VentoTormo", "Placenta", "10000", "", "6–14 gestatiol weeks", "", "article", "", "Vento-Tormo, R., Efremova, M., Botting, R.A., Turco, M.Y., Vento-Tormo, M., Meyer, K.B., Park, J.E., Stephenson, E., Polanski, K., Goncalves, A., Gardner, L., Holmqvist, S., Henriksson, J., Zou, A., Sharkey, A.M., Millar, B., Innes, B., Wood, L., Wilbrey-Clark, A., Payne, R.P., Ivarsson, M.A., Lisgo, S., Filby, A., Rowitch, D.H., Bulmer, J.N., Wright, G.J., Stubbington, M.J.T., Haniffa, M., Moffett, A., Teichmann, S.A., 2018. Single-cell reconstruction of the early materl-fetal interface in humans. ture 563, 347-353."],
    "Placenta1": ["Placenta1", "Placenta", "9595", "Donor4", "10-week-old", "Female", "Abortion", "Partial tissue", "HCL"],
    "Preimplantation-Embryo": ["Preimplantation-Embryo", "Preimplantation-Embryo", "67", "", "Preimplantation embryo", "", "article", "", "Yan, L., Yang, M., Guo, H., Yang, L., Wu, J., Li, R., Liu, P., Lian, Y., Zheng, X., Yan, J., Huang, J., Li, M., Wu, X., Wen, L., Lao, K., Li, R., Qiao, J., Tang, F., 2013. Single-cell R-Seq profiling of human preimplantation embryos and embryonic stem cells. t Struct Mol Biol 20, 1131-1139. & Blakeley, P., Fogarty, N.M., Del Valle, I., Wamaitha, S.E., Hu, T.X., Elder, K., Snell, P., Christie, L., Robson, P., Niakan, K.K., 2015. Defining the three cell lineages of the human blastocyst by single-cell R-seq. Development 142, 3613."],
    "PSC-to-Pancreatic-Islet-Cell": ["PSC-to-Pancreatic-Islet-Cell", "Cultured-Cell-from-ES", "4156", "H9", "24-day", "Female", "", "", "HCL"],
    "Spermatogonial-Stem-Cell_Guo": ["Spermatogonial-Stem-Cell_Guo", "Spermatogonial-Stem-Cell_Guo", "175", "", "Adult", "Male", "article", "", "Guo, J., Grow, E.J., Yi, C., Mlcochova, H., Maher, G.J., Lindskog, C., Murphy, P.J., Wike, C.L., Carrell, D.T., Goriely, A., Hotaling, J.M., Cairns, B.R., 2017. Chromatin and Single-Cell R-Seq Profiling Reveal Dymic Sigling and Metabolic Transitions during Human Spermatogonial Stem Cell Development. Cell Stem Cell 21, 533-546 e536."],
    "Testis_Guo": ["Testis_Guo", "Testis", "6490", "", "17 to 25-year-old", "Male", "article", "", "Guo, J., Grow, E.J., Mlcochova, H., Maher, G.J., Lindskog, C., Nie, X., Guo, Y., Takei, Y., Yun, J., Cai, L., Kim, R., Carrell, D.T., Goriely, A., Hotaling, J.M., Cairns, B.R., 2018. The adult human testis transcriptiol cell atlas. Cell Res 28, 1141-1157."]

}

# One row per HCL dataset, in the insertion order of `tissueinfo_HCL`.
df_info_HCL = pd.DataFrame(
    list(tissueinfo_HCL.values()),
    columns=[
        'name', 'tissue', 'count', 'donor', 'age',
        'gender', 'source', 'sample_location', 'note',
    ],
)
# Add the download URL (derived from the dataset name) plus constant
# species / database tags.
df_info_HCL = df_info_HCL.assign(
    url=df_info_HCL["name"].map(
        'http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/{}.tar'.format
    ),
    sp='human',
    db='HCL',
)
display(df_info_HCL.head(2))
info['HCL'] = df_info_HCL
# Sanity check: each dict key should equal the name stored in its row.
# The helper column `k` stays on the frame stored in `info['HCL']`.
df_info_HCL['k'] = list(tissueinfo_HCL)
print(
    "[name != k] {}".format(
        int((df_info_HCL['name'] != df_info_HCL['k']).sum())
    )
)
del df_info_HCL

Unnamed: 0,name,tissue,count,donor,age,gender,source,sample_location,note,url,sp,db
0,Adult-Adipose1,Adipose,1372,Donor20,36-year-old,Male,DCD,Abdomil,HCL,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Adu...,human,HCL
1,Adult-Adrenal-Gland2,Adrenal-Gland,8114,Donor21,52-year-old,Male,DCD,Partial tissue,HCL,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Adu...,human,HCL


[name != k] 0


## [MCA](https://bis.zju.edu.cn/MCA/gallery.html)
[tissueinfo_MCA](https://bis.zju.edu.cn/MCA/assets/js/tissueinfo-2022.js)

In [6]:
tissueinfo_MCA = {
    "Adult-Adrenal-Gland": ["Adult-Adrenal-Gland", "Adrenal-Gland", "11815", "Adult", "MCA2.0", "MCA2.0"],
    "Adult-Bladder": ["Adult-Bladder", "Bladder", "2746", "Adult", "MCA2.0", "MCA2.0"],
    "One-Year-Bladder": ["One-Year-Bladder", "Bladder", "2411", "One-Year", "MCA2.0", "MCA2.0"],
    "Eighteen-Months-Bladder": ["Eighteen-Months-Bladder", "Bladder", "9890", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "Two-Years-Bladder": ["Two-Years-Bladder", "Bladder", "17618", "Two-Years", "MCA3.0", "MCA3.0"],
    "Bone-Marrow": ["Bone-Marrow", "Bone-Marrow", "9049", "Adult", "MCA2.0", "MCA2.0"],
    "Bone-Marrow_c-kit": ["Bone-Marrow_c-kit", "Bone-Marrow", "26483", "Adult", "MCA2.0", "MCA2.0"],
    "Bone-Marrow-Mesenchyme": ["Bone-Marrow-Mesenchyme", "Bone-Marrow", "7365", "Adult", "MCA2.0", "MCA2.0"],
    "E18-Brain": ["E18-Brain", "Brain", "8994", "Fetal", "Website", "https://support.10xgenomics.com/single-cell-gene-expression/datasets/2.0.1/neuron_9k"],
    "Fetal-Brain": ["Fetal-Brain", "Brain", "4369", "Fetal", "MCA2.0", "MCA2.0"],
    "Neonatal-Brain": ["Neonatal-Brain", "Brain", "9265", "Neonatal", "MCA2.0", "MCA2.0"],
    "Ten-Days-Brain": ["Ten-Days-Brain", "Brain", "6100", "Ten-Days", "MCA2.0", "MCA2.0"],
    "Three-Weeks-Brain": ["Three-Weeks-Brain", "Brain", "4435", "Three-Weeks", "MCA2.0", "MCA2.0"],
    "Adult-Brain": ["Adult-Brain", "Brain", "4038", "Adult", "MCA2.0", "MCA2.0"],
    "Arc-ME": ["Arc-ME", "Brain", "20689", "Adult", "Article", "Campbell J N, Macosko E Z, Fenselau H, et al. A molecular census of arcuate hypothalamus and median eminence cell types[J]. Nature Neuroscience, 2017, 20(3):484."],
    "One-Year-Brain": ["One-Year-Brain", "Brain", "3770", "One-Year", "MCA3.0", "MCA3.0"],
    "Eighteen-Months-Brain": ["Eighteen-Months-Brain", "Brain", "9298", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "Two-Years-Brain": ["Two-Years-Brain", "Brain", "11900", "Two-Years", "MCA3.0", "MCA3.0"],
    "Neonatal-Calvaria": ["Neonatal-Calvaria", "Calvaria", "7964", "Neonatal", "MCA2.0", "MCA2.0"],
    "E6.5-E8.5-Embryo": ["E6.5-E8.5-Embryo", "Embryo", "7484", "Embryo", "Article", "Pijuan-Sala, B., Griffiths, J.A., Guibentif, C. et al. A single-cell molecular map of mouse gastrulation and early organogenesis. Nature 566, 490锟?95 (2019). https://doi.org/10.1038/s41586-019-0933-9"],
    "E8.25-Embryo": ["E8.25-Embryo", "Embryo", "19386", "Embryo", "Article", "Ibarra-Soria X, Jawaid W, Pijuan-Sala B, et al. Defining murine organogenesis at single-cell resolution reveals a role for the leukotriene pathway in regulating blood progenitor formation[J]. Nature Cell Biology, 2018."],
    "E10.5DayEmbryo": ["E10.5DayEmbryo", "Embryo", "26551", "Embryo", "MCA2.0", "MCA2.0"],
    "E12.5DayEmbryo": ["E12.5DayEmbryo", "Embryo", "73685", "Embryo", "MCA2.0", "MCA2.0"],
    "Embryonic-Mesenchyme": ["Embryonic-Mesenchyme", "Embryo", "2771", "Embryo", "MCA2.0", "MCA2.0"],
    "Preimplantation-Embryo": ["Preimplantation-Embryo", "Embryo", "161", "Embryo", "Article", "Deng Q, Ramskold D, Reinius B, et al. Single-Cell RNA-Seq Reveals Dynamic, Random Monoallelic Gene Expression in Mammalian Cells[J]. Science, 2014, 343(6167):193-196. and Posfai E, Petropoulos S, de Barros FRO, Schell JP et al. Position- and Hippo signaling-dependent plasticity during lineage segregation in the early mouse embryo. Elife 2017 Feb 22;6."],
    "Fetal-Heart": ["Fetal-Heart", "Heart", "10123", "Fetal", "MCA2.0", "MCA2.0"],
    "Neonatal-Heart": ["Neonatal-Heart", "Heart", "3948", "Neonatal", "MCA2.0", "MCA2.0"],
    "Ten-Days-Heart": ["Ten-Days-Heart", "Heart", "5383", "Ten-Days", "MCA2.0", "MCA2.0"],
    "Three-Weeks-Heart": ["Three-Weeks-Heart", "Heart", "4054", "Three-Weeks", "MCA2.0", "MCA2.0"],
    "Adult-Heart": ["Adult-Heart", "Heart", "14245", "Adult", "MCA2.0", "MCA2.0"],
    "One-Year-Heart": ["One-Year-Heart", "Heart", "4092", "One-Year", "MCA3.0", "MCA3.0"],
    "Eighteen-Months-Heart": ["Eighteen-Months-Heart", "Heart", "9617", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "Two-Years-Heart": ["Two-Years-Heart", "Heart", "23218", "Two-Years", "MCA3.0", "MCA3.0"],
    "Fetal-Intestine": ["Fetal-Intestine", "Intestine", "6076", "Fetal", "MCA2.0", "MCA2.0"],
    "Neonatal-Intestine": ["Neonatal-Intestine", "Intestine", "9101", "Neonatal", "MCA2.0", "MCA2.0"],
    "Ten-Days-Intestine": ["Ten-Days-Intestine", "Intestine", "17909", "Ten-Days", "MCA2.0", "MCA2.0"],
    "Three-Weeks-Intestine": ["Three-Weeks-Intestine", "Intestine", "9365", "Three-Weeks", "MCA2.0", "MCA2.0"],
    "Two-Years-Intestine": ["Two-Years-Intestine", "Intestine", "26659", "Two-Years", "MCA3.0", "MCA3.0"],
    "Small-Intestinal-Epithelium": ["Small-Intestinal-Epithelium", "Intestine", "5831", "Adult", "Article", "Haber, A., Biton, M., Rogel, N. et al. A single-cell survey of the small intestinal epithelium. Nature 551, 333锟紺339 (2017)."],
    "One-Year-Intestine": ["One-Year-Intestine", "Intestine", "5875", "One-Year", "MCA3.0", "MCA3.0"],
    "Eighteen-Months-Intestine": ["Eighteen-Months-Intestine", "Intestine", "5063", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "Adult-Intestine": ["Adult-Intestine", "Intestine", "6684", "Adult", "MCA2.0", "MCA2.0"],
    "Fetal-Kidney": ["Fetal-Kidney", "Kidney", "9432", "Fetal", "MCA2.0", "MCA2.0"],
    "Neonatal-Kidney": ["Neonatal-Kidney", "Kidney", "13155", "Neonatal", "MCA2.0", "MCA2.0"],
    "Ten-Days-Kidney": ["Ten-Days-Kidney", "Kidney", "12129", "Ten-Days", "MCA2.0", "MCA2.0"],
    "Three-Weeks-Kidney": ["Three-Weeks-Kidney", "Kidney", "5700", "Three-Weeks", "MCA2.0", "MCA2.0"],
    "Two-Years-Kidney": ["Two-Years-Kidney", "Kidney", "10233", "Two-Years", "MCA3.0", "MCA3.0"],
    "One-Year-Kidney": ["One-Year-Kidney", "Kidney", "11474", "One-Year", "MCA3.0", "MCA3.0"],
    "Eighteen-Months-Kidney": ["Eighteen-Months-Kidney", "Kidney", "8995", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "Adult-Kidney": ["Adult-Kidney", "Kidney", "4673", "Adult", "MCA2.0", "MCA2.0"],
    "Fetal-Liver": ["Fetal-Liver", "Liver", "2699", "Fetal", "MCA2.0", "MCA2.0"],
    "Neonatal-Liver": ["Neonatal-Liver", "Liver", "9980", "Neonatal", "MCA2.0", "MCA2.0"],
    "Ten-Days-Liver": ["Ten-Days-Liver", "Liver", "9259", "Ten-Days", "MCA2.0", "MCA2.0"],
    "Three-Weeks-Liver": ["Three-Weeks-Liver", "Liver", "5867", "Three-Weeks", "MCA2.0", "MCA2.0"],
    "Adult-Liver": ["Adult-Liver", "Liver", "4685", "Adult", "MCA2.0", "MCA2.0"],
    "One-Year-Liver": ["One-Year-Liver", "Liver", "9907", "One-Year", "MCA3.0", "MCA3.0"],
    "Eighteen-Months-Liver": ["Eighteen-Months-Liver", "Liver", "8755", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "Two-Years-Liver": ["Two-Years-Liver", "Liver", "22675", "Two-Years", "MCA3.0", "MCA3.0"],
    "Fetal-Lung": ["Fetal-Lung", "Lung", "6453", "Fetal", "MCA2.0", "MCA2.0"],
    "Neonatal-Lung": ["Neonatal-Lung", "Lung", "5906", "Neonatal", "MCA2.0", "MCA2.0"],
    "Ten-Days-Lung": ["Ten-Days-Lung", "Lung", "11314", "Ten-Days", "MCA2.0", "MCA2.0"],
    "Three-Weeks-Lung": ["Three-Weeks-Lung", "Lung", "6391", "Three-Weeks", "MCA2.0", "MCA2.0"],
    "Adult-Lung": ["Adult-Lung", "Lung", "6940", "Adult", "MCA2.0", "MCA2.0"],
    "One-Year-Lung": ["One-Year-Lung", "Lung", "4976", "One-Year", "MCA3.0", "MCA3.0"],
    "Eighteen-Months-Lung": ["Eighteen-Months-Lung", "Lung", "7302", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "Two-Years-Lung": ["Two-Years-Lung", "Lung", "9443", "Two-Years", "MCA3.0", "MCA3.0"],
    "Lung-Mesenchyme": ["Lung-Mesenchyme", "Lung", "5691", "Adult", "Article", "Zepp J A, Zacharias W J, Frank D B, et al. Distinct Mesenchymal Lineages and Niches Promote Epithelial Self-Renewal and Myofibrogenesis in the Lung[J]. Cell, 2017:1134-1148."],
    "Mammary-Gland-Involution": ["Mammary-Gland-Involution", "Mammary-Gland", "4821", "Adult", "MCA2.0", "MCA2.0"],
    "Mammary-Gland-Lactation": ["Mammary-Gland-Lactation", "Mammary-Gland", "13538", "Adult", "MCA2.0", "MCA2.0"],
    "Mammary-Gland-Pregrancy": ["Mammary-Gland-Pregrancy", "Mammary-Gland", "4909", "Adult", "MCA2.0", "MCA2.0"],
    "Mammary-Gland-Virgin": ["Mammary-Gland-Virgin", "Mammary-Gland", "5380", "Adult", "MCA2.0", "MCA2.0"],
    "Neonatal-Muscle": ["Neonatal-Muscle", "Muscle", "4873", "Neonatal", "MCA2.0", "MCA2.0"],
    "Adult-Muscle": ["Adult-Muscle", "Muscle", "1102", "Adult", "MCA2.0", "MCA2.0"],
    "Adult-Omentum": ["Adult-Omentum", "Omentum", "4978", "Adult", "MCA2.0", "MCA2.0"],
    "Adult-Ovary": ["Adult-Ovary", "Ovary", "4363", "Adult", "MCA2.0", "MCA2.0"],
    "Fetal-Pancreas": ["Fetal-Pancreas", "Pancreas", "11983", "Fetal", "MCA2.0", "MCA2.0"],
    "Neonatal-Pancreas": ["Neonatal-Pancreas", "Pancreas", "5639", "Neonatal", "MCA2.0", "MCA2.0"],
    "Ten-Days-Pancreas": ["Ten-Days-Pancreas", "Pancreas", "11007", "Ten-Days", "MCA2.0", "MCA2.0"],
    "Three-Weeks-Pancreas": ["Three-Weeks-Pancreas", "Pancreas", "4858", "Three-Weeks", "MCA2.0", "MCA2.0"],
    "Adult-Pancreas": ["Adult-Pancreas", "Pancreas", "3610", "Adult", "MCA2.0", "MCA2.0"],
    "One-Year-Pancreas": ["Two-Years-Pancreas", "Pancreas", "3610", "Two-Years", "MCA3.0", "MCA3.0"],
    "Eighteen-Months-Pancreas": ["Eighteen-Months-Pancreas", "Pancreas", "10171", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "Two-Years-Pancreas": ["Two-Years-Pancreas", "Pancreas", "14112", "Two-Years", "MCA3.0", "MCA3.0"],
    "Pancreatic-Islet": ["Pancreatic-Islet", "Pancreas", "1886", "Adult", "Article", "Baron M, Veres A, Wolock S L, et al. A Single-Cell Transcriptomic Map of the Human and Mouse Pancreas Reveals Inter- and Intra-cell Population Structure[J]. Cell Syst, 2016, 3(4):346-360.e4."],
    "Peripheral-Blood": ["Peripheral-Blood", "Peripheral-Blood", "7095", "Adult", "MCA2.0", "MCA2.0"],
    "Placenta": ["Placenta", "Placenta", "4346", "Embryo", "MCA2.0", "MCA2.0"],
    "Adult-Pleura": ["Adult-Pleura", "Pleura", "4866", "Adult", "MCA2.0", "MCA2.0"],
    "One-Year-Prostate": ["One-Year-Prostate", "Prostate", "8129", "One-Year", "MCA3.0", "MCA3.0"],
    "Eighteen-Months-Prostate": ["Eighteen-Months-Prostate", "Prostate", "8287", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "Two-Years-Prostate": ["Two-Years-Prostate", "Prostate", "11561", "Two-Years", "MCA3.0", "MCA3.0"],
    "Adult-Prostate": ["Adult-Prostate", "Prostate", "8287", "Adult", "MCA2.0", "MCA2.0"],
    "Retina": ["Retina", "Retina", "8303", "Adult", "Article", "Macosko, Evan , Basu, et al. Highly Parallel Genome-wide Expression Profiling of Individual Cells Using Nanoliter Droplets[J]. Cell, 2015, 161(5):1202."],
    "Neonatal-Rib": ["Neonatal-Rib", "Rib", "6262", "Neonatal", "MCA2.0", "MCA2.0"],
    "Neonatal-Skin": ["Neonatal-Skin", "Skin", "3392", "Neonatal", "MCA2.0", "MCA2.0"],
    "Adult-Spleen": ["Adult-Spleen", "Spleen", "1970", "Adult", "MCA2.0", "MCA2.0"],
    "One-Year-Spleen": ["One-Year-Spleen", "Spleen", "2032", "One-Year", "MCA3.0", "MCA3.0"],
    "Eighteen-Months-Spleen": ["Eighteen-Months-Spleen", "Spleen", "5339", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "Two-Years-Spleen": ["Two-Years-Spleen", "Spleen", "23179", "Two-Years", "MCA3.0", "MCA3.0"],
    "Embryonic-Stem-Cell": ["Embryonic-Stem-Cell", "Stem-Cell", "9991", "Stem cell", "MCA2.0", "MCA2.0"],
    "Mesenchymal-Stem-Cell-Cultured": ["Mesenchymal-Stem-Cell-Cultured", "Stem-Cell", "7319", "Stem cell", "MCA2.0", "MCA2.0"],
    "Trophoblast-Stem-Cell": ["Trophoblast-Stem-Cell", "Stem-Cell", "19489", "Stem cell", "MCA2.0", "MCA2.0"],
    "Fetal-Stomach": ["Fetal-Stomach", "Stomach", "15599", "Fetal", "MCA2.0", "MCA2.0"],
    "Neonatal-Stomach": ["Neonatal-Stomach", "Stomach", "4073", "Neonatal", "MCA2.0", "MCA2.0"],
    "Ten-Days-Stomach": ["Ten-Days-Stomach", "Stomach", "22599", "Ten-Days", "MCA2.0", "MCA2.0"],
    "Three-Weeks-Stomach": ["Three-Weeks-Stomach", "Stomach", "9959", "Three-Weeks", "MCA2.0", "MCA2.0"],
    "Adult-Stomach": ["Adult-Stomach", "Stomach", "13278", "Adult", "MCA2.0", "MCA2.0"],
    "Eighteen-Months-Stomach": ["Eighteen-Months-Stomach", "Stomach", "18234", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "One-Year-Stomach": ["One-Year-Stomach", "Stomach", "10713", "One-Year", "MCA3.0", "MCA3.0"],
    "Two-Years-Stomach": ["Two-Years-Stomach", "Stomach", "5294", "Two-Years", "MCA3.0", "MCA3.0"],
    "Fetal-Testis": ["Fetal-Testis", "Testis", "6044", "Fetal", "MCA2.0", "MCA2.0"],
    "Neonatal-Testis": ["Neonatal-Testis", "Testis", "9034", "Neonatal", "MCA2.0", "MCA2.0"],
    "Ten-Days-Testis": ["Ten-Days-Testis", "Testis", "15808", "Ten-Days", "MCA2.0", "MCA2.0"],
    "Three-Weeks-Testis": ["Three-Weeks-Testis", "Testis", "9095", "Three-Weeks", "MCA2.0", "MCA2.0"],
    "Adult-Testis": ["Adult-Testis", "Testis", "18530", "Adult", "MCA2.0", "MCA2.0"],
    "One-Year-Testis": ["One-Year-Testis", "Testis", "7688", "One-Year", "MCA3.0", "MCA3.0"],
    "Eighteen-Months-Testis": ["Eighteen-Months-Testis", "Testis", "9866", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "Two-Years-Testis": ["Two-Years-Testis", "Testis", "15580", "Two-Years", "MCA3.0", "MCA3.0"],
    "Adult-Thymus": ["Adult-Thymus", "Thymus", "4289", "Adult", "MCA2.0", "MCA2.0"],
    "Eighteen-Months-Thymus": ["Eighteen-Months-Thymus", "Thymus", "4457", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "One-Year-Thymus": ["One-Year-Thymus", "Thymus", "2732", "One-Year", "MCA3.0", "MCA3.0"],
    "Fetal-Uterus": ["Fetal-Uterus", "Uterus", "8462", "Fetal", "MCA2.0", "MCA2.0"],
    "Neonatal-Uterus": ["Neonatal-Uterus", "Uterus", "4561", "Neonatal", "MCA2.0", "MCA2.0"],
    "Ten-Days-Uterus": ["Ten-Days-Uterus", "Uterus", "4841", "Ten-Days", "MCA2.0", "MCA2.0"],
    "Three-Weeks-Uterus": ["Three-Weeks-Uterus", "Uterus", "9077", "Three-Weeks", "MCA2.0", "MCA2.0"],
    "Adult-Uterus": ["Adult-Uterus", "Uterus", "3739", "Adult", "MCA2.0", "MCA2.0"],
    "Eighteen-Months-Uterus": ["Eighteen-Months-Uterus", "Uterus", "5833", "Eighteen-Months", "MCA3.0", "MCA3.0"],
    "One-Year-Uterus": ["One-Year-Uterus", "Uterus", "13405", "One-Year", "MCA3.0", "MCA3.0"],
    "Two-Years-Uterus": ["Two-Years-Uterus", "Uterus", "21286", "Two-Years", "MCA3.0", "MCA3.0"],
}
# Patch the MCA table before turning it into a DataFrame:
#   * drop the entry keyed 'Bone-Marrow_c-kit' (underscore), if present, and
#     register the dataset under the hyphenated name 'Bone-Marrow-c-kit';
#   * overwrite 'One-Year-Pancreas', whose `name` field in the table above is
#     'Two-Years-Pancreas' and therefore does not match its key (the `name`
#     field is what the download URL is built from).
tissueinfo_MCA.pop('Bone-Marrow_c-kit', None)
tissueinfo_MCA.update({
    # Each row must hold exactly six fields (name, tissue, count, age,
    # source, note). A missing comma between the last two items silently
    # concatenates them into one string and leaves `note` empty.
    # NOTE(review): age is kept as "Two-Years" (same as the entry it
    # replaces); it looks like it should be "One-Year" - confirm.
    "One-Year-Pancreas": ["One-Year-Pancreas", "Pancreas", "3610", "Two-Years", "MCA3.0", "MCA3.0"],
    "Bone-Marrow-c-kit": ["Bone-Marrow-c-kit", "Bone-Marrow", "26483", "Adult", "MCA2.0", "MCA2.0"],
})

# One row per MCA dataset, in the insertion order of the dict.
df_info_MCA = pd.DataFrame(
    list(tissueinfo_MCA.values()),
    columns='name,tissue,count,age,source,note'.split(','))
# The archive of each dataset is addressed by its `name` field.
df_info_MCA["url"] = df_info_MCA["name"].apply(
    lambda x: 'https://bis.zju.edu.cn/MCA/data/dge/{}.zip'.format(x))
df_info_MCA['sp'] = 'mouse'
df_info_MCA['db'] = 'MCA'
display(df_info_MCA.head(2))
info['MCA'] = df_info_MCA

# Sanity check: every dict key must equal the `name` field of its row
# (keys and values of a dict iterate in the same order, so they line up).
df_info_MCA['k'] = list(tissueinfo_MCA.keys())
print("[name != k] {}".format(df_info_MCA.query("name != k").shape[0]))
display(df_info_MCA.query("name != k"))
del df_info_MCA

Unnamed: 0,name,tissue,count,age,source,note,url,sp,db
0,Adult-Adrenal-Gland,Adrenal-Gland,11815,Adult,MCA2.0,MCA2.0,https://bis.zju.edu.cn/MCA/data/dge/Adult-Adre...,mouse,MCA
1,Adult-Bladder,Bladder,2746,Adult,MCA2.0,MCA2.0,https://bis.zju.edu.cn/MCA/data/dge/Adult-Blad...,mouse,MCA


[name != k] 0


Unnamed: 0,name,tissue,count,age,source,note,url,sp,db,k


In [7]:
# Bring every per-database table to the same column set and order, then
# stack them into one DataFrame (the original row labels are kept as-is).
_columns = ['db', 'sp', 'name', 'tissue', 'url', 'count', 'age', 'note', 'source']
info = pd.concat([_df.loc[:, _columns] for _df in info.values()])
display(info.head(), info.shape)

Unnamed: 0,db,sp,name,tissue,url,count,age,note,source
0,HCL,human,Adult-Adipose1,Adipose,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Adu...,1372,36-year-old,HCL,DCD
1,HCL,human,Adult-Adrenal-Gland2,Adrenal-Gland,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Adu...,8114,52-year-old,HCL,DCD
2,HCL,human,Adult-Adrenal-Gland3,Adrenal-Gland,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Adu...,15083,23-year-old,HCL,DCD
3,HCL,human,Adult-Artery1,Artery,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Adu...,9652,46-year-old,HCL,DCD
4,HCL,human,Adult-Ascending-Colon1,Intestine,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Adu...,2026,47-year-old,HCL,DCD


(259, 9)

# Download

You can download the data from the `url` field above and save each file to the path given in the `file_path` field below.

In [8]:
# Expected local location of every archive:
#   <p_cache>/healthy/<db>/<last path component of the download URL>
info['file_path'] = info.apply(
    lambda row: p_cache.joinpath('healthy',
                                 row['db'], Path(
                                     row['url']).name), axis=1)

# Restrict the table to the selected tissues, order it deterministically and
# relabel the rows 0..n-1.
# print(*np.sort(info['tissue'].unique()),sep='\n')
query_tissue = 'Adrenal-Gland,Bone-Marrow,Brain,Heart,Intestine,Kidney,Liver,Lung,Pancreas,Spleen,Stomach'.split(
    ',')
info = info.query(
    "tissue in @query_tissue").sort_values('tissue,db,name'.split(','))
info.index = np.arange(info.shape[0])
display(info.head(), info.shape)
print('[info.name] is_unique {}'.format(info['name'].is_unique))

# Check that every expected archive exists. The filesystem is probed once
# per row and the resulting mask is reused for the summary and the listing.
_exists = info['file_path'].apply(lambda x: x.exists())
if _exists.all():
    print('[all data is exists]'.ljust(50, '-'))
else:
    display(_exists.value_counts())
    print('[not exists]'.ljust(50, '-'))
    display(info[~_exists])

Unnamed: 0,db,sp,name,tissue,url,count,age,note,source,file_path
0,HCL,human,Adult-Adrenal-Gland2,Adrenal-Gland,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Adu...,8114,52-year-old,HCL,DCD,/public/workspace/licanchengup/link/res_publis...
1,HCL,human,Adult-Adrenal-Gland3,Adrenal-Gland,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Adu...,15083,23-year-old,HCL,DCD,/public/workspace/licanchengup/link/res_publis...
2,HCL,human,Fetal-Adrenal-Gland2,Adrenal-Gland,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Fet...,9875,12-week-old,HCL,Abortion,/public/workspace/licanchengup/link/res_publis...
3,HCL,human,Fetal-Adrenal-Gland3,Adrenal-Gland,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Fet...,2547,14-week-old,HCL,Abortion,/public/workspace/licanchengup/link/res_publis...
4,HCL,human,Fetal-Adrenal-Gland4,Adrenal-Gland,http://bis.zju.edu.cn/HCL/data/DGE/USE_TAR/Fet...,1994,12-week-old,HCL,Abortion,/public/workspace/licanchengup/link/res_publis...


(144, 10)

[info.name] is_unique True
[all data is exists]------------------------------


In [9]:
# Spot check: does the archive of the row labelled 0 exist on disk?
info.loc[0, 'file_path'].exists()

True

# merge by db_tissue and exchange the cell_type

> 废弃`3-1_202311160955.csv`

In [None]:
# df_CL = pd.read_csv('3-1_202311160955.csv')
# df_CL['name'] = df_CL['name'].map(lambda _: {
#     'Bone-Marrow_c-kit': 'Bone-Marrow-c-kit'
# }.setdefault(_, _))
# df_CL['_merge_on'] = df_CL.apply(
#     lambda _row: '{db};{tissue};{name};{ann}'.format(
#         **_row), axis=1)

# display(df_CL.head(2))
# print('[df_CL._merge_on] is_unique {}'.format(
#     df_CL['_merge_on'].is_unique))

In [None]:
# Debug aid: uncomment to restrict the run to a single tissue.
# info = info.query("tissue == 'Spleen' ")

# Every (db, tissue) pair is merged into its own directory:
#   <p_cache>/healthy/<db>_<tissue>
info['path'] = info.apply(
    lambda _row: p_cache.joinpath('healthy', '{db}_{tissue}'.format(**_row)),
    axis=1)

for _i, _row in info.iterrows():
    # Progress indicator; the leading '\r' rewrites the same console line.
    print('\r[merge][{}/{}] {db}_{tissue}'.format(
        _i + 1, info.shape[0], **_row).ljust(50, ' '), end='')

    # A matrix.mtx in the output directory marks this (db, tissue) group as
    # already merged (presumably written by h5ad_to_mtx), so later rows of
    # the same group and re-runs skip it.
    if _row['path'].joinpath('matrix.mtx').exists():
        continue

    # Drop the reference to the previously merged object before loading the
    # datasets of the next group.
    adata = None

    # Load every dataset that shares this row's database and tissue,
    # keyed by dataset name.
    _group = info.query(
        "db == '{db}' & tissue  =='{tissue}'".format(**_row))
    adata = {
        _member['name']: load_data_from_HCL_or_MCA(
            _member['file_path'], _member['db'], msg=False)
        for _, _member in _group.iterrows()}

    # Tag each cell with the dataset it came from.
    for _batch_name, _batch_adata in adata.items():
        _batch_adata.obs['_batch'] = _batch_name

    # Concatenate the datasets; index_unique=';' is passed so that cell
    # names from different datasets stay distinct.
    adata = sc.concat(adata, index_unique=';')
    assert adata.obs.index.is_unique, \
        '[error] {db}_{tissue} index not unique'.format(**_row)

    # The cell-type -> CL mapping that used to be merged in here (from
    # 3-1_202311160955.csv) is deprecated; only the batch label and the
    # original annotation are kept in obs.
    adata.obs['cell_name'] = adata.obs.index.to_numpy()
    adata.obs.index = adata.obs['cell_name'].to_numpy()
    adata.obs = adata.obs.loc[:, ['_batch', 'pre_celltype']]

    print('')
    h5ad_to_mtx(adata, _row['path'])

print('\n', '\n[finish] merge\n'.center(100, '-'))

In [None]:
# Collapse the per-dataset table to one row per merged (tissue, species)
# output directory.
info = info[['tissue', 'sp', 'path']].drop_duplicates()

# Show whether the 3-letter lower-case tissue abbreviations are unique
# (they become part of the dataset name below).
_tissue_abbr = pd.Series(info['tissue'].unique()).str.lower().str.slice(0, 3)
display(_tissue_abbr.is_unique)

# Dataset name: '<short species code>_<tissue abbreviation>'.
# The columns are added in this order (name, then sp_simple) so that the
# column order of the CSV written below is unchanged.
info['name'] = info['tissue'].str.lower().str.slice(0, 3)
info['sp_simple'] = info['sp'].map(map_sp_reverse)
info['name'] = [
    '{sp_simple}_{name}'.format(sp_simple=_sp, name=_abbr)
    for _sp, _abbr in zip(info['sp_simple'], info['name'])]

# Store the paths relative to the cache root.
info['path'] = [_p.relative_to(p_cache) for _p in info['path']]

info.to_csv(p_cache.joinpath('info_healthy_HCL_MCA.csv'), index=False)
info.head(2)

In [None]:
# Build the cross-species parameter table: for every tissue present in both
# species, one row with human as reference / mouse as query, followed by the
# rows with the roles swapped.
_human = info.query("sp == 'human'")
_mouse = info.query("sp == 'mouse'")
df_para = pd.concat(
    [_ref.merge(_que, on='tissue', suffixes=('_ref', '_que'))
     for _ref, _que in ((_human, _mouse), (_mouse, _human))],
    axis=0)

# Name of the cell-type field for these runs; per the notebook header the
# `CL` field itself is defined by a later script, not here.
df_para['key_cell_type'] = 'CL'

df_para.to_csv(
    p_cache.joinpath('parameter_healthy_HCL_MCA.csv'), index=False)
display(df_para.head())

In [None]:
# Final banner: printed once every cell above has run, so that a finished
# run can be recognised in the captured output (e.g. nohup.out).
print("\n[finish]\n".center(100, "-"))