# disease_Mtb_0

整理Mtb相关数据集,最后输出`cache/parameter_Mtb.csv`

更新时间 2024年4月1日

[Mtb mouse GSE167232](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE167232)

[PMC8302446](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8302446/)

In [1]:
import sys
from pathlib import Path
p_root = Path('~/link/res_publish').expanduser()
None if str(p_root) in sys.path else sys.path.append(str(p_root))

In [2]:
from func import *

  from .autonotebook import tqdm as notebook_tqdm
2024-05-05 01:07:18.677597: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.



-------------------------help-------------------------
> parameter
    p_root	[name] res_publish
        p_run, p_plot, p_res, p_cache, p_pdf
    p_df_varmap
    map_sp_reverse
    rng
> run
    run_cross_species_models
    h5ad_to_mtx
    load_adata
    get_path_varmap
    find_path_from_para
    load_normalized_adata

> res
    get_test_result_df
    get_res_obs
    get_adata_umap
    show_umap

> plot
    get_color_map
    show_color_map
    show_color
    plot_umap
    savefig



# [Mtb_mh_GSE167232](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE167232)

[PMC8302446](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8302446/)

+ `./cache/disease/Mtb_mh_GSE167232`
    + `0_0.ipynb`
    + `1_mtb_0.ipynb`
    + `1_mtb_1.ipynb`
    + `2_bal_0.ipynb`
    + `2_bal_1.ipynb` 

# [Mtb_macaque_SCP642](https://singlecell.broadinstitute.org/single_cell/study/SCP642/cd8-lymphocytes-are-critical-for-early-control-of-tuberculosis-in-macaques#study-download)


[PMID: 37843832](https://pubmed.ncbi.nlm.nih.gov/37843832/)

+ `./cache/disease/Mtb_macaque_SCP642`
    + `0_Mtb_macaque_SCP642.ipynb`

# [COVID_macaque_GSE217483](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE217483)

[PMC9700497](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC9700497/)

+ `./cache/disease/COVID_macaque_GSE217483`
    + `0_COVID_macaque_GSE217483.ipynb`

# info and parameter

In [20]:
info = pd.DataFrame({
    'path': p_cache.joinpath('disease').iterdir()
})
info['name'] = info['path'].apply(lambda x: x.name)
info = info[info['path'].apply(
    lambda x: x.joinpath('matrix.mtx').exists())]
info = info[info['path'].apply(lambda x: x.match('*/Mtb_*')) |
            info['path'].apply(lambda x: x.match('*/COVID_*')) ]
info['tissue'] = 'Mtb'
info = info.join(info['name'].str.extract(
    '_(?P<sp_simple>ma|m|h)_(?P<tag>[^_]+)'))

info['sp'] = info['sp_simple'].map(map_sp)
info['name'] = info['name'].map(
    lambda k: {'Mtb_m_all_GSE167232': 'Mtb-m-all',
               'Mtb_ma_macrophage_delCD8_SCP642': 'Mtb-ma-macr',
               'Mtb_m_macrophage_GSE167232': 'Mtb-m-macr',
               'COVID_ma_macrophage_GSE217483': 'COVID-ma-macr',
               'Mtb_h_macrophage_GSE167232': 'Mtb-h-macr',
              'Mtb_COVID_merge_ma_macrophage':'merge-ma-macr'}.setdefault(k, k))
assert info['name'].notna().all(), '[Error] has na'
assert info['name'].is_unique, '[Error] not unique'
info.index = info['name'].to_numpy()
info = info.loc[:, 'tissue,sp,path,name,sp_simple,tag'.split(',')]
info['path'] = info['path'].apply(lambda x:x.relative_to(p_cache))
info.to_csv(p_cache.joinpath('info_Mtb.csv'),index=False)
info.head(2)
info

Unnamed: 0,tissue,sp,path,name,sp_simple,tag
Mtb-m-all,Mtb,mouse,disease/Mtb_m_all_GSE167232,Mtb-m-all,m,all
Mtb-ma-macr,Mtb,macaque,disease/Mtb_ma_macrophage_delCD8_SCP642,Mtb-ma-macr,ma,macrophage
Mtb-m-macr,Mtb,mouse,disease/Mtb_m_macrophage_GSE167232,Mtb-m-macr,m,macrophage
merge-ma-macr,Mtb,macaque,disease/Mtb_COVID_merge_ma_macrophage,merge-ma-macr,ma,macrophage
COVID-ma-macr,Mtb,macaque,disease/COVID_ma_macrophage_GSE217483,COVID-ma-macr,ma,macrophage
Mtb-h-macr,Mtb,human,disease/Mtb_h_macrophage_GSE167232,Mtb-h-macr,h,macrophage


In [22]:
i_ref = 'Mtb-m-macr'
df_para = []
for i_que in 'Mtb-h-macr,Mtb-ma-macr,COVID-ma-macr,merge-ma-macr'.split(','):
    df_para.append(pd.merge(
        info.loc[[i_ref], :],
        info.loc[[i_que], :],
        on=['tissue', 'tag'], suffixes=('_ref', '_que')
    ))
df_para = pd.concat(df_para)
df_para

Unnamed: 0,tissue,sp_ref,path_ref,name_ref,sp_simple_ref,tag,sp_que,path_que,name_que,sp_simple_que
0,Mtb,mouse,disease/Mtb_m_macrophage_GSE167232,Mtb-m-macr,m,macrophage,human,disease/Mtb_h_macrophage_GSE167232,Mtb-h-macr,h
0,Mtb,mouse,disease/Mtb_m_macrophage_GSE167232,Mtb-m-macr,m,macrophage,macaque,disease/Mtb_ma_macrophage_delCD8_SCP642,Mtb-ma-macr,ma
0,Mtb,mouse,disease/Mtb_m_macrophage_GSE167232,Mtb-m-macr,m,macrophage,macaque,disease/COVID_ma_macrophage_GSE217483,COVID-ma-macr,ma
0,Mtb,mouse,disease/Mtb_m_macrophage_GSE167232,Mtb-m-macr,m,macrophage,macaque,disease/Mtb_COVID_merge_ma_macrophage,merge-ma-macr,ma


In [23]:
# df_para['key_cell_type'] = df_para['name_que'].map(lambda x:
#                         {
#                             'Mtb-h-macr': 'cell_type',
#                             'Mtb-ma-macr': 'sub_cell_type',
#                             'COVID-ma-macr': 'sub_cell_type'
#                         }.setdefault(x, x))
df_para['key_cell_type'] = 'macr_cell_type'
df_para = df_para.drop(columns = ['tag'])
display(df_para)
print(*df_para.columns, sep='\n')

Unnamed: 0,tissue,sp_ref,path_ref,name_ref,sp_simple_ref,sp_que,path_que,name_que,sp_simple_que,key_cell_type
0,Mtb,mouse,disease/Mtb_m_macrophage_GSE167232,Mtb-m-macr,m,human,disease/Mtb_h_macrophage_GSE167232,Mtb-h-macr,h,macr_cell_type
0,Mtb,mouse,disease/Mtb_m_macrophage_GSE167232,Mtb-m-macr,m,macaque,disease/Mtb_ma_macrophage_delCD8_SCP642,Mtb-ma-macr,ma,macr_cell_type
0,Mtb,mouse,disease/Mtb_m_macrophage_GSE167232,Mtb-m-macr,m,macaque,disease/COVID_ma_macrophage_GSE217483,COVID-ma-macr,ma,macr_cell_type
0,Mtb,mouse,disease/Mtb_m_macrophage_GSE167232,Mtb-m-macr,m,macaque,disease/Mtb_COVID_merge_ma_macrophage,merge-ma-macr,ma,macr_cell_type


tissue
sp_ref
path_ref
name_ref
sp_simple_ref
sp_que
path_que
name_que
sp_simple_que
key_cell_type


In [24]:
df_para.to_csv(p_cache.joinpath('parameter_Mtb.csv'), index=False)
print("\n[finish]\n".center(100, "-"))

---------------------------------------------
[finish]
---------------------------------------------
