In [1]:
import scanpy as sc
import pandas as pd
import numpy as np
import scipy as sp
from statsmodels.stats.multitest import multipletests
import matplotlib.pyplot as plt
import seaborn as sns
import os
import time
from os.path import join
import glob

# scTRS tools
import scTRS.util as util
import scTRS.data_loader as dl
import scTRS.method as md
from anndata import read_h5ad

import warnings
# Silence FutureWarning messages for the rest of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Print the versions of scanpy and its main dependencies (see the cell output).
sc.logging.print_header()

scanpy==1.6.0 anndata==0.7.4 umap==0.4.6 numpy==1.19.4 scipy==1.5.2 pandas==1.1.4 scikit-learn==0.23.2 statsmodels==0.12.0 python-igraph==0.8.3 leidenalg==0.8.2


In [2]:
#### Setup file paths
# Root directory of the scTRS data; every other data path below is derived from it,
# so pointing the notebook at another copy of the data only requires editing this line.
DATA_PATH = '/n/holystore01/LABS/price_lab/Users/mjzhang/scTRS_data'
SCORE_ADATA_FILE = join(DATA_PATH, 'tabula_muris_senis',
                        'tabula-muris-senis-facs-official-raw-obj.h5ad')

# Count data (used for scoring genes)
score_adata = read_h5ad(SCORE_ADATA_FILE)
# Plot data (for UMAP)
plot_adata_dict = dl.load_tms_processed(DATA_PATH, data_name='facs', tissue='all')

# load_tms_processed: load facs data, tissue=[Aorta, BAT, Bladder, Brain_Myeloid, Brain_Non-Myeloid, Diaphragm, GAT, Heart, Kidney, Large_Intestine, Limb_Muscle, Liver, Lung, MAT, Mammary_Gland, Marrow, Pancreas, SCAT, Skin, Spleen, Thymus, Tongue, Trachea]


In [3]:
# Suffix shared by every score file; the trait name is the file name without it.
SCORE_FILE_SUFFIX = ".score.gz"


def trait_from_score_path(path, suffix=SCORE_FILE_SUFFIX):
    """Return the trait name encoded in a score-file path.

    The trait name is the base name of `path` with the trailing `suffix`
    removed, e.g. ``.../PASS_HDL.score.gz`` -> ``PASS_HDL``.
    """
    return os.path.basename(path)[:-len(suffix)]


# For each method, collect the traits that have a score file under
# out/score_file/score.facs.{method}.top500/. Only files ending in the score
# suffix are considered, so unrelated files in the folder cannot yield bogus names.
method_trait_dict = dict()
for method in ["gwas_maxabsz", "hess", "magma"]:
    score_paths = glob.glob(f"out/score_file/score.facs.{method}.top500/*{SCORE_FILE_SUFFIX}")
    method_trait_dict[method] = [trait_from_score_path(path) for path in score_paths]

In [4]:
# Traits considered in this comparison.
target_trait_list = [
    'PASS_ADHD_Demontis2018',
    'PASS_Alzheimers_Jansen2019',
    'PASS_Anorexia',
    'PASS_Autism',
    'PASS_Coronary_Artery_Disease',
    'PASS_FastingGlucose_Manning',
    'PASS_HDL',
    'PASS_Height1',
    'PASS_IBD_deLange2017',
    'PASS_Intelligence_SavageJansen2018',
    'PASS_LDL',
    'PASS_MDD_Wray2018',
    'PASS_Rheumatoid_Arthritis',
    'PASS_Triglycerides',
    'PASS_Type_2_Diabetes',
    'PASS_UC_deLange2017',
    'UKB_460K.pigment_HAIR',
    'UKB_460K.pigment_SKIN',
    'UKB_460K.blood_MEAN_PLATELET_VOL',
]

# Keep the target traits that appear in every entry of method_trait_dict,
# in alphabetical order.
traits_in_every_method = set.intersection(
    *(set(traits) for traits in method_trait_dict.values())
)
trait_list = sorted(set(target_trait_list).intersection(traits_in_every_method))

In [5]:
# trs_pval_dict[method][trait] -> DataFrame with the 'index' and 'pval' columns
# of that method's score file for the trait.
trs_pval_dict = {}
for method in ["gwas_maxabsz", "hess", "magma"]:
    pval_by_trait = {}
    for trait in trait_list:
        score_table = pd.read_csv(
            f"out/score_file/score.facs.{method}.top500/{trait}.score.gz",
            sep='\t',
        )
        df = score_table[['index', 'pval']]
        pval_by_trait[trait] = df
    trs_pval_dict[method] = pval_by_trait

In [6]:
# Download the known trait / cell-type association spreadsheet from Dropbox and
# save it as ./known_trait_cell_assoc.xlsx (needs network access and `wget`).
!wget -O ./known_trait_cell_assoc.xlsx https://www.dropbox.com/s/95rd9i1q5b8ytjt/known_trait_cell_assoc_mz102020.xlsx?dl=0 --no-verbose

2021-01-02 00:36:46 URL:https://uca0f4363cf33501c43ba119dc38.dl.dropboxusercontent.com/cd/0/inline2/BGNBoqUEu9Xba9AI9RDt46gh6lbhogHLeUK8ZT6OxK0JRUkVRGvXd8fdWtNVb5Vlers7AWo-oOIMQfOXyh-kmfSTIxgHlmLi5F7QGDCRiYEwRbFiomv9bCLXq0OQrjhR5R2Okm-ED9vVv_fJ4WFjAa8xTrK-ETqD_h9vnWRU07dIeY75TyNg2i_aaW19RpPONwLA9nGGidIXfeE8SxinTDj6aEGmbP64BfI0MDS6nXGcQ7FWYkR1wkYtQUL6kcK0va27gr5AE0BRvbtKsrSwlhVI0QLLmRnMgX0PTj_tGXLy-Szecs6GLrQ-V86jz4EAY4Q6gNSsp2iZusgM0PnMluujdBUjm_duDz60krsiJ1DLRg/file [23823/23823] -> "./known_trait_cell_assoc.xlsx" [1]


In [7]:
# Load the downloaded association table and keep the rows that name an
# associated tissue / cell type.
known_association = pd.read_excel('./known_trait_cell_assoc.xlsx')
known_association = known_association[known_association['Assoc_Tissue_Celltype'].notna()].reset_index(drop=True)

# Tissue = first ';'-separated entry, truncated at its first '.'
# (presumably entries look like "Tissue.Celltype;..." -- TODO confirm against the spreadsheet).
known_association['tissue'] = known_association['Assoc_Tissue_Celltype'].apply(
    lambda entry: entry.split(';')[0].split('.')[0]
)
known_association = known_association[['Trait_Identifier', 'tissue']].rename(columns={'Trait_Identifier': 'trait'})

# Restrict to the traits analysed in this notebook.
known_association = known_association[known_association['trait'].isin(trait_list)].reset_index(drop=True)

# Manually add Spleen and Thymus as associated tissues for rheumatoid arthritis.
# These rows are added after the trait_list filter, i.e. unconditionally.
# The rows are collected first and concatenated once: DataFrame.append is deprecated
# (removed in pandas 2.0) and copies the whole frame on every call.
extra_rows = []
for tissue in ['Spleen', 'Thymus']:
    for trait in ['PASS_Rheumatoid_Arthritis']:
        extra_rows.append({'trait': trait, 'tissue': tissue})
known_association = pd.concat(
    [known_association, pd.DataFrame(extra_rows, columns=['trait', 'tissue'])],
    ignore_index=True,
)

known_association = known_association.sort_values('trait').reset_index(drop=True)

In [8]:
# Enrichment of each method's p-values in the tissue known to be associated with
# each trait:
#
#     enrichment = mean(log10 p | cells of the associated tissue) / mean(log10 p | all cells)
#
# Both means are negative when the p-values are below 1, so a ratio above 1 means
# the cells of the associated tissue have smaller p-values on average than the
# data set as a whole.
df_obs = score_adata.obs.copy()
enrichment_records = []
# for each known association
for _, assoc in known_association.iterrows():
    for method, pval_by_trait in trs_pval_dict.items():
        # Look the trait up directly instead of scanning every trait for a match.
        if assoc.trait not in pval_by_trait:
            continue
        trs_pval = pval_by_trait[assoc.trait]
        # The p-values are attached positionally below, so the score file must list
        # the cells in exactly the same order as score_adata.obs.
        assert (df_obs.index == trs_pval['index']).all(), \
            f"cell order of {method}/{assoc.trait} scores does not match score_adata.obs"
        df_obs['pval'] = trs_pval['pval'].values
        # Take the logarithm once and reuse it for both means.
        log10_pval = np.log10(df_obs['pval'])
        in_tissue = df_obs['tissue'] == assoc.tissue
        enrichment = np.mean(log10_pval[in_tissue]) / np.mean(log10_pval)
        enrichment_records.append([assoc.trait, assoc.tissue, method, enrichment])
enrichment_df = pd.DataFrame(enrichment_records, columns=['trait', 'tissue', 'method', 'enrichment'])

In [9]:
# Display the enrichment table: one row per (trait, associated tissue, method).
enrichment_df

Unnamed: 0,trait,tissue,method,enrichment
0,PASS_ADHD_Demontis2018,Brain_Non-Myeloid,gwas_maxabsz,1.975806
1,PASS_ADHD_Demontis2018,Brain_Non-Myeloid,hess,2.69238
2,PASS_ADHD_Demontis2018,Brain_Non-Myeloid,magma,1.695213
3,PASS_Alzheimers_Jansen2019,Liver,gwas_maxabsz,2.244134
4,PASS_Alzheimers_Jansen2019,Liver,hess,1.025596
5,PASS_Alzheimers_Jansen2019,Liver,magma,1.914769
6,PASS_Anorexia,Marrow,gwas_maxabsz,0.388224
7,PASS_Anorexia,Marrow,hess,0.556639
8,PASS_Anorexia,Marrow,magma,0.610592
9,PASS_Autism,Brain_Non-Myeloid,gwas_maxabsz,2.22804
