In [1]:
import scanpy as sc
from anndata import read_h5ad
import pandas as pd
import numpy as np
import scipy as sp
from statsmodels.stats.multitest import multipletests
import matplotlib.pyplot as plt
import seaborn as sns
import os
from os.path import join
import time
from gprofiler import GProfiler

# scTRS tools
import scTRS.util as util
import scTRS.data_loader as dl
import scTRS.method as md

# autoreload
%load_ext autoreload
%autoreload 2

In [2]:
# Setup file paths
# Every location is derived from DATA_PATH so the data root only has to be
# changed in one place (the resulting strings are the same as before).
DATA_PATH = '/n/holystore01/LABS/price_lab/Users/mjzhang/scTRS_data'
GS_FILE = DATA_PATH+'/gs_file/magma_10kb_1000.gs'
FIG_PATH = DATA_PATH+'/results/fig_hep'

# Tab-separated table read from GS_FILE; its TRAIT column provides the trait names
df_gs = pd.read_csv(GS_FILE, sep='\t')
TRAIT_LIST_FULL = sorted(df_gs['TRAIT'])

# Data set info
DS_LIST = ['facs']
DIC_INFO = {}
DIC_INFO['facs'] = {'species': 'mmusculus', 'dname': 'TMS FACS'}

# Set score files
# Maps each data set key to the directory holding its per-trait score files
DIC_SCORE_PATH = {'facs': DATA_PATH+'/score_file/score.tms_facs_with_cov.magma_10kb_1000'}

In [3]:
# Load raw data 
dic_data_raw = {}
dic_data_raw['facs'] = dl.load_tms_ct(DATA_PATH, data_name='facs')

# Processed data, read from an .h5ad file located under DATA_PATH
dic_data_proc = {}
dic_data_proc['facs'] = read_h5ad(DATA_PATH+'/single_cell_data/tms_proc/hep.facs.h5ad')

# Load score 
# For every entry of DIC_SCORE_PATH, one '<trait>.score.gz' file is read per trait.
# Columns are prefixed with the trait name and a '<trait>.fdr' column is derived
# from the '<trait>.pval' column with multipletests(method='fdr_bh').
dic_score = {}
for score in DIC_SCORE_PATH:
    print('# Loading %s score'%score)
    list_temp_df = []
    for trait in TRAIT_LIST_FULL:
        score_file = DIC_SCORE_PATH[score]+'/%s.score.gz'%trait
        if not os.path.exists(score_file):
            # Report and skip the trait; reading a missing file would raise
            print('# Missing score file: %s'%trait)
            continue
        temp_df = pd.read_csv(score_file, sep='\t', index_col=0)
        temp_df.columns = ['%s.%s'%(trait,x) for x in temp_df.columns]
        temp_df['%s.fdr'%trait] = multipletests(temp_df['%s.pval'%trait], method='fdr_bh')[1]
        list_temp_df.append(temp_df)
    # Concatenate once per score set instead of growing the frame inside the loop;
    # an empty frame is kept when no score file was found
    dic_score[score] = pd.concat(list_temp_df, axis=1) if list_temp_df else pd.DataFrame()

# Align cell list 
# Keep the cells present in both the processed data and the score table of each data set
dic_cell_list = {}
for ds in DS_LIST:
    dic_cell_list[ds] = list(set(dic_data_proc[ds].obs_names) & set(dic_score[ds].index))
    # Number of aligned cells per cell_ontology_class
    display(dic_data_raw[ds][dic_cell_list[ds],:].obs.groupby('cell_ontology_class').agg({'cell':len}))

Trying to set attribute `.obs` of view, copying.


# Loading facs score


Unnamed: 0_level_0,cell
cell_ontology_class,Unnamed: 1_level_1
hepatocyte,1162


In [4]:
# Overall association
# A trait is kept when more than 1% of the aligned cells have an FDR below 0.1.
TRAIT_LIST = []
for ds in DS_LIST:
    print('# %s'%DIC_INFO[ds]['dname'])
    score_columns = dic_score[ds].columns
    for trait in TRAIT_LIST_FULL:
        fdr_col = '%s.fdr'%trait
        if fdr_col in score_columns:
            # Boolean flag per aligned cell: FDR below 0.1
            ind_select = (dic_score[ds].loc[dic_cell_list[ds], fdr_col]<0.1)
            frac_select = ind_select.mean()
            if frac_select>0.01:
                print('# %s %0.3f'%(trait, frac_select))
                TRAIT_LIST.append(trait)
        else:
            # No FDR column was loaded for this trait
            print('# Missing %s'%trait)

# TMS FACS
# PASS_LDL 0.426
# UKB_460K.biochemistry_AlanineAminotransferase 0.388
# UKB_460K.biochemistry_AlkalinePhosphatase 0.233
# UKB_460K.biochemistry_ApolipoproteinA 0.373
# UKB_460K.biochemistry_ApolipoproteinB 0.550
# UKB_460K.biochemistry_Cholesterol 0.515
# UKB_460K.biochemistry_HDLcholesterol 0.250
# UKB_460K.biochemistry_LDLdirect 0.504
# UKB_460K.biochemistry_LipoproteinA 0.185
# UKB_460K.biochemistry_SHBG 0.123
# UKB_460K.biochemistry_Testosterone_Male 0.230
# UKB_460K.biochemistry_TotalBilirubin 0.094
# UKB_460K.biochemistry_Triglycerides 0.506


In [5]:
# Show the traits collected in TRAIT_LIST by the overall-association cell
TRAIT_LIST

['PASS_LDL',
 'UKB_460K.biochemistry_AlanineAminotransferase',
 'UKB_460K.biochemistry_AlkalinePhosphatase',
 'UKB_460K.biochemistry_ApolipoproteinA',
 'UKB_460K.biochemistry_ApolipoproteinB',
 'UKB_460K.biochemistry_Cholesterol',
 'UKB_460K.biochemistry_HDLcholesterol',
 'UKB_460K.biochemistry_LDLdirect',
 'UKB_460K.biochemistry_LipoproteinA',
 'UKB_460K.biochemistry_SHBG',
 'UKB_460K.biochemistry_Testosterone_Male',
 'UKB_460K.biochemistry_TotalBilirubin',
 'UKB_460K.biochemistry_Triglycerides']