In [1]:
import scanpy as sc
from anndata import read_h5ad
import pandas as pd
import numpy as np
import scipy as sp
from statsmodels.stats.multitest import multipletests
import matplotlib.pyplot as plt
import seaborn as sns
import os
from os.path import join
import time
from gprofiler import GProfiler

# scTRS tools
import scTRS.util as util
import scTRS.data_loader as dl
import scTRS.method as md

# autoreload
%load_ext autoreload
%autoreload 2

In [2]:
# Setup file paths
# All paths are derived from DATA_PATH so that relocating the data only
# requires editing this single line.
DATA_PATH = '/n/holystore01/LABS/price_lab/Users/mjzhang/scTRS_data'
GS_FILE = DATA_PATH+'/gs_file/magma_10kb_1000.gs'
FIG_PATH = DATA_PATH+'/results/fig_hep'

# Gene-set file: tab-separated, with one row per trait in the 'TRAIT' column
df_gs = pd.read_csv(GS_FILE, sep='\t')
TRAIT_LIST_FULL = sorted(df_gs['TRAIT'])

# Data set info
DS_LIST = ['facs']
DIC_INFO = {}
DIC_INFO['facs'] = {'species': 'mmusculus', 'dname': 'TMS FACS'}

# Set score files (one directory of per-trait score files per data set)
DIC_SCORE_PATH = {'facs': DATA_PATH+'/score_file/score.tms_facs_with_cov.magma_10kb_1000'}

In [3]:
# Load raw data
dic_data_raw = {}
dic_data_raw['facs'] = dl.load_tms_ct(DATA_PATH, data_name='facs')

# Load score
# For every data set, read one score file per trait, prefix its columns with
# the trait name, add a BH-FDR column computed from the trait's p-values, and
# column-bind all traits into a single DataFrame indexed by cell.
dic_score = {}
for score in DIC_SCORE_PATH:
    print('# Loading %s score'%score)
    list_trait_df = []
    for trait in TRAIT_LIST_FULL:
        score_file = DIC_SCORE_PATH[score]+'/%s.score.gz'%trait
        if not os.path.exists(score_file):
            print('# Missing score file: %s'%trait)
            # Skip the trait; reading a non-existent file would raise
            continue
        temp_df = pd.read_csv(score_file, sep='\t', index_col=0)
        temp_df.columns = ['%s.%s'%(trait,x) for x in temp_df.columns]
        temp_df['%s.fdr'%trait] = multipletests(temp_df['%s.pval'%trait], method='fdr_bh')[1]
        list_trait_df.append(temp_df)
    # Concatenate once (instead of inside the loop) to avoid repeatedly
    # copying the growing frame; fall back to an empty frame if nothing loaded.
    if list_trait_df:
        dic_score[score] = pd.concat(list_trait_df, axis=1)
    else:
        dic_score[score] = pd.DataFrame()

# Align cell list
# Cells present in both the raw data and the score table of each data set
dic_cell_list = {}
for ds in DS_LIST:
    dic_cell_list[ds] = list(set(dic_data_raw[ds].obs_names) & set(dic_score[ds].index))


Trying to set attribute `.obs` of view, copying.


# Loading facs score


In [42]:
# Overall association
ds = 'facs'
trait_list = ['UKB_460K.bp_DIASTOLICadjMEDz','UKB_460K.bp_SYSTOLICadjMEDz']
fdr_thres = 0.1      # a cell is called significant when its FDR is below this
min_sig_frac = 0.1   # only show cell types whose significant fraction exceeds this

# Correlation of the normalized scores between the traits (shortened column names)
df_norm_score = dic_score[ds][['%s.norm_score'%x for x in trait_list]].copy()
df_norm_score.columns = [x.replace('.norm_score','').replace('UKB_460K.','')
                         for x in df_norm_score.columns]
display(df_norm_score.corr())

# Per trait: number of significant cells, then the fraction of significant
# cells within each cell type
for trait in trait_list:
    fdr_col = '%s.fdr'%trait
    if fdr_col not in dic_score[ds].columns:
        print('# Missing %s'%trait)
        continue

    # Use the data set selected by `ds` rather than a hard-coded key
    df_obs = dic_data_raw[ds].obs.copy()
    df_obs = df_obs.join(dic_score[ds][[fdr_col]])
    # Cells without a score get NaN FDR, which compares False (not significant)
    df_obs['sig'] = df_obs[fdr_col]<fdr_thres
    print('# %s: %d/%d'%(trait, df_obs['sig'].sum(), df_obs.shape[0]))

    # The string 'mean' avoids the deprecated numpy-callable form of agg
    df_frac = df_obs.groupby('cell_ontology_class').agg({'sig':'mean'})
    df_frac = df_frac.loc[df_frac['sig']>min_sig_frac]
    if df_frac.shape[0]>0:
        display(df_frac)

Unnamed: 0,bp_DIASTOLICadjMEDz,bp_SYSTOLICadjMEDz
bp_DIASTOLICadjMEDz,1.0,0.689135
bp_SYSTOLICadjMEDz,0.689135,1.0


# UKB_460K.bp_DIASTOLICadjMEDz: 492/110096


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
brain pericyte,0.190083
pericyte cell,0.263158
ventricular myocyte,0.401361


# UKB_460K.bp_SYSTOLICadjMEDz: 462/110096


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
atrial myocyte,0.248139
brain pericyte,0.194215
pericyte cell,0.263158
ventricular myocyte,0.55102


In [5]:
# Inspect the list of data set names defined in the configuration cell
DS_LIST

['facs']