In [1]:
import scanpy as sc
from anndata import read_h5ad
import pandas as pd
import numpy as np
import scipy as sp
from statsmodels.stats.multitest import multipletests
import matplotlib.pyplot as plt
import seaborn as sns
import os
from os.path import join
import time
from gprofiler import GProfiler

# scTRS tools
import scTRS.util as util
import scTRS.data_loader as dl
import scTRS.method as md

# autoreload
%load_ext autoreload
%autoreload 2

In [2]:
# Setup file paths
# All locations are derived from the single data root DATA_PATH, so pointing the
# notebook at another copy of the data only requires editing one line.
DATA_PATH = '/n/holystore01/LABS/price_lab/Users/mjzhang/scTRS_data'
GS_FILE = DATA_PATH+'/gs_file/magma_10kb_1000.gs'
FIG_PATH = DATA_PATH+'/results/fig_hep'

# Trait names come from the TRAIT column of the tab-separated gene-set file.
# De-duplicate so that each trait's score file is loaded at most once downstream
# (repeated traits would otherwise yield duplicated score columns).
df_gs = pd.read_csv(GS_FILE, sep='\t')
TRAIT_LIST_FULL = sorted(df_gs['TRAIT'].unique())

# Data set info: one entry per data set key listed in DS_LIST
DS_LIST = ['facs']
DIC_INFO = {}
DIC_INFO['facs'] = {'species': 'mmusculus', 'dname': 'TMS FACS'}

# Set score files: data set key -> directory holding one '<trait>.score.gz' per trait
DIC_SCORE_PATH = {'facs': DATA_PATH+'/score_file/score.tms_facs_with_cov.magma_10kb_1000'}

In [3]:
# Load raw data
dic_data_raw = {}
dic_data_raw['facs'] = dl.load_tms_ct(DATA_PATH, data_name='facs')

# Load processed data; the loader result is indexed by tissue name, keep Pancreas only
dic_data_proc = {}
dic_data_proc['facs'] = dl.load_tms_processed(DATA_PATH, data_name='facs', tissue='Pancreas')['Pancreas']

# Load score
# For every trait, read '<trait>.score.gz', prefix its columns with the trait name
# and add a Benjamini-Hochberg FDR column computed from the trait's p-values.
dic_score = {}
for score in DIC_SCORE_PATH:
    print('# Loading %s score'%score)
    score_df_list = []
    for trait in TRAIT_LIST_FULL:
        score_file = DIC_SCORE_PATH[score]+'/%s.score.gz'%trait
        if not os.path.exists(score_file):
            print('# Missing score file: %s'%trait)
            # Skip the trait instead of letting read_csv fail on the missing file;
            # downstream cells already handle traits without a '.fdr' column.
            continue
        temp_df = pd.read_csv(score_file, sep='\t', index_col=0)
        temp_df.columns = ['%s.%s'%(trait,x) for x in temp_df.columns]
        temp_df['%s.fdr'%trait] = multipletests(temp_df['%s.pval'%trait], method='fdr_bh')[1]
        score_df_list.append(temp_df)
    # Concatenate once per data set rather than once per trait (avoids quadratic copying)
    if len(score_df_list)>0:
        dic_score[score] = pd.concat(score_df_list, axis=1)
    else:
        dic_score[score] = pd.DataFrame()

# Align cell list
# Keep cells present in both the processed data and the score table of the same
# data set. Sorting gives a deterministic order (set iteration order is not stable
# across kernel sessions).
dic_cell_list = {}
for ds in DS_LIST:
    dic_cell_list[ds] = sorted(set(dic_data_proc[ds].obs_names) & set(dic_score[ds].index))
    # Number of aligned cells per cell type, shown for each data set
    display(dic_data_raw[ds][dic_cell_list[ds],:].obs.groupby('cell_ontology_class').agg({'cell':len}))

Trying to set attribute `.obs` of view, copying.


# load_tms_processed: load facs data, tissue=[Pancreas]



This is where adjacency matrices should go now.

This is where adjacency matrices should go now.


# Loading facs score


Unnamed: 0_level_0,cell
cell_ontology_class,Unnamed: 1_level_1
endothelial cell,202
leukocyte,120
pancreatic A cell,521
pancreatic B cell,1342
pancreatic D cell,191
pancreatic PP cell,71
pancreatic acinar cell,576
pancreatic ductal cell,313
pancreatic stellate cell,48


In [16]:
# Overall association
# For each data set and trait, report the fraction of aligned cells that are
# significant, and show the cell types in which that fraction is high.
# Traits passing the overall cutoff are collected in TRAIT_LIST.
FDR_THRES = 0.1          # a cell is called significant when its FDR is below this
MIN_PROP_OVERALL = 0.01  # trait is kept when more than this fraction of cells is significant
MIN_PROP_CT = 0.1        # cell type is shown when more than this fraction of its cells is significant

TRAIT_LIST = []
for ds in DS_LIST:
    print('# %s'%DIC_INFO[ds]['dname'])
    # Cell annotations of this data set, restricted to the aligned cells so that the
    # per-cell-type fractions use the same cells as the overall fraction
    df_obs = dic_data_proc[ds].obs.loc[dic_cell_list[ds]]
    for trait in TRAIT_LIST_FULL:
        fdr_col = '%s.fdr'%trait
        if fdr_col not in dic_score[ds].columns:
            print('# Missing %s'%trait)
            continue

        ind_select = (dic_score[ds].loc[dic_cell_list[ds], fdr_col]<FDR_THRES)
        if ind_select.mean()>MIN_PROP_OVERALL:
            print('# %s %0.3f'%(trait, ind_select.mean()))
            # join returns a new frame, so the AnnData obs table is left untouched
            temp_df = df_obs.join(dic_score[ds][[fdr_col]])
            temp_df['sig'] = temp_df[fdr_col]<FDR_THRES
            # Mean of the boolean column = fraction of significant cells per cell type
            temp_df = temp_df.groupby('cell_ontology_class').agg({'sig':'mean'})
            temp_df = temp_df.loc[temp_df['sig']>MIN_PROP_CT]
            if temp_df.shape[0]>0:
                display(temp_df)
            TRAIT_LIST.append(trait)

# TMS FACS
# PASS_FastingGlucose_Manning 0.140


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic B cell,0.352459


# PASS_Intelligence_SavageJansen2018 0.151


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic A cell,0.132438
pancreatic B cell,0.315946


# PASS_Schizophrenia 0.017
# PASS_Schizophrenia_Pardinas2018 0.038
# PASS_Schizophrenia_Ruderfer2018 0.024
# PASS_Type_2_Diabetes 0.162


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic B cell,0.405365


# PASS_VerbalNumericReasoning_Davies2018 0.053


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic B cell,0.125931


# UKB_460K.biochemistry_Glucose 0.355


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic B cell,0.880775


# UKB_460K.biochemistry_HbA1c 0.264


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic B cell,0.660209


# UKB_460K.bmd_HEEL_TSCOREz 0.020


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic PP cell,0.338028


# UKB_460K.body_BMIz 0.177


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic B cell,0.438152


# UKB_460K.body_HEIGHTz 0.016
# UKB_460K.body_WHRadjBMIz 0.016


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic stellate cell,0.375


# UKB_460K.cancer_BREAST 0.073


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic acinar cell,0.409722


# UKB_460K.cov_EDU_COLLEGE 0.015
# UKB_460K.cov_EDU_YEARS 0.018
# UKB_460K.disease_RESPIRATORY_ENT 0.011


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic PP cell,0.183099


# UKB_460K.impedance_BASAL_METABOLIC_RATEz 0.193


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
pancreatic B cell,0.465723


# UKB_460K.lung_FEV1FVCzSMOKE 0.028


Unnamed: 0_level_0,sig
cell_ontology_class,Unnamed: 1_level_1
endothelial cell,0.128713
pancreatic stellate cell,0.708333


# UKB_460K.mental_NEUROTICISM 0.031
# UKB_460K.other_MORNINGPERSON 0.028
# UKB_460K.repro_AgeFirstBirth_Female 0.012


In [13]:
# NOTE(review): leftover inspection of the scratch variable `temp_df`.
# This cell's execution count (13) is lower than that of the association cell
# above (16), and the saved output is a per-cell table, whereas that cell leaves a
# per-cell-type summary in `temp_df` -- so this output is not what Run All produces.
temp_df

Unnamed: 0_level_0,FACS.selection,age,cell,cell_ontology_class,cell_ontology_id,free_annotation,method,mouse.id,sex,subtissue,tissue,n_genes,n_counts,louvain,leiden,PASS_FastingGlucose_Manning.fdr
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
A10_B000756_B007446_S10.mm10-plus-0-0,Viable,18m,A10_B000756,pancreatic B cell,,pancreatic B cell,facs,18_45_M,male,Endocrine,Pancreas,3212,978772.0,1,15,0.054213
A11_B000756_B007446_S11.mm10-plus-0-0,Viable,18m,A11_B000756,pancreatic B cell,,pancreatic B cell,facs,18_45_M,male,Endocrine,Pancreas,2544,666994.0,1,10,0.039029
A22_B000756_B007446_S22.mm10-plus-0-0,Viable,18m,A22_B000756,pancreatic B cell,,,facs,18_45_M,male,Endocrine,Pancreas,4843,1504617.0,2,2,0.052596
A8_B000756_B007446_S8.mm10-plus-0-0,Viable,18m,A8_B000756,pancreatic B cell,,,facs,18_45_M,male,Endocrine,Pancreas,4462,1286533.0,2,8,0.039286
B11_B000756_B007446_S35.mm10-plus-0-0,Viable,18m,B11_B000756,pancreatic B cell,,pancreatic B cell,facs,18_45_M,male,Endocrine,Pancreas,2673,1102249.0,1,10,0.072794
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
P17.MAA001857.3_38_F.1.1-1-1,,3m,P17.MAA001857.3_38_F.1.1,pancreatic B cell,CL:0000169,pancreatic B cell,facs,3_38_F,female,Endocrine,Pancreas,3462,245599.0,1,15,0.081560
P19.MAA001862.3_39_F.1.1-1-1,,3m,P19.MAA001862.3_39_F.1.1,pancreatic B cell,CL:0000169,,facs,3_39_F,female,Endocrine,Pancreas,6016,1013464.0,2,8,0.082193
P2.MAA001862.3_39_F.1.1-1-1,,3m,P2.MAA001862.3_39_F.1.1,pancreatic B cell,CL:0000169,,facs,3_39_F,female,Endocrine,Pancreas,6383,1301072.0,4,1,0.061440
P21.MAA001862.3_39_F.1.1-1-1,,3m,P21.MAA001862.3_39_F.1.1,pancreatic B cell,CL:0000169,,facs,3_39_F,female,Endocrine,Pancreas,5896,1395256.0,2,1,0.083598


In [8]:
# Inspect the processed data stored under 'facs' (bare expression, so the notebook shows its repr).
# NOTE(review): execution count (8) is out of order relative to the cells above.
dic_data_proc['facs']

AnnData object with n_obs × n_vars = 3384 × 22966
    obs: 'FACS.selection', 'age', 'cell', 'cell_ontology_class', 'cell_ontology_id', 'free_annotation', 'method', 'mouse.id', 'sex', 'subtissue', 'tissue', 'n_genes', 'n_counts', 'louvain', 'leiden'
    var: 'n_cells', 'means', 'dispersions', 'dispersions_norm', 'highly_variable'
    uns: 'age_colors', 'cell_ontology_class_colors', 'leiden', 'louvain', 'neighbors', 'pca'
    obsm: 'X_pca', 'X_tsne', 'X_umap'
    varm: 'PCs'
    obsp: 'distances', 'connectivities'