In [4]:
# autoreload
%load_ext autoreload
%autoreload 2

import scTRS.method_simple as simple_md
import scTRS.util as util
import scTRS.method as md
import scTRS.data_loader as dl
import submitit
import numpy as np
from anndata import read_h5ad
from statsmodels.stats.multitest import multipletests

import pandas as pd
import os
from os.path import join
import itertools
import pickle
from IPython.display import display, Markdown, Latex
import matplotlib.pyplot as plt

# constants
# Root directory for all scTRS inputs; every other path below is derived from it
# so that relocating the data only requires changing this one line.
DATA_PATH = '/n/holystore01/LABS/price_lab/Users/mjzhang/scTRS_data'
SCORE_FILE_DIR = join(DATA_PATH, "score_file")

# GWAS trait identifiers; each one is also the basename of a `<trait>.score.gz`
# file and the key under which its scores are stored.
TRAIT_LIST = ['PASS_Schizophrenia_Ruderfer2018',
              'PASS_BipolarDisorder_Ruderfer2018',
              'PASS_Alzheimers_Jansen2019',
              'PASS_AdultOnsetAsthma_Ferreira2019',
              'PASS_Coronary_Artery_Disease',
              'PASS_LargeArteryStroke_Malik2018',
              'PASS_HDL', 'PASS_LDL',
              'PASS_Rheumatoid_Arthritis', 'PASS_Lupus',
              'PASS_FastingGlucose_Manning',
              'PASS_IBD_deLange2017',
              'PASS_Type_1_Diabetes',
              'PASS_Type_2_Diabetes']

# Raw TMS FACS AnnData object, located relative to DATA_PATH rather than via a
# second hard-coded copy of the absolute path.
SCORE_ADATA = read_h5ad(join(DATA_PATH, "tabula_muris_senis",
                             "tabula-muris-senis-facs-official-raw-obj.h5ad"))
# Return value is discarded; presumably this annotates SCORE_ADATA in place -- TODO confirm.
md.compute_stats(SCORE_ADATA)
# Processed FACS data for all tissues; indexed by tissue name (e.g. "Liver") further down.
PLOT_ADATA_DICT = dl.load_tms_processed(DATA_PATH, data_name='facs', tissue='all')

# load_tms_processed: load facs data, tissue=[Aorta, BAT, Bladder, Brain_Myeloid, Brain_Non-Myeloid, Diaphragm, GAT, Heart, Kidney, Large_Intestine, Limb_Muscle, Liver, Lung, MAT, Mammary_Gland, Marrow, Pancreas, SCAT, Skin, Spleen, Thymus, Tongue, Trachea]


In [11]:
def _read_tsv(path):
    """Read a tab-separated (optionally gzip-compressed) score file into a DataFrame."""
    return pd.read_csv(path, sep='\t')


# GWAS trait scores, one DataFrame per entry of TRAIT_LIST, keyed by trait name.
result_dir = join(SCORE_FILE_DIR, "score.facs.gwas_max_abs_z.top500")
result_dict = {}
for trait in TRAIT_LIST:
    trait_score_path = join(result_dir, f"{trait}.score.gz")
    result_dict[trait] = _read_tsv(trait_score_path)

# Random gene-set scores, appended under the keys random_0 ... random_4.
result_dir = join(SCORE_FILE_DIR, "score.facs.all_size500.weight_1en2")
for rep_i in range(5):
    random_score_path = join(result_dir, f"all_size500_rep{rep_i}.score.gz")
    result_dict[f"random_{rep_i}"] = _read_tsv(random_score_path)

In [100]:
# Per-trait z-score vectors, keyed by trait name (GWAS traits plus random controls).
zsc_dict = {name: score_df["zscore"].values for name, score_df in result_dict.items()}

# Take the shared index from an explicitly named reference trait instead of
# relying on a loop variable left behind by an earlier cell.
ref_trait = TRAIT_LIST[0]
zsc_index = result_dict[ref_trait]['index'].values

# Every z-score vector is passed alongside this single index, so each score file
# must list its rows in exactly the same order; fail loudly if one does not.
misaligned = [name for name, score_df in result_dict.items()
              if not np.array_equal(score_df['index'].values, zsc_index)]
assert not misaligned, f"score files not aligned with {ref_trait}: {misaligned}"

# For each trait, calculate the mean, sd and Geary's C statistics for each
# tissue - cell type pair, and prioritize.
# "gearysc" maps to None rather than a callable; presumably it is computed
# inside util.calculate_trs_stats -- TODO confirm.
trs_stats = util.calculate_trs_stats(
    zsc_dict=zsc_dict,
    zsc_index=zsc_index,
    stats_dict={"mean": np.mean, "sd": np.std, "gearysc": None},
    adata=PLOT_ADATA_DICT["Liver"],
    stratify_by="cell_ontology_class",
)


In [101]:
# Display the "gearysc" table of trs_stats for Liver: one row per
# cell_ontology_class group, one column per trait (including the random controls).
trs_stats["gearysc"]

Unnamed: 0,PASS_Schizophrenia_Ruderfer2018,PASS_BipolarDisorder_Ruderfer2018,PASS_Alzheimers_Jansen2019,PASS_AdultOnsetAsthma_Ferreira2019,PASS_Coronary_Artery_Disease,PASS_LargeArteryStroke_Malik2018,PASS_HDL,PASS_LDL,PASS_Rheumatoid_Arthritis,PASS_Lupus,PASS_FastingGlucose_Manning,PASS_IBD_deLange2017,PASS_Type_1_Diabetes,PASS_Type_2_Diabetes,random_0,random_1,random_2,random_3,random_4
B cell,1.039177,1.011243,0.979463,0.974678,1.001037,1.01177,0.875911,0.928213,0.905037,0.905279,0.985546,0.83645,0.938148,0.973006,0.973288,1.006356,1.00588,0.949516,1.00718
"CD4-positive, alpha-beta T cell",0.871667,0.972955,0.709344,0.876888,1.107593,0.781877,1.027696,0.86817,0.877461,0.986549,1.103031,1.092697,1.272804,1.02177,1.220331,0.859227,1.306894,1.164752,0.764184
"CD8-positive, alpha-beta T cell",0.995675,1.052394,1.050777,0.867412,0.821725,0.938052,0.891429,0.917746,0.819606,0.870697,0.921228,0.908596,0.756259,1.001931,1.205003,0.661051,0.948431,0.874247,0.907622
Kupffer cell,0.928977,0.890426,0.875614,0.903162,0.876805,0.916874,0.714575,0.794043,0.846783,0.878217,0.928325,0.81624,0.902863,0.88756,0.9622,0.857843,0.996268,0.977885,1.012036
NK cell,1.101968,0.814428,0.758948,0.988263,0.955194,0.998032,0.803135,0.453476,0.853145,0.886271,0.824517,0.939479,0.662274,1.112459,0.75869,0.953007,0.904927,0.809062,0.972399
T cell,0.879025,0.851979,0.943573,0.948879,1.229783,0.844868,1.11706,1.190418,0.857672,0.952504,0.84403,0.650352,1.028485,0.949988,0.97622,1.19512,0.950427,1.303049,1.168842
endothelial cell of hepatic sinusoid,0.961888,0.945343,0.923408,0.928275,0.969067,0.974004,0.927573,1.008562,0.940645,0.849935,0.906483,0.9414,0.875595,0.884296,0.996263,0.984006,0.979057,0.96437,0.980398
hepatocyte,0.677828,0.620227,0.769177,0.74506,0.513472,0.691732,0.381349,0.315428,0.524034,0.769281,0.586936,0.667161,0.458084,0.67065,0.878811,0.791727,0.722965,0.617389,0.937116
mature NK T cell,1.038774,0.918424,0.898978,0.950493,0.977961,0.888286,0.918966,0.939835,0.710834,0.862725,0.8965,0.793074,0.911399,1.003301,1.047958,0.983432,0.959366,0.871799,0.978224
myeloid leukocyte,0.93626,0.937924,0.870291,0.919832,0.8575,0.769296,0.810039,0.789848,0.98731,0.830174,0.881662,0.905229,0.893031,0.923296,0.917836,0.927021,0.806406,0.948365,1.011219
