In [1]:
import scanpy as sc
import pandas as pd
import numpy as np
import scipy as sp
from statsmodels.stats.multitest import multipletests
import matplotlib.pyplot as plt
import seaborn as sns
import os
import time
from gprofiler import GProfiler

# scTRS tools
import scTRS.util as util
import scTRS.data_loader as dl
import scTRS.method as md

# autoreload: load IPython's autoreload extension and switch it to mode 2, so imported
# modules are reloaded before each cell execution (presumably so that edits to the
# scTRS modules imported above are picked up without restarting the kernel).
%load_ext autoreload
%autoreload 2
# logging: print the versions of scanpy and its main dependencies, which records the
# software environment next to the results below.
sc.logging.print_versions()

scanpy==1.5.1 anndata==0.7.4 umap==0.4.6 numpy==1.19.0 scipy==1.5.1 pandas==1.0.5 scikit-learn==0.23.1 statsmodels==0.11.1


In [2]:
# Setup file paths.
# Both locations default to the original cluster paths, but they can be overridden
# without editing the notebook by setting the environment variables SCTRS_DATA_PATH
# and SCTRS_RESULT_PATH before the kernel is started.
DATA_PATH = os.environ.get(
    'SCTRS_DATA_PATH',
    '/n/holystore01/LABS/price_lab/Users/mjzhang/scTRS_data')
# Please change this to your own path (or set SCTRS_RESULT_PATH)
RESULT_PATH = os.environ.get(
    'SCTRS_RESULT_PATH',
    '/n/home11/mjzhang/gwas_informed_scRNAseq/results')

In [3]:
# Load the TMS FACS count data (used for scoring genes) and report its
# dimensions together with the wall-clock time the load took.
start_time = time.time()
data_facs_ct = dl.load_tms_ct(DATA_PATH, data_name='facs')

n_cell, n_gene = data_facs_ct.shape[:2]
print('# TMS facs count data: n_cell=%d, n_gene=%d' % (n_cell, n_gene))

elapsed_sec = time.time() - start_time
print('# time=%0.1fs' % elapsed_sec)

Trying to set attribute `.obs` of view, copying.


# TMS facs count data: n_cell=110096, n_gene=22966
# time=10.0s


In [4]:
# Precompute mean and var.
# NOTE(review): the return value is discarded, so compute_stats presumably stores its
# statistics on `data_facs_ct` in place for the scoring cells below -- confirm in
# scTRS.method.
md.compute_stats(data_facs_ct)

In [5]:
# Genes to score, and the gene pool handed to score_cell as `ctrlgene_list`.
gene_list = [
    'Abca1', 'Abcg5', 'Abcg8', 'Abhd12', 'Abi2',
    'Abo', 'Acad11', 'Acer2', 'Acmsd', 'Adnp2',
    'Zdhhc24', 'Zhx3', 'Zfp112', 'Zfp180', 'Zfp296',
    'Zfp366', 'Zfp664', 'Zfp78', 'Zfp821', 'Zpr1',
]
ctrlgene_list = [
    'Angptl3', 'Anxa6', 'Anxa9', 'Ap1m2', 'Apoa5', 'Apob',
    'Apoc1', 'Apoc4', 'Apoe', 'Arcn1', 'Arhgap1', 'Arhgef15',
    'Arid1a', 'Arid3a', 'Asgr1', 'Atg13', 'Atg4c', 'Atp13a1',
]

# One uniform random weight per gene. The global NumPy RNG is re-seeded immediately
# before the draw, so the weights (and the RNG state left behind for score_cell) are
# the same every time this cell runs.
np.random.seed(0)
gene_weight = np.random.rand(len(gene_list))

# Score columns requested from score_cell and read back from .obs below.
score_columns = ('trs', 'trs_z', 'trs_tp', 'trs_ep', 'trs_ez')

md.score_cell(
    data_facs_ct, gene_list,
    gene_weight=gene_weight, suffix='',
    ctrl_opt='mean_match', trs_opt='vst', bc_opt='empi',
    n_ctrl=1, n_genebin=200, ctrlgene_list=ctrlgene_list,
    return_list=list(score_columns), verbose=False,
)

# Show the first five cells of each requested score column.
for column in score_columns:
    print(data_facs_ct.obs[column].values[0:5])

[0.05186189 0.07447904 0.01608617 0.00266137 0.        ]
[ 0.39956956  0.82447401 -0.28406384 -0.35053454 -1.55664527]
[0.34473679 0.20483514 0.61181927 0.63703121 0.94022264]
[0.22229489 0.14275593 0.45964014 0.49143028 1.        ]
[  0.7644657    1.06801917   0.10134037   0.02148274 -10.        ]


In [10]:
# Reproducibility check: repeat the scoring run of the earlier scoring cell with the
# same inputs and the same seed, and verify that the scores stored in .obs do not
# change. `gene_list` and `ctrlgene_list` are reused from that cell instead of being
# pasted a second time, so the two runs cannot drift apart.
rerun_columns = ['trs', 'trs_z', 'trs_tp', 'trs_ep', 'trs_ez']

# Snapshot the scores of the previous run before they are overwritten. Columns that
# are not present yet are skipped rather than raising.
previous_scores = {col: data_facs_ct.obs[col].values.copy()
                   for col in rerun_columns if col in data_facs_ct.obs.columns}

# Re-seed the global RNG and redraw the weights exactly as in the first run, so that
# score_cell is called with identical weights and an identical RNG state.
np.random.seed(0)
gene_weight = np.random.rand(len(gene_list))

md.score_cell(data_facs_ct, gene_list, gene_weight=gene_weight, suffix='',
              ctrl_opt='mean_match', trs_opt='vst', bc_opt='empi',
              n_ctrl=1, n_genebin=200, ctrlgene_list=ctrlgene_list,
              return_list=list(rerun_columns), verbose=False)

for col in rerun_columns:
    rerun_values = data_facs_ct.obs[col].values
    if col in previous_scores:
        # Fail loudly if the re-run differs from the previous run (NaNs compare equal).
        np.testing.assert_allclose(
            rerun_values, previous_scores[col], rtol=1e-6, atol=1e-8,
            err_msg='score_cell result is not reproducible for column %s' % col)
    print(rerun_values[0:5])

[0.05186189 0.07447904 0.01608617 0.00266137 0.        ]
[ 0.39956956  0.82447401 -0.28406384 -0.35053454 -1.55664527]
[0.34473679 0.20483514 0.61181927 0.63703121 0.94022264]
[0.22229489 0.14275593 0.45964014 0.49143028 1.        ]
[  0.7644657    1.06801917   0.10134037   0.02148274 -10.        ]
