# eQTL mapping example for n=94

In [1]:
import pandas as pd
import os

# `config` is the configuration object exposed on the top-level `jax` package.
# The older `from jax.config import config` form went through the `jax.config`
# submodule, which was deprecated and later removed in newer JAX releases;
# importing the object from `jax` keeps the same name available.
from jax import config

from jaxqtl.families.distribution import Poisson
from jaxqtl.io.geno import PlinkReader
from jaxqtl.io.pheno import PheBedReader
from jaxqtl.io.readfile import read_data
from jaxqtl.map import map_cis, map_cis_nominal
from jaxqtl.infer.permutation import BetaPerm, DirectPerm, Permutation

# Switch JAX from its 32-bit default to 64-bit floats. Set this at startup,
# before any JAX arrays are created.
config.update("jax_enable_x64", True)

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


### Read data

In [2]:
# All example inputs live in one directory; change DATA_DIR to point the
# notebook at a different copy of the data.
DATA_DIR = "../example/data"

# Genotypes (read below with PlinkReader).
geno_path = f"{DATA_DIR}/chr22.n94.bed"
# Donor-level features, passed to read_data as the covariate file.
covar_path = f"{DATA_DIR}/donor_features.n94.tsv"
# Phenotypes (read below with PheBedReader).
pheno_path = f"{DATA_DIR}/CD14_positive_monocyte.n94.bed.gz"

In [3]:
# Load the genotype, phenotype and covariate files into one data object.
# Each reader is built first so the file format handled by each is explicit.
plink_reader = PlinkReader()
phebed_reader = PheBedReader()

dat = read_data(
    geno_path,
    pheno_path,
    covar_path,
    geno_reader=plink_reader,
    pheno_reader=phebed_reader,
)
# Attributes used afterwards: dat.geno, dat.bim, dat.pheno, dat.pheno_meta, dat.covar

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bim.i = np.arange(0, len(bim))  # reset index i after variant filtering


### Cis Mapping (Report lead eQTL for each gene)

In [4]:
# Restrict the genotype data to chromosome 22; the cis-mapping below is run
# on this chromosome alone.
chrom = "22"
dat.filter_geno(chrom)

In [5]:
# Sanity-check the dimensions of the data after restricting to chr22.
print(dat.geno.shape) # (samples, variants); the saved run printed (94, 110107), not 143083 -- NOTE(review): 143083 may be a pre-filter count, confirm
print(dat.bim.shape) # variant table; in the saved run its row count equals the number of genotype columns
print(dat.pheno_meta.gene_map.shape) # 200 genes
print(dat.bim.chrom.unique()) # chromosomes present in the variant table; the saved run shows only '22'

(94, 110107)
(110107, 7)
(200, 4)
<StringArray>
['22']
Length: 1, dtype: string


In [6]:
# Fit a Poisson model and report, for each gene, the nominal p-value and a
# p-value adjusted with the beta-distribution method (BetaPerm). These are
# described as the default settings; they are passed explicitly here.
# NOTE(review): an earlier comment said this runs only the first two genes for
# unit testing, but no gene limit is passed in this call and the saved output
# has 10 rows -- confirm where the gene subset is applied.
mapcis_df = map_cis(dat, family=Poisson(), perm=BetaPerm())

In [8]:
# Show the lead-eQTL table: one row per gene (phenotype_id) with its reported variant.
mapcis_df

Unnamed: 0,phenotype_id,chrom,num_var,variant_id,tss_distance,beta_shape1,beta_shape2,beta_converged,ma_samples,ma_count,af,pval_nominal,slope,slope_se,pval_perm,pval_beta
0,ENSG00000177663,22,2592,22:17691970,126126,0.040742,0.497352,1.0,2,2.0,0.989362,3.877592e-173,-1.297385,0.046251,0.047619,8.954551e-08
1,ENSG00000069998,22,2862,22:17421074,-225103,0.069091,0.614519,1.0,1,1.0,0.994681,1.324219e-81,-2.279645,0.119143,0.047619,2.435014e-06
2,ENSG00000093072,22,3015,22:17727109,24230,0.031543,0.49945,1.0,2,2.0,0.989362,3.2424949999999998e-217,-1.254458,0.039877,0.047619,1.422406e-07
3,ENSG00000131100,22,3589,22:18525678,414094,0.039619,0.449719,1.0,1,1.0,0.994681,8.214258000000001e-262,-1.972621,0.057068,0.047619,4.254163e-11
4,ENSG00000099968,22,3590,22:17641685,-469936,0.06614,0.605555,1.0,26,26.0,0.861702,9.615735e-53,-0.644316,0.042153,0.095238,0.0003421288
5,ENSG00000015475,22,3191,22:18043325,-214211,0.053913,0.570602,1.0,1,1.0,0.994681,5.520372e-81,-1.074243,0.056364,0.047619,4.459175e-05
6,ENSG00000269220,22,3190,22:18062511,-197577,0.067876,0.653734,1.0,5,5.0,0.973404,4.326579e-110,-1.673545,0.075071,0.047619,3.590449e-08
7,ENSG00000070413,22,2647,22:19277323,167356,0.08237,0.730972,1.0,3,3.0,0.984043,3.0983869999999998e-77,-1.596112,0.085804,0.047619,4.779396e-07
8,ENSG00000100075,22,2782,22:19590231,423888,0.100799,0.61322,1.0,3,4.0,0.978723,7.797988e-49,-0.79154,0.053894,0.047619,1.301153e-05
9,ENSG00000185608,22,3437,22:19660536,241111,0.052069,0.586835,1.0,1,1.0,0.994681,1.23395e-203,-2.10017,0.068973,0.047619,2.588962e-11


In [9]:
# calculate q values
import rpy2
from jaxqtl.post import rfunc

In [25]:
# Convert the beta-approximated p-values to q-values; the estimated pi0 is returned alongside.
qvalue_lambda = 0 # lambda = 0 gives a BH adjustment; used because pi0 could not be estimated from only 10 genes
qval, pi0 = rfunc.qvalue(mapcis_df['pval_beta'], lambda_qvalue=qvalue_lambda)

In [24]:
# q-values computed from mapcis_df['pval_beta'] (one per row of mapcis_df)
qval

array([2.23863763e-07, 3.47859118e-06, 2.84481118e-07, 2.12708155e-10,
       3.42128803e-04, 4.95463904e-05, 1.19681639e-07, 7.96566040e-07,
       1.62644110e-05, 2.12708155e-10])

### Cis Mapping (Report all cis association statistics)

In [26]:
# Output location and file-name prefix for the full cis association results.
prefix = "dat_n94"
out_dir = "../example/result"

# Create the output directory up front so the run does not fail on a fresh
# checkout where it is missing. NOTE(review): map_cis_nominal may already do
# this itself; the call is a no-op when the directory exists.
os.makedirs(out_dir, exist_ok=True)

# Writes the association statistics for all cis variant-gene pairs under
# out_dir; the next cell reads them back from
# `<out_dir>/<prefix>.cis_qtl_pairs.22.parquet`.
map_cis_nominal(dat, family=Poisson(), out_dir=out_dir, prefix=prefix)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["af"][start_row:end_row] = af[idx]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["ma_samples"][start_row:end_row] = ma_samples[idx]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["ma_count"][start_row:end_row] = ma_count[idx]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["pval_nomin

In [27]:
# Read back the chr22 per-pair results written by map_cis_nominal and preview
# the first rows.
pairs_path = os.path.join(out_dir, f'{prefix}.cis_qtl_pairs.22.parquet')
pairs_df = pd.read_parquet(pairs_path)
pairs_df.head()

Unnamed: 0,chrom,snp,pos,i,phenotype_id,tss_distance,af,ma_samples,ma_count,pval_nominal,slope,slope_se,converged
0,22,22:17066020,17066020,62,ENSG00000177663,-499824,0.984043,3.0,3.0,1.773196e-25,1.286764,0.123349,True
1,22,22:17066700,17066700,63,ENSG00000177663,-499144,0.760638,40.0,45.0,0.01012771,-0.050836,0.019769,True
2,22,22:17067504,17067504,64,ENSG00000177663,-498340,0.31383,51.0,59.0,0.02865608,-0.041832,0.019117,True
3,22,22:17068400,17068400,65,ENSG00000177663,-497444,0.117021,21.0,22.0,3.782973e-08,0.137724,0.025038,True
4,22,22:17069064,17069064,66,ENSG00000177663,-496780,0.111702,21.0,21.0,2.886101e-07,0.139704,0.027229,True
