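# eQTL mapping example for n=94

This notebook walks through cis-eQTL mapping with `jaxqtl` on the chromosome 22 example data (n = 94 samples): first reporting the lead eQTL for each gene, then writing all cis association statistics to disk and loading them back.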

In [1]:
import pandas as pd
import os

from jax.config import config

from jaxqtl.families.distribution import Poisson
from jaxqtl.io.geno import PlinkReader
from jaxqtl.io.pheno import PheBedReader
from jaxqtl.io.readfile import read_data
from jaxqtl.map import map_cis, map_cis_nominal
from jaxqtl.infer.permutation import BetaPerm, DirectPerm, Permutation

# Enable 64-bit (double-precision) types in JAX; without this flag JAX defaults to 32-bit.
# NOTE(review): newer JAX releases no longer support `from jax.config import config`;
# `import jax; jax.config.update(...)` is the supported spelling — confirm against the pinned JAX version.
config.update("jax_enable_x64", True)

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


### Read data

In [2]:
# Example input files, listed in the order they are handed to read_data.
# Genotypes (passed to PlinkReader):
geno_path = "../example/data/chr22.bed"
# Phenotypes (passed to PheBedReader):
pheno_path = "../example/data/CD14_positive_monocyte.bed.gz"
# Donor-level covariates:
covar_path = "../example/data/donor_features.tsv"

In [3]:
# Load genotypes, phenotypes and covariates into a single data object.
plink_reader = PlinkReader()
phebed_reader = PheBedReader()
dat = read_data(
    geno_path, pheno_path, covar_path,
    geno_reader=plink_reader, pheno_reader=phebed_reader,
)
# Attributes used later in this notebook: dat.geno, dat.bim, dat.pheno, dat.pheno_meta, dat.covar

### Cis Mapping (Report lead eQTL for each gene)

In [4]:
# Restrict the genotype data to chromosome 22 before cis-mapping.
# NOTE(review): the return value is discarded, so this presumably filters `dat` in place — confirm.
# If so, re-running this cell operates on the already-filtered object rather than the raw data.
dat.filter_geno("22")

In [5]:
# Sanity-check the filtered data, one item per output line:
#   1. genotype matrix shape (143083 variants in total)
#   2. bim table shape
#   3. gene map shape (200 genes)
#   4. chromosome labels remaining in the bim table
print(
    dat.geno.shape,
    dat.bim.shape,
    dat.pheno_meta.gene_map.shape,
    dat.bim.chrom.unique(),
    sep="\n",
)

(94, 143083)
(143083, 7)
(200, 4)
<StringArray>
['22']
Length: 1, dtype: string


In [11]:
# Fit a Poisson model and report, for each gene, the nominal p-value together with
# the permutation-adjusted p-value obtained via the beta-distribution method.
# NOTE(review): the original note said this runs on "the first two genes" for unit testing,
# but no gene subset is selected in this call — confirm where that restriction is applied.
poisson_family = Poisson()
beta_perm = BetaPerm()
mapcis_df = map_cis(dat, family=poisson_family, perm=beta_perm)

In [12]:
# Preview the first rows of the per-gene result table returned by map_cis.
mapcis_df.head()

Unnamed: 0,phenotype_id,chrom,num_var,variant_id,tss_distance,beta_shape1,beta_shape2,beta_converged,ma_samples,ma_count,af,pval_nominal,slope,slope_se,pval_perm,pval_beta
0,ENSG00000177663,22,2592,22:17691970,126126,,,0.0,2,2.0,0.9893617021276596,3.8775917047978116e-173,-1.297385142375589,0.0462508455451612,0.0476190476190476,
1,ENSG00000069998,22,2862,22:17421074,-225103,0.0690906025545831,0.6145193813457284,1.0,1,1.0,0.9946808510638298,1.3242188329672266e-81,-2.279645377570118,0.1191431479826054,0.0476190476190476,2.4350139383452294e-06


### Cis Mapping (Report all cis association statistics)

In [9]:
# Output directory and file-name prefix for the nominal association results.
out_dir = "../example/result"
prefix = "dat_n94"

# Run the nominal cis scan with a Poisson model. The return value is not used here;
# the results are read back from `out_dir` in the following cell.
map_cis_nominal(dat, out_dir=out_dir, prefix=prefix, family=Poisson())

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf['af'][start_row:end_row] = af[idx]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf['ma_samples'][start_row:end_row] = ma_samples[idx]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf['ma_count'][start_row:end_row] = ma_count[idx]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["pval_nomin

In [10]:
# Read the chromosome 22 nominal results back from disk and preview the first rows.
pairs_path = os.path.join(out_dir, f'{prefix}.cis_qtl_pairs.22.parquet')
pairs_df = pd.read_parquet(pairs_path)
pairs_df.head()

Unnamed: 0,chrom,snp,pos,i,phenotype_id,tss_distance,af,ma_samples,ma_count,pval_nominal,slope,slope_se,converged
0,22,22:17066020,17066020,62,ENSG00000177663,-499824,0.984043,3.0,3.0,6.770875e-30,1.861333,0.163879,True
1,22,22:17066700,17066700,63,ENSG00000177663,-499144,0.760638,40.0,45.0,0.01012771,-0.050836,0.019769,True
2,22,22:17067504,17067504,64,ENSG00000177663,-498340,0.31383,51.0,59.0,0.02865608,-0.041832,0.019117,True
3,22,22:17068400,17068400,65,ENSG00000177663,-497444,0.117021,21.0,22.0,3.782973e-08,0.137724,0.025038,True
4,22,22:17069064,17069064,66,ENSG00000177663,-496780,0.111702,21.0,21.0,2.886101e-07,0.139704,0.027229,True
