# eQTL mapping example for n=94

In [3]:
import pandas as pd
import os

# Import the config object from the top-level `jax` package. The older
# `from jax.config import config` path relies on the `jax.config` submodule,
# which newer JAX releases no longer ship.
from jax import config

from jaxqtl.families.distribution import Poisson, Gaussian
from jaxqtl.io.geno import PlinkReader
from jaxqtl.io.pheno import PheBedReader
from jaxqtl.io.readfile import create_readydata
from jaxqtl.io.covar import covar_reader
from jaxqtl.map import map_cis, map_cis_nominal
from jaxqtl.infer.permutation import DirectPerm, Permutation
from jaxqtl.log import get_log

# Turn on 64-bit (double precision) support in JAX; must run before any
# JAX computation in this notebook.
config.update("jax_enable_x64", True)

### Read data

In [5]:
# All example inputs live in the same directory, relative to this notebook.
data_dir = "../example/data"

geno_path = f"{data_dir}/chr22.n94.bed"
covar_path = f"{data_dir}/donor_features.n94.tsv"
pheno_path = f"{data_dir}/CD14_positive_monocyte.bed.gz"

In [6]:
# Genotypes: the reader returns the genotype matrix, the variant table (bim)
# and the sample information.
# NOTE(review): the original note says the reader also imputes genotypes — confirm.
geno_reader = PlinkReader()
geno, bim, sample_info = geno_reader(geno_path)

# Donor-level covariates.
covar = covar_reader(covar_path)

# Phenotypes (expression) in BED format.
pheno_reader = PheBedReader()
pheno = pheno_reader(pheno_path)

# Bundle genotype, variant table, phenotype and covariates into the single
# object that the mapping functions below consume.
dat = create_readydata(geno, bim, pheno, covar, autosomal_only=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pheno.drop(["chr", "start", "end"], axis=1, inplace=True)


In [7]:
# add the first 2 PCs (k=2) of the expression profile for this cell type as covariates
dat.add_covar_pheno_PC(k=2)

### Cis Mapping (Report lead eQTL for each gene)

In [6]:
# Keep only chromosome "22" for cis-mapping.
# NOTE(review): the first argument (0.0) is presumably a frequency threshold — confirm.
dat.filter_geno(0.0, "22")

In [8]:
# Sanity-check the dimensions of the prepared data after filtering.
print(dat.geno.shape) # (samples, variants): 94 x 143083 in the recorded output
print(dat.bim.shape)
print(dat.pheno_meta.gene_map.shape) # recorded output shows 6990 rows here, not 200
print(dat.bim.chrom.unique())
print(dat.covar.shape)

(94, 143083)
(143083, 7)
(6990, 4)
<StringArray>
['22']
Length: 1, dtype: string
(94, 14)


In [None]:
# Fit a Poisson model per gene and report the lead eQTL with its nominal p-value
# and the adjusted p-value from the beta-distribution method (per the original note).
# For quick testing the run is cut short with test_break_n=2 and direct_perm=False.
# NOTE(review): the recorded result below has three rows — confirm how test_break_n counts genes.
%time mapcis_df = map_cis(dat, family=Poisson(), test_break_n=2, direct_perm=False)

In [8]:
# Lead-eQTL results from map_cis.
# Original note: shifted transform count, y / s + 1 (s is presumably a per-sample scaling factor — confirm).
mapcis_df

Unnamed: 0,phenotype_id,chrom,num_var,variant_id,tss_distance,beta_shape1,beta_shape2,beta_converged,ma_samples,ma_count,af,pval_nominal,slope,slope_se,pval_perm,pval_beta,qval
0,ENSG00000177663,22,2592,22:17691970,126126,1.050303,31.130028,1.0,2,2.0,0.989362,0.072695,-1.039481,0.579182,0.904762,0.896194,0.896194
1,ENSG00000069998,22,2862,22:17826030,179853,9.915831,57.407599,1.0,3,3.0,0.984043,0.118103,-1.225258,0.784021,0.380952,0.263327,0.789982
2,ENSG00000093072,22,3015,22:18123765,420886,2.333648,78.253388,1.0,1,1.0,0.994681,0.03345,-1.368323,0.643419,0.619048,0.668867,0.896194


In [9]:
# NOTE(review): this cell repeats the previous display of mapcis_df unchanged — candidate for removal.
# Original note: shifted transform count, y / s + 1
mapcis_df

Unnamed: 0,phenotype_id,chrom,num_var,variant_id,tss_distance,beta_shape1,beta_shape2,beta_converged,ma_samples,ma_count,af,pval_nominal,slope,slope_se,pval_perm,pval_beta,qval
0,ENSG00000177663,22,2592,22:17691970,126126,1.050303,31.130028,1.0,2,2.0,0.989362,0.072695,-1.039481,0.579182,0.904762,0.896194,0.896194
1,ENSG00000069998,22,2862,22:17826030,179853,9.915831,57.407599,1.0,3,3.0,0.984043,0.118103,-1.225258,0.784021,0.380952,0.263327,0.789982
2,ENSG00000093072,22,3015,22:18123765,420886,2.333648,78.253388,1.0,1,1.0,0.994681,0.03345,-1.368323,0.643419,0.619048,0.668867,0.896194


In [None]:
# Same lead-eQTL mapping with a Gaussian family for comparison.
# No test_break_n is passed here, so this call is not cut short like the Poisson test run.
mapcis_df_Gaussian = map_cis(dat, family=Gaussian())

In [8]:
# NOTE(review): these imports belong in the first cell; get_log is already imported there,
# and np is not used in any cell visible here.
from jaxqtl.post.qvalue import add_qvalues
import numpy as np
from jaxqtl.log import get_log
# Logger handed to add_qvalues in the next cell.
log = get_log()

In [9]:
# Compute q-values for the lead-eQTL table.
# pi0 and qvalue_lambda are left as None — presumably add_qvalues then picks them itself; confirm.
pi0 = None
qvalue_lambda = None
# NOTE(review): 0.05 is presumably the FDR level — confirm against add_qvalues' signature.
add_qvalues(mapcis_df, log, 0.05, pi0, qvalue_lambda)

Unnamed: 0,phenotype_id,chrom,num_var,variant_id,tss_distance,beta_shape1,beta_shape2,beta_converged,ma_samples,ma_count,af,pval_nominal,slope,slope_se,pval_perm,pval_beta,qval,pval_nominal_threshold
0,ENSG00000177663,22,2592,22:17691970,126126,2.681706,1.063671,1.0,2,2.0,0.989362,0.072695,-1.039481,0.579182,0.047619,0.000983,0.001404,0.110781
1,ENSG00000069998,22,2862,22:17826030,179853,3.566182,1.179464,1.0,3,3.0,0.984043,0.118103,-1.225258,0.784021,0.047619,0.000675,0.001126,0.180498
2,ENSG00000093072,22,3015,22:18123765,420886,2.31663,1.154663,1.0,1,1.0,0.994681,0.03345,-1.368323,0.643419,0.047619,0.000481,0.000963,0.074206
3,ENSG00000131100,22,3589,22:18525678,414094,2.37012,0.969281,1.0,1,1.0,0.994681,0.007647,-1.806344,0.677224,0.047619,9e-06,6.2e-05,0.088437
4,ENSG00000099968,22,3590,22:17641685,-469936,3.313077,1.147476,1.0,26,26.0,0.861702,0.15206,-0.657656,0.459163,0.095238,0.002505,0.002783,0.161186
5,ENSG00000015475,22,3191,22:18043325,-214211,3.347359,1.170385,1.0,1,1.0,0.994681,0.128228,-1.018026,0.669258,0.047619,0.001384,0.00173,0.162388
6,ENSG00000269220,22,3190,22:18062511,-197577,3.031552,1.103206,1.0,5,5.0,0.973404,0.064286,-1.498019,0.809656,0.047619,0.000291,0.000868,0.139586
7,ENSG00000070413,22,2647,22:19277323,167356,3.724353,1.287164,1.0,3,3.0,0.984043,0.041943,-1.752298,0.861467,0.047619,1.2e-05,6.2e-05,0.185124
8,ENSG00000100075,22,2782,22:19428145,261802,4.668,1.26732,1.0,41,47.0,0.25,0.260392,-0.611879,0.543668,0.047619,0.003035,0.003035,0.260392
9,ENSG00000185608,22,3437,22:19660536,241111,3.190736,1.098021,1.0,1,1.0,0.994681,0.078031,-1.597082,0.906285,0.047619,0.000347,0.000868,0.154259


### Cis Mapping (Report all cis association statistics)

In [13]:
# Prefix for the per-chromosome result files written by map_cis_nominal.
out_path = "../example/result/dat_n94"
log = get_log()

# Nominal pass with a Poisson family; test_break=True keeps this a short test run.
map_cis_nominal(
    dat,
    family=Poisson(),
    out_path=out_path,
    log=log,
    test_break=True,
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["af"][start_row:end_row] = af[idx]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["ma_samples"][start_row:end_row] = ma_samples[idx]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["ma_count"][start_row:end_row] = ma_count[idx]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["pval_nomin

In [9]:
# shifted transform result
# Load the nominal-pass output written under the out_path prefix
# ("22" in the file name is presumably the chromosome — confirm).
# The suffix is a plain string literal: it has no placeholders, so no f-prefix is needed.
pairs_df = pd.read_parquet(out_path + ".cis_qtl_pairs.22.parquet")
pairs_df.head()

Unnamed: 0,chrom,snp,pos,phenotype_id,tss_distance,af,ma_samples,ma_count,pval_nominal,slope,slope_se,converged
0,22,22:17066020,17066020,ENSG00000177663,-499824,0.984043,3.0,3.0,0.840664,0.265719,1.321694,True
1,22,22:17066700,17066700,ENSG00000177663,-499144,0.760638,40.0,45.0,0.669325,-0.115669,0.27084,True
2,22,22:17067504,17067504,ENSG00000177663,-498340,0.31383,51.0,59.0,0.837865,0.052088,0.254553,True
3,22,22:17068400,17068400,ENSG00000177663,-497444,0.117021,21.0,22.0,0.740131,0.114253,0.344468,True
4,22,22:17069064,17069064,ENSG00000177663,-496780,0.111702,21.0,21.0,0.660578,0.171499,0.390555,True
