eQTL mapping for n=94

In [12]:
import pandas as pd
import os

from jax.config import config

from jaxqtl.families.distribution import Poisson
from jaxqtl.io.geno import PlinkReader
from jaxqtl.io.pheno import PheBedReader
from jaxqtl.io.readfile import read_data
from jaxqtl.map import map_cis, map_cis_nominal, prepare_cis_output, write_nominal
from jaxqtl.infer.permutation import BetaPerm, DirectPerm, Permutation

# Switch JAX to 64-bit floats (it uses 32-bit by default). Keep this ahead of
# any cell that builds JAX arrays so the setting is in effect when they are made.
config.update("jax_enable_x64", True)

### Read data

In [2]:
# Example input files; all three live in the same data directory.
# Order: genotypes, covariates, phenotypes.
geno_path, covar_path, pheno_path = (
    f"../example/data/{file_name}"
    for file_name in (
        "chr22.bed",
        "donor_features.tsv",
        "CD14_positive_monocyte.bed.gz",
    )
)

In [3]:
# Load genotype, phenotype and covariate files into a single raw-data object.
# The two format-specific readers are collected first so the file-format choice
# is visible in one place; covariates are read without a dedicated reader.
readers = {
    "geno_reader": PlinkReader(),
    "pheno_reader": PheBedReader(),
}
dat = read_data(geno_path, pheno_path, covar_path, **readers)
# Per the author's note, dat contains: dat.geno, dat.bim, dat.count, dat.covar

In [4]:
# Convert the raw data into the prepared form that the mapping calls below take
# as their first argument.
dat_CD14 = dat.create_ReadyData()

# Fields on the prepared object, as listed by the author (not displayed here):
# dat_CD14.geno        # jnp.ndarray
# dat_CD14.bim         # pd.DataFrame
# dat_CD14.pheno       # ExpressionData, iterable object
# dat_CD14.pheno_meta  # GeneMetaData, iterable object providing the TSS location
#                      # used to identify the cis-window
# dat_CD14.covar       # jnp.ndarray

### Cis Mapping (Report lead eQTL for each gene)

In [6]:
# Fit a Poisson model and report, for each gene, the nominal p-value together
# with a permutation-adjusted p-value obtained with the beta-distribution method
# (described by the author as the default configuration).
# NOTE(review): the original note says cis-mapping is run on only the first two
# genes for unit testing, but no such limit is passed in this call -- confirm
# where (or whether) that restriction is applied.

mapcis_out = map_cis(dat_CD14, family=Poisson(), perm=BetaPerm())

In [7]:
# Turn the lead-eQTL mapping result into a DataFrame and preview the first rows.
mapcis_df = prepare_cis_output(dat_CD14, mapcis_out)
mapcis_df.head()

Unnamed: 0,phenotype_id,num_var,beta_shape1,beta_shape2,true_df,pval_true_df,variant_id,tss_distance,ma_samples,ma_count,af,pval_nominal,slope,slope_se,pval_perm,pval_beta
0,ENSG00000177663,2654,-4.420266426224249e+55,-1.1228169994115177e+60,,,22:17691970,126126,2,2.0,0.989362,3.877541720116392e-173,-1.2973851408297483,0.0462508447333097,0.0476190476190476,
1,ENSG00000069998,2962,0.0682940532833082,0.6103465238525416,,,22:17071513,-574664,9,9.0,0.952128,7.1353095527519565e-96,-1.0836402242235912,0.0521582544090457,0.0476190476190476,2.99464296104712e-07


### Cis Mapping (Report all cis association statistics)

In [14]:
# Nominal scan with a Poisson model; no permutation object is passed here.
# NOTE(review): this rebinds `mapcis_out`, which previously held the map_cis
# result consumed by prepare_cis_output. Re-running that earlier cell after this
# one would hand it the nominal result instead -- consider a distinct name.
mapcis_out = map_cis_nominal(dat_CD14, family=Poisson())

In [15]:
# Output location and file-name prefix for the nominal results; both names are
# reused by the cell that loads the result back.
prefix = "dat_CD14_n94"
out_dir = "../example/result"

# Make sure the target directory exists so the write does not depend on a
# folder left behind by an earlier run (no-op when it is already there).
os.makedirs(out_dir, exist_ok=True)
write_nominal(mapcis_out, out_dir, prefix)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf['pval_nominal'][start_row:end_row] = res.nominal_p[idx]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf['slope'][start_row:end_row] = res.slope[idx]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf['slope_se'][start_row:end_row] = res.slope_se[idx]


In [16]:
# Load the chromosome 22 pairs file back from the output directory (presumably
# the file produced by the write_nominal call above) and preview the first rows.
pairs_path = os.path.join(out_dir, f'{prefix}.cis_qtl_pairs.22.parquet')
pairs_df = pd.read_parquet(pairs_path)
pairs_df.head()

Unnamed: 0,chrom,snp,pos,phenotype_id,tss_distance,af,ma_samples,ma_count,pval_nominal,slope,slope_se
0,22,22:17066020:C:T,17066020,ENSG00000177663,-499824,,,,6.770875e-30,1.861333,0.163879
1,22,22:17066700:C:T,17066700,ENSG00000177663,-499144,,,,0.01012771,-0.050836,0.019769
2,22,22:17067504:T:G,17067504,ENSG00000177663,-498340,,,,0.02865608,-0.041832,0.019117
3,22,22:17068400:T:C,17068400,ENSG00000177663,-497444,,,,3.782973e-08,0.137724,0.025038
4,22,22:17069064:A:T,17069064,ENSG00000177663,-496780,,,,2.886101e-07,0.139704,0.027229
