eQTL mapping for n=94

In [1]:
import pandas as pd
import os

from jax.config import config

from jaxqtl.families.distribution import Poisson
from jaxqtl.io.geno import PlinkReader
from jaxqtl.io.pheno import PheBedReader
from jaxqtl.io.readfile import read_data
from jaxqtl.map import map_cis, map_cis_nominal, prepare_cis_output, write_nominal
from jaxqtl.infer.permutation import BetaPerm, DirectPerm, Permutation

config.update("jax_enable_x64", True)

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


### Read data

In [2]:
geno_path = "../example/data/chr22.bed"
covar_path = "../example/data/donor_features.tsv"
pheno_path = "../example/data/CD14_positive_monocyte.bed.gz"

In [3]:
# read raw data under one data class
dat = read_data(
    geno_path,
    pheno_path,
    covar_path,
    geno_reader=PlinkReader(),
    pheno_reader=PheBedReader(),
)
# dat contains: dat.geno, dat.bim, dat.count, dat.covar

In [4]:
# format data to get ready for mapping
dat_CD14 = dat.create_ReadyData()

# dat_CD14.geno # jnp.ndarray
# dat_CD14.bim  # pd.DataFrame
# dat_CD14.pheno # ExpressionData, iterable object
# dat_CD14.pheno_meta # GeneMetaData, iterable object providing tss location for identifying cis-window
# dat_CD14.covar # jnp.ndarray

### Cis Mapping (Report lead eQTL for each gene)

In [5]:
# Default is fitting poisson model and report nominal and adjusted p value using beta distribution method
# for unit testing, run cis-mapping for the first two genes

mapcis_df = map_cis(dat_CD14, family=Poisson(), perm=BetaPerm())

In [6]:
mapcis_df.head()

Unnamed: 0,phenotype_id,chrom,num_var,variant_id,tss_distance,beta_shape1,beta_shape2,beta_converged,ma_samples,ma_count,af,pval_nominal,slope,slope_se,pval_perm,pval_beta
0,ENSG00000177663,22,2592,22:17691970,126126,,,0.0,2,2.0,0.9893617021276596,3.8775917047978116e-173,-1.297385142375589,0.0462508455451612,0.0476190476190476,
1,ENSG00000069998,22,2862,22:17421074,-225103,0.0690906025545831,0.6145193813457284,1.0,1,1.0,0.9946808510638298,1.3242188329672266e-81,-2.279645377570118,0.1191431479826054,0.0476190476190476,2.4350139383452294e-06


### Cis Mapping (Report all cis association statistics)

In [8]:
mapcis_out = map_cis_nominal(dat_CD14, family=Poisson())

In [9]:
prefix = "dat_CD14_n94"
out_dir = "../example/result"
write_nominal(mapcis_out, dat_CD14, out_dir, prefix)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["af"][idx] = af
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["ma_samples"][idx] = np.sum(g < 1.5)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["ma_count"][idx] = n2 - np.sum(g[g > 0.5])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  outdf["ma_samples"][idx] = np.sum(
A value is trying t

OSError: Cannot save file into a non-existent directory: '../example/result'

In [None]:
# load result
pairs_df = pd.read_parquet(os.path.join(out_dir, f'{prefix}.cis_qtl_pairs.22.parquet'))
pairs_df.head()