# xQTL Summary
- Covariates: Age (int) + gender (binary) + years of education (int), 10 principal components (double) = 13 covariates
- Used ADNI data from website; will focus on ROS/MAP data next

In [1]:
import pandas as pd
import os
from tqdm.notebook import trange, tqdm
from time import sleep
import matplotlib.pyplot as plt
#from matplotlib_venn import venn3
import numpy as np
import re

Chucks of code may require a large memory to run and may need an SSH tunnel.

On the remote compute node 'computenode' that is not directly assessible:
`jupyter notebook --no-browser --port=8889 &`

On the remote machine 'computenode':
`ssh -NfR 8890:localhost:8889 username@loginnode_name`

On the local machine 'localhost':
`ssh -NfL 8891:localhost:8890 username@loginnode_full_address`

Now open a web browser on the local machine and type:
`localhost:8891`

## Read summary data

### GWAS from Schwartzentruber et al. 2021

In [140]:
GWAS = pd.read_csv("GWAS_Schwartzentruber_2021/GCST90012877_buildGRCh37.tsv.gz", header=0, index_col=None, delim_whitespace=True)

In [141]:
GWAS

Unnamed: 0,variant_id,p_value,chromosome,base_pair_location,effect_allele,other_allele,effect_allele_frequency,beta,standard_error,SNP_ID,GWAS_BETA,GWAS_SE,GWAS_P,GWAX_UKBB_BETA,GWAX_UKBB_SE,GWAX_UKBB_P,DIRECT,I2,HET_P,INFO
0,rs61769339,0.532266,1,662622,A,G,0.110178,0.012680,0.020303,rs61769339,-0.1000,0.0457,0.02869,0.040390,0.022662,0.089,-++,0.867977,0.005920,0.777266
1,rs190214723,0.870407,1,693625,T,C,0.950775,-0.006036,0.037000,rs190214723,-0.0163,0.0685,0.81200,-0.001808,0.043966,0.910,--+,0.000000,0.858689,0.438968
2,rs12238997,0.834508,1,693731,A,G,0.884146,0.003903,0.018679,rs12238997,0.0996,0.0413,0.01598,-0.020708,0.020944,0.390,+-+,0.851848,0.009376,0.875969
3,rs72631875,0.149266,1,705882,A,G,0.066959,-0.039365,0.027297,rs72631875,-0.0184,0.0595,0.75740,-0.044954,0.030720,0.150,--+,0.000000,0.691697,0.672468
4,rs181440659,0.209173,1,718505,A,G,0.028718,-0.064142,0.051075,rs181440659,-0.1628,0.1362,0.23200,-0.047998,0.055095,0.400,--+,0.000000,0.434576,0.466472
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10687072,rs9616985,0.441902,22,51229805,T,C,0.926815,-0.015827,0.020582,rs9616985,-0.0453,0.0385,0.23960,-0.004034,0.024354,0.880,--+,0.000000,0.365023,0.988928
10687073,rs144549712,0.747566,22,51229855,A,G,0.143091,0.005819,0.018080,rs144549712,-0.0171,0.0360,0.63490,0.013550,0.020908,0.530,-++,0.000000,0.461592,0.738075
10687074,rs9616839,0.492679,22,51233300,T,C,0.334523,0.008820,0.012856,rs9616839,0.0166,0.0235,0.47800,0.005497,0.015359,0.810,+++,0.000000,0.692483,0.756590
10687075,rs200507571,0.096619,22,51236013,A,AT,0.748062,-0.023551,0.014174,rs200507571,-0.0360,0.0293,0.21940,-0.019747,0.016196,0.160,--+,0.000000,0.627329,0.803616


The SNP name is `variant_id`. The estimate `beta` is `beta`. The`SE` is `standard_error`. 
Rename the column names to standardize:

In [142]:
GWAS.rename({'chromosome': 'chr', 'variant_id': 'snps', 'p_value':'pvalue', 'standard_error':'SE', 'base_pair_location':'BP'},
            axis=1,
            inplace=True)
GWAS

Unnamed: 0,snps,pvalue,chr,BP,effect_allele,other_allele,effect_allele_frequency,beta,SE,SNP_ID,GWAS_BETA,GWAS_SE,GWAS_P,GWAX_UKBB_BETA,GWAX_UKBB_SE,GWAX_UKBB_P,DIRECT,I2,HET_P,INFO
0,rs61769339,0.532266,1,662622,A,G,0.110178,0.012680,0.020303,rs61769339,-0.1000,0.0457,0.02869,0.040390,0.022662,0.089,-++,0.867977,0.005920,0.777266
1,rs190214723,0.870407,1,693625,T,C,0.950775,-0.006036,0.037000,rs190214723,-0.0163,0.0685,0.81200,-0.001808,0.043966,0.910,--+,0.000000,0.858689,0.438968
2,rs12238997,0.834508,1,693731,A,G,0.884146,0.003903,0.018679,rs12238997,0.0996,0.0413,0.01598,-0.020708,0.020944,0.390,+-+,0.851848,0.009376,0.875969
3,rs72631875,0.149266,1,705882,A,G,0.066959,-0.039365,0.027297,rs72631875,-0.0184,0.0595,0.75740,-0.044954,0.030720,0.150,--+,0.000000,0.691697,0.672468
4,rs181440659,0.209173,1,718505,A,G,0.028718,-0.064142,0.051075,rs181440659,-0.1628,0.1362,0.23200,-0.047998,0.055095,0.400,--+,0.000000,0.434576,0.466472
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10687072,rs9616985,0.441902,22,51229805,T,C,0.926815,-0.015827,0.020582,rs9616985,-0.0453,0.0385,0.23960,-0.004034,0.024354,0.880,--+,0.000000,0.365023,0.988928
10687073,rs144549712,0.747566,22,51229855,A,G,0.143091,0.005819,0.018080,rs144549712,-0.0171,0.0360,0.63490,0.013550,0.020908,0.530,-++,0.000000,0.461592,0.738075
10687074,rs9616839,0.492679,22,51233300,T,C,0.334523,0.008820,0.012856,rs9616839,0.0166,0.0235,0.47800,0.005497,0.015359,0.810,+++,0.000000,0.692483,0.756590
10687075,rs200507571,0.096619,22,51236013,A,AT,0.748062,-0.023551,0.014174,rs200507571,-0.0360,0.0293,0.21940,-0.019747,0.016196,0.160,--+,0.000000,0.627329,0.803616


### eQTL:
We use the meta cis-eQTL results from https://adknowledgeportal.synapse.org/Explore/Studies/DetailsPage?Study=syn25398075:

> Here we define "cis" as +/- 1 MB around the gene, and GRCh37 gene locations were used for consistency with the marker imputation panel.

> Cortex_MetaAnalysis : These results have been generated from a combination of the ROSMAP and MayoRNAseq and CommonMind Consortium data.

In [None]:
eQTL = pd.read_csv("eQTL/Cortex_MetaAnalysis_ROSMAP_CMC_HBCC_Mayo_cis_eQTL_release.csv", header=0, index_col=None, delimiter=',')

In [3]:
eQTL

Unnamed: 0,chromosome,snpLocation,snpid,snpLocId,gene,geneSymbol,statistic,pvalue,FDR,beta,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
0,1,729679,rs4951859,1:729679_C_G,ENSG00000227232,WASH7P,-0.314474,0.753161,0.962800,-0.017125,C,G,0.828709,C,-1.0,unprocessed_pseudogene,14404.0,29570.0
1,1,736289,rs79010578,1:736289_T_A,ENSG00000227232,WASH7P,1.189180,0.234369,0.756904,0.074481,T,A,0.141396,A,-1.0,unprocessed_pseudogene,14404.0,29570.0
2,1,752566,rs3094315,1:752566_G_A,ENSG00000227232,WASH7P,-0.345971,0.729365,0.958278,-0.017129,G,A,0.825087,G,-1.0,unprocessed_pseudogene,14404.0,29570.0
3,1,752721,rs3131972,1:752721_A_G,ENSG00000227232,WASH7P,-0.323437,0.746364,0.961634,-0.015903,A,G,0.823418,A,-1.0,unprocessed_pseudogene,14404.0,29570.0
4,1,753405,rs3115860,1:753405_C_A,ENSG00000227232,WASH7P,-0.699680,0.484127,0.892694,-0.037601,C,A,0.847954,C,-1.0,unprocessed_pseudogene,14404.0,29570.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100080613,22,46541227,rs135557,22:46541227_G_A,ENSG00000273243,,0.708309,0.479130,0.887134,0.048925,G,A,0.549801,A,,,,
100080614,22,46543485,rs135556,22:46543485_C_T,ENSG00000273243,,-0.489167,0.624968,0.933616,-0.039056,C,T,0.706972,C,,,,
100080615,22,46544914,rs5767320,22:46544914_T_A,ENSG00000273243,,-1.343134,0.179922,0.694412,-0.136183,T,A,0.138721,T,,,,
100080616,22,46550106,rs4253623,22:46550106_A_G,ENSG00000273243,,-1.343092,0.179936,0.694430,-0.135785,A,G,0.138298,A,,,,


In [16]:
eQTL.set_index(['geneSymbol'], drop=False, inplace=True)

The SNP name is `snpid`. The estimate `beta` is `beta`. Need to calculate `SE` which is `beta` / `statistic`. 

Then we need to read other xQTLs and use `pd.merge()` to select those xQTLs whose SNP-gene pairs appears in the eQTL file. For this purpose, while `eQTL` is the data frame for SNP-gene pairs, other xQTLs are the data frames for SNP-feature pairs. For example, in the context of pQTL, the feature concerned is protein in UNIPROT symbol. 

We also need a table as a dictionary for feature-gene pairs. The dictionary is relatively straightforward in the processed methylQTL file. We need to consult UNIPROT for UNIPROT-gene pairs, and we need some online queries to obtain metabolome-gene pairs.

Due to memory considerations, we first merge the eQTL object (SNP-gene pairs) with the feature-gene pairs, then the result is merged with the xQTL object (SNP-feature pairs).

For methylQTL and metabQTL, we have the additional step of selecting unique SNP-gene pairs based on taking the strongest association.

### pQTL
We use the pQTL results using full ROS/MAP dataset at https://www.synapse.org/#!Synapse:syn24172458.

In [36]:
pQTL = pd.read_csv("pQTL/ROSMAP_DLPFC_pQTLs.csv", header=0, index_col=None, delimiter=',')

In [37]:
pQTL

Unnamed: 0,CHR,POS,REF,ALT,UNIPROT,BETA,SE,P,FDR,N
0,1,9500640,G,A,Q9BSK2,0.004091,0.019949,0.837700,0.968298,269
1,1,9501492,G,A,Q9BSK2,-0.014150,0.020643,0.493698,0.852987,269
2,1,9502076,A,G,Q9BSK2,-0.015608,0.018336,0.395487,0.801075,269
3,1,9503008,T,C,Q9BSK2,-0.003667,0.011188,0.743367,0.944384,269
4,1,9505821,T,C,Q9BSK2,-0.015214,0.020834,0.465919,0.839532,269
...,...,...,...,...,...,...,...,...,...,...
786627,22,33498970,T,C,O14994,0.010626,0.010118,0.294456,0.732735,330
786628,22,33500311,G,A,O14994,-0.018149,0.009063,0.046111,0.336052,330
786629,22,33501556,G,A,O14994,0.003150,0.011330,0.781181,0.954182,330
786630,22,33501743,T,C,O14994,0.004832,0.008094,0.550923,0.878009,330


Get a list of UNIPROT IDs so that we can query them online for gene IDs:

In [38]:
pQTL[['UNIPROT']].drop_duplicates().to_csv("pQTL/UNIPROT_IDs.txt", index=False, header=False)

Submitting a query on https://www.uniprot.org/uploadlists/ from UNIPROT AC/ID to Gene name, we have a dictionary in the file `UNIPROT_IDs_to_gene_names.tab`.

In [39]:
pQTL_feature_to_genes = pd.read_csv('pQTL/UNIPROT_IDs_to_gene_names.tab', sep='\t')
pQTL_feature_to_genes = pQTL_feature_to_genes.rename({'From': 'UNIPROT', 'To': 'geneSymbol'},
            axis=1)
pQTL_feature_to_genes.set_index('geneSymbol', drop=False, inplace=True)
pQTL_feature_to_genes

Unnamed: 0_level_0,UNIPROT,geneSymbol
geneSymbol,Unnamed: 1_level_1,Unnamed: 2_level_1
SLC25A33,Q9BSK2,SLC25A33
GNB1,P62873,GNB1
UBE4B,O95155,UBE4B
ICMT,O60725,ICMT
DFFA,O00273,DFFA
...,...,...
ARSA,P15289,ARSA
TTLL12,Q14166,TTLL12
PACSIN2,Q9UNF0,PACSIN2
TBC1D22A,Q8WUA7,TBC1D22A


In [42]:
pQTL_SNP_to_genes = pQTL_feature_to_genes.join(eQTL,
                                               how = 'inner',
                                               lsuffix = '_x',
                                               rsuffix = '_y')

In [43]:
pQTL_SNP_to_genes

Unnamed: 0_level_0,UNIPROT,geneSymbol_x,chromosome,snpLocation,snpid,snpLocId,gene,geneSymbol_y,statistic,pvalue,FDR,beta,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
geneSymbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
A1BG,P04217,A1BG,19,57856955,rs10406132,19:57856955_T_C,ENSG00000121410,A1BG,0.566644,0.570956,0.934629,0.031715,T,C,0.325755,C,-1.0,protein_coding,58345178.0,58353499.0
A1BG,P04217,A1BG,19,57857105,rs1544494,19:57857105_C_T,ENSG00000121410,A1BG,-0.716156,0.473895,0.908752,-0.037303,C,T,0.530138,C,-1.0,protein_coding,58345178.0,58353499.0
A1BG,P04217,A1BG,19,57857142,rs73063216,19:57857142_C_A,ENSG00000121410,A1BG,-1.603183,0.108894,0.642906,-0.169437,C,A,0.081109,C,-1.0,protein_coding,58345178.0,58353499.0
A1BG,P04217,A1BG,19,57857202,rs73063217,19:57857202_T_C,ENSG00000121410,A1BG,-1.463256,0.143397,0.698797,-0.153530,T,C,0.081111,T,-1.0,protein_coding,58345178.0,58353499.0
A1BG,P04217,A1BG,19,57857948,rs2159030,19:57857948_T_C,ENSG00000121410,A1BG,-0.610235,0.541706,0.927423,-0.037554,T,C,0.764907,T,-1.0,protein_coding,58345178.0,58353499.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZZEF1,O43149,ZZEF1,17,5044506,rs8065031,17:5044506_T_A,ENSG00000074755,ZZEF1,0.968502,0.332794,0.813380,0.059109,T,A,0.101610,A,-1.0,protein_coding,4004445.0,4143020.0
ZZEF1,O43149,ZZEF1,17,5044794,rs2304449,17:5044794_G_A,ENSG00000074755,ZZEF1,-0.745941,0.455703,0.872488,-0.055736,G,A,0.070402,G,-1.0,protein_coding,4004445.0,4143020.0
ZZEF1,O43149,ZZEF1,17,5045034,rs2304448,17:5045034_T_G,ENSG00000074755,ZZEF1,-0.252133,0.800938,0.968441,-0.012295,T,G,0.821175,T,-1.0,protein_coding,4004445.0,4143020.0
ZZEF1,O43149,ZZEF1,17,5045283,rs140256080,17:5045283_T_C,ENSG00000074755,ZZEF1,-0.962268,0.335915,0.815243,-0.052422,T,C,0.138028,T,-1.0,protein_coding,4004445.0,4143020.0


In [44]:
pQTL.set_index(['UNIPROT', 'POS', 'CHR'], drop=False, inplace=True)

In [45]:
pQTL_SNP_to_genes.set_index(['UNIPROT', 'snpLocation', 'chromosome'], drop=False, inplace=True)

In [46]:
pQTL_SNP_to_genes.index.names = ['UNIPROT', 'POS', 'CHR']

In [47]:
pQTL_overlapping_eQTL = pQTL.join(pQTL_SNP_to_genes, 
           how='inner', lsuffix='_x', rsuffix='_y')

In [48]:
pQTL_overlapping_eQTL

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,CHR,POS,REF,ALT,UNIPROT_x,BETA,SE,P,FDR_x,N,...,FDR_y,beta,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
UNIPROT,POS,CHR,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
A0AVF1,138719231,7,7,138719231,T,C,A0AVF1,0.028552,0.020807,0.172066,0.607685,172,...,0.830474,0.032313,T,C,0.536944,C,1.0,protein_coding,139133744.0,139191986.0
A0AVF1,138721021,7,7,138721021,A,G,A0AVF1,0.028289,0.021064,0.181335,0.619615,172,...,0.691650,0.047682,A,G,0.509803,G,1.0,protein_coding,139133744.0,139191986.0
A0AVF1,138721868,7,7,138721868,G,A,A0AVF1,0.027424,0.020904,0.191580,0.631962,172,...,0.614366,0.054792,G,A,0.506361,A,1.0,protein_coding,139133744.0,139191986.0
A0AVF1,138726383,7,7,138726383,T,G,A0AVF1,0.028895,0.021247,0.175911,0.613071,172,...,0.716628,0.045350,T,G,0.514272,G,1.0,protein_coding,139133744.0,139191986.0
A0AVF1,138729795,7,7,138729795,A,C,A0AVF1,0.026994,0.020313,0.185932,0.625461,172,...,0.864338,0.027546,A,C,0.520558,C,1.0,protein_coding,139133744.0,139191986.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Q9Y6Y8,121796742,10,10,121796742,G,T,Q9Y6Y8,-0.012629,0.005751,0.028850,0.261079,330,...,0.731878,-0.044667,G,T,0.521635,G,1.0,protein_coding,119892711.0,119944658.0
Q9Y6Y8,121797067,10,10,121797067,T,G,Q9Y6Y8,-0.012899,0.006588,0.051145,0.354094,330,...,0.814466,-0.038205,T,G,0.709971,T,1.0,protein_coding,119892711.0,119944658.0
Q9Y6Y8,121798022,10,10,121798022,C,T,Q9Y6Y8,0.004897,0.007518,0.515319,0.862697,330,...,0.980941,0.007257,C,T,0.199385,T,1.0,protein_coding,119892711.0,119944658.0
Q9Y6Y8,121798207,10,10,121798207,C,T,Q9Y6Y8,0.018308,0.009082,0.044695,0.330450,330,...,0.803624,0.057929,C,T,0.108331,T,1.0,protein_coding,119892711.0,119944658.0


In [119]:
pQTL_overlapping_eQTL_idxmin = pQTL_overlapping_eQTL.reset_index(drop=True)

In [123]:
pQTL_overlapping_eQTL_idxmin = pQTL_overlapping_eQTL_idxmin.loc[
    pQTL_overlapping_eQTL_idxmin.groupby(['CHR','POS','geneSymbol_x']).P.idxmin()].reset_index(drop=True)

In [124]:
pQTL_overlapping_eQTL_idxmin

Unnamed: 0,CHR,POS,REF,ALT,UNIPROT_x,BETA,SE,P,FDR_x,N,...,FDR_y,beta,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
0,1,785989,T,C,Q9Y3T9,0.029488,0.013028,0.024436,0.236994,282,...,8.132205e-01,-0.055421,T,C,0.836686,T,-1.0,protein_coding,944204.0,959309.0
1,1,838555,C,A,Q9Y3T9,-0.000840,0.010713,0.937601,0.989479,282,...,9.544147e-01,-0.020273,C,A,0.267010,C,-1.0,protein_coding,944204.0,959309.0
2,1,846808,C,T,Q9Y3T9,-0.000272,0.012387,0.982476,0.997482,282,...,7.174384e-01,0.076048,C,T,0.204360,T,-1.0,protein_coding,944204.0,959309.0
3,1,853954,C,A,P05161,-0.030620,0.028156,0.277683,0.719025,323,...,1.022552e-01,0.144398,C,A,0.578350,A,1.0,protein_coding,1001138.0,1014541.0
4,1,853954,C,A,Q9Y3T9,0.013592,0.009404,0.149576,0.575695,282,...,2.506713e-01,-0.122236,C,A,0.578350,C,-1.0,protein_coding,944204.0,959309.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
730518,22,51175626,A,G,Q9BYB0,-0.010342,0.010616,0.330724,0.759008,330,...,9.901694e-01,0.007379,A,G,0.068315,G,1.0,protein_coding,50674415.0,50733298.0
730519,22,51178090,G,A,Q9UNT1,-0.000572,0.054131,0.991581,0.998953,216,...,3.150560e-25,0.863730,G,A,0.060391,A,-1.0,protein_coding,50767501.0,50783663.0
730520,22,51178090,G,A,Q9BYB0,-0.009642,0.011195,0.389769,0.797619,330,...,9.735880e-01,0.019784,G,A,0.060391,A,1.0,protein_coding,50674415.0,50733298.0
730521,22,51186228,C,T,Q9UNT1,0.041947,0.026233,0.111460,0.509686,216,...,1.805755e-01,0.097829,C,T,0.450743,T,-1.0,protein_coding,50767501.0,50783663.0


In [126]:
os.makedirs('analysis_using_meta_eQTL', exist_ok = True)
pQTL_overlapping_eQTL_idxmin.to_csv("analysis_using_meta_eQTL/pQTL_overlapping_eQTL.csv", index=False)

In [125]:
pQTL_overlapping_eQTL_idxmin.columns

Index(['CHR', 'POS', 'REF', 'ALT', 'UNIPROT_x', 'BETA', 'SE', 'P', 'FDR_x',
       'N', 'UNIPROT_y', 'geneSymbol_x', 'chromosome', 'snpLocation', 'snpid',
       'snpLocId', 'gene', 'geneSymbol_y', 'statistic', 'pvalue', 'FDR_y',
       'beta', 'A1', 'A2', 'A2freq', 'expressionIncreasingAllele', 'strand',
       'geneBiotype', 'geneStartPosition', 'geneEndPosition'],
      dtype='object')

The SNP name is `snpid`. The estimate `beta` is `BETA`. The standard error estimator `SE` is `SE`.

### metabQTL
- files:
- (fia) metabQTL/fia-metabQTL-1_long.zip
- (uplc) metabQTL/uplc-metabQTL-1.zip

#### Obtain the mapping from features (metabolites) to genes

In [4]:
# we only extract the mapping from features (metabolites) to genes 
# instead of using the actual summary statistics in the file:
metabQTL_uplc = pd.read_csv("metabQTL/uplc-metabQTL-1e-4-UPDATED.csv", header=0, index_col=None, delimiter=",")

In [5]:
metabQTL_uplc.rename({'gene':'ADNI_ID'}, axis=1, inplace=True)
metabQTL_uplc

Unnamed: 0,snps,ADNI_ID,statistic,pvalue,FDR,beta,Name,KEGG,genes
0,rs2069707,P180_M15,6.252548,3.405343e-09,0.097573,29.853509,Creatinine,C00791,"['ALDH3B2', 'NAMPT', 'NSD1', 'DOT1L', 'ABCC4',..."
1,rs34899222,P180_M15,5.829532,2.898698e-08,0.238744,30.443130,Creatinine,C00791,"['ALDH3B2', 'NAMPT', 'NSD1', 'DOT1L', 'ABCC4',..."
2,rs79155407,P180_M15,5.493135,1.491041e-07,0.404814,28.285716,Creatinine,C00791,"['ALDH3B2', 'NAMPT', 'NSD1', 'DOT1L', 'ABCC4',..."
3,rs2728114,P180_M15,5.301367,3.689894e-07,0.452393,15.111635,Creatinine,C00791,"['ALDH3B2', 'NAMPT', 'NSD1', 'DOT1L', 'ABCC4',..."
4,rs1994909,P180_M15,5.292062,3.853734e-07,0.452393,18.217046,Creatinine,C00791,"['ALDH3B2', 'NAMPT', 'NSD1', 'DOT1L', 'ABCC4',..."
...,...,...,...,...,...,...,...,...,...
16921,rs11930225,P180_M195,-3.997078,9.695950e-05,0.902450,-7.473984,Trp,C00078,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
16922,rs6973276,P180_M195,3.996257,9.726601e-05,0.902450,4.750904,Trp,C00078,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
16923,rs6439260,P180_M195,3.995839,9.742215e-05,0.902450,5.485977,Trp,C00078,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
16924,rs9509821,P180_M195,3.991573,9.903196e-05,0.906106,4.870799,Trp,C00078,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."


In [6]:
# extract name and genes and then do a merge (join)
compound_to_gene_uplc = metabQTL_uplc.loc[:, ['ADNI_ID','genes']].drop_duplicates()
compound_to_gene_uplc

Unnamed: 0,ADNI_ID,genes
0,P180_M15,"['ALDH3B2', 'NAMPT', 'NSD1', 'DOT1L', 'ABCC4',..."
940,P180_M193,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
1872,P180_M166,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
2652,P180_M6,"['HIF1A', 'MAT1A', 'AADAT', 'PIK3CA', 'NSD1', ..."
3164,P180_M29,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
4140,P180_M187,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
4403,P180_M27,"['ALDH3B2', 'NAMPT', 'NSD1', 'DOT1L', 'NPR2', ..."
5142,P180_M5,"['HIF1A', 'MAT1A', 'AADAT', 'PIK3CA', 'NSD1', ..."
5860,P180_M159,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
6517,P180_M81,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."


In [7]:
# extract name and genes and then do a merge (join)
compound_to_gene_fia = pd.read_csv("metabQTL/fiaa_metabs_mapping.csv", header=0, index_col=None, delimiter=",")
compound_to_gene_fia

Unnamed: 0,Compound,ADNI_ID,KEGG_ID,genes
0,lysoPC.a.C17.0,P180_M57,C04317,"['PAFAH2', 'PLA2G4E', 'PLA2G2E', 'CHPT1', 'PLP..."
1,lysoPC.a.C18.1,P180_M59,C04230,"['LPGAT1', 'ADPRM', 'PLA2G2E', 'PLPP2', 'CHPT1..."
2,lysoPC.a.C20.3,P180_M61,C04230,"['LPGAT1', 'ADPRM', 'PLA2G2E', 'PLPP2', 'CHPT1..."
3,lysoPC.a.C20.4,P180_M62,C04230,"['LPGAT1', 'ADPRM', 'PLA2G2E', 'PLPP2', 'CHPT1..."
4,Hexadecanoylcarnitine,P180_M31,C02990,"['ALDH9A1', 'ADH7', 'ACSL3', 'CYP4A22', 'ACSL6..."


In [8]:
compound_to_gene_fia = compound_to_gene_fia.loc[:,['ADNI_ID','genes']]
compound_to_gene_fia

Unnamed: 0,ADNI_ID,genes
0,P180_M57,"['PAFAH2', 'PLA2G4E', 'PLA2G2E', 'CHPT1', 'PLP..."
1,P180_M59,"['LPGAT1', 'ADPRM', 'PLA2G2E', 'PLPP2', 'CHPT1..."
2,P180_M61,"['LPGAT1', 'ADPRM', 'PLA2G2E', 'PLPP2', 'CHPT1..."
3,P180_M62,"['LPGAT1', 'ADPRM', 'PLA2G2E', 'PLPP2', 'CHPT1..."
4,P180_M31,"['ALDH9A1', 'ADH7', 'ACSL3', 'CYP4A22', 'ACSL6..."


In [9]:
compound_to_gene = pd.concat([compound_to_gene_uplc,compound_to_gene_fia], ignore_index = True)
compound_to_gene

Unnamed: 0,ADNI_ID,genes
0,P180_M15,"['ALDH3B2', 'NAMPT', 'NSD1', 'DOT1L', 'ABCC4',..."
1,P180_M193,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
2,P180_M166,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
3,P180_M6,"['HIF1A', 'MAT1A', 'AADAT', 'PIK3CA', 'NSD1', ..."
4,P180_M29,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
5,P180_M187,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
6,P180_M27,"['ALDH3B2', 'NAMPT', 'NSD1', 'DOT1L', 'NPR2', ..."
7,P180_M5,"['HIF1A', 'MAT1A', 'AADAT', 'PIK3CA', 'NSD1', ..."
8,P180_M159,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."
9,P180_M81,"['CP', 'ALDH3B2', 'NAMPT', 'NSD1', 'IDH2', 'CO..."


In [10]:
compound_to_gene_list = [pd.DataFrame([[x, w] for w in re.split("^\['|', '|'\]$", y)[1:-1]], columns = ['ADNI_ID', 'gene']) \
                      for x, y in zip(compound_to_gene['ADNI_ID'], compound_to_gene['genes'])]
compound_to_gene_list

[      ADNI_ID     gene
 0    P180_M15  ALDH3B2
 1    P180_M15    NAMPT
 2    P180_M15     NSD1
 3    P180_M15    DOT1L
 4    P180_M15    ABCC4
 ..        ...      ...
 316  P180_M15    PANK3
 317  P180_M15     GNMT
 318  P180_M15     MAOA
 319  P180_M15     ULK3
 320  P180_M15    ABCG5
 
 [321 rows x 2 columns],
        ADNI_ID     gene
 0    P180_M193       CP
 1    P180_M193  ALDH3B2
 2    P180_M193    NAMPT
 3    P180_M193     NSD1
 4    P180_M193     IDH2
 ..         ...      ...
 930  P180_M193   POLR2I
 931  P180_M193      AK4
 932  P180_M193   AKR1D1
 933  P180_M193    HADHB
 934  P180_M193    ABCG5
 
 [935 rows x 2 columns],
        ADNI_ID     gene
 0    P180_M166       CP
 1    P180_M166  ALDH3B2
 2    P180_M166    NAMPT
 3    P180_M166     NSD1
 4    P180_M166     IDH2
 ..         ...      ...
 807  P180_M166   POLR2I
 808  P180_M166      AK4
 809  P180_M166   AKR1D1
 810  P180_M166    HADHB
 811  P180_M166    ABCG5
 
 [812 rows x 2 columns],
      ADNI_ID     gene
 0    P1

In [11]:
compound_to_gene_df = pd.concat(compound_to_gene_list, ignore_index=True)
compound_to_gene_df

Unnamed: 0,ADNI_ID,gene
0,P180_M15,ALDH3B2
1,P180_M15,NAMPT
2,P180_M15,NSD1
3,P180_M15,DOT1L
4,P180_M15,ABCC4
...,...,...
19862,P180_M31,ACSL4
19863,P180_M31,CPT1C
19864,P180_M31,ACADVL
19865,P180_M31,ADH4


In [14]:
compound_to_gene_df.set_index(['gene'], drop=False, inplace=True)

#### Read all metabQTLs (caution: large files!)

In [12]:
fia_metabQTL_all = pd.read_csv('metabQTL/fia-metabQTL-1_long.zip',
                                header=0, index_col=None, delimiter=",")

In [13]:
fia_metabQTL_all

Unnamed: 0,snps,gene,statistic,pvalue,FDR,beta
0,rs293172,P180_M16,7.690385e+00,1.313581e-12,0.000798,3.237272e-02
1,rs293170,P180_M16,6.842282e+00,1.497868e-10,0.011376,3.091165e-02
2,rs293173,P180_M16,6.842282e+00,1.497868e-10,0.011376,3.091165e-02
3,rs293174,P180_M16,6.842282e+00,1.497868e-10,0.011376,3.091165e-02
4,rs293175,P180_M16,6.842282e+00,1.497868e-10,0.011376,3.091165e-02
...,...,...,...,...,...,...
607598695,rs71336711,P180_M122,4.077026e-09,1.000000e+00,1.000000,1.480980e-11
607598696,rs10424463,P180_M122,4.077026e-09,1.000000e+00,1.000000,1.480980e-11
607598697,rs10403676,P180_M122,4.077026e-09,1.000000e+00,1.000000,1.480980e-11
607598698,rs10419696,P180_M122,4.077026e-09,1.000000e+00,1.000000,1.480980e-11


In [None]:
uplc_metabQTL_all = pd.read_csv('metabQTL/uplc-metabQTL-1.zip',
                                header=0, index_col=None, delimiter=",")

In [None]:
uplc_metabQTL_all

In [17]:
metabQTL_SNP_to_genes = compound_to_gene_df.join(eQTL,
                           how = 'inner', lsuffix='_x', rsuffix='_y')

In [18]:
metabQTL_SNP_to_genes

Unnamed: 0,ADNI_ID,chromosome,snpLocation,snpid,snpLocId,gene,geneSymbol,statistic,pvalue,FDR,beta,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
AACS,P180_M193,12,124550051,rs902167,12:124550051_T_C,ENSG00000081760,AACS,1.807933,0.070617,0.505805,0.068535,T,C,0.604360,C,1.0,protein_coding,125065379.0,125143333.0
AACS,P180_M193,12,124550341,rs1686725,12:124550341_T_G,ENSG00000081760,AACS,1.088867,0.276212,0.791843,0.046814,T,G,0.764966,G,1.0,protein_coding,125065379.0,125143333.0
AACS,P180_M193,12,124551229,rs1686726,12:124551229_A_C,ENSG00000081760,AACS,1.781607,0.074813,0.518478,0.067640,A,C,0.604339,C,1.0,protein_coding,125065379.0,125143333.0
AACS,P180_M193,12,124552125,rs113624010,12:124552125_G_A,ENSG00000081760,AACS,-1.075117,0.282323,0.796150,-0.054735,G,A,0.160103,G,1.0,protein_coding,125065379.0,125143333.0
AACS,P180_M193,12,124552349,rs80081406,12:124552349_C_T,ENSG00000081760,AACS,-1.030249,0.302893,0.809551,-0.052422,C,T,0.160254,C,1.0,protein_coding,125065379.0,125143333.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WNT9A,P180_M197,1,229133819,rs61825092,1:229133819_G_A,ENSG00000143816,WNT9A,-0.250360,0.802309,0.971560,-0.048691,G,A,0.010579,G,-1.0,protein_coding,227918656.0,227947898.0
WNT9A,P180_M197,1,229134166,rs61825093,1:229134166_G_A,ENSG00000143816,WNT9A,-0.251780,0.801211,0.971372,-0.048976,G,A,0.010571,G,-1.0,protein_coding,227918656.0,227947898.0
WNT9A,P180_M197,1,229134471,rs630716,1:229134471_A_G,ENSG00000143816,WNT9A,-0.925373,0.354772,0.837536,-0.055660,A,G,0.757571,A,-1.0,protein_coding,227918656.0,227947898.0
WNT9A,P180_M197,1,229134840,rs72760181,1:229134840_G_A,ENSG00000143816,WNT9A,1.662608,0.096391,0.569502,0.273474,G,A,0.021260,A,-1.0,protein_coding,227918656.0,227947898.0


In [21]:
metabQTL_SNP_to_genes.set_index(['ADNI_ID', 'snpid'], drop=False, inplace=True)

In [23]:
fia_metabQTL_all.set_index(['gene', 'snps'], drop=False, inplace=True)

In [27]:
fia_metabQTL_all.index.names = ['ADNI_ID', 'snpid']

In [31]:
metabQTL_SNP_to_genes.sort_index(inplace=True)

In [32]:
fia_metabQTL_all.sort_index(inplace=True)

In [33]:
fia_metabQTL_overlapping_eQTL = fia_metabQTL_all.join(metabQTL_SNP_to_genes, 
           how='inner', lsuffix='_x', rsuffix='_y')

In [34]:
fia_metabQTL_overlapping_eQTL

Unnamed: 0_level_0,Unnamed: 1_level_0,snps,gene_x,statistic_x,pvalue_x,FDR_x,beta_x,ADNI_ID,chromosome,snpLocation,snpid,...,FDR_y,beta_y,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
ADNI_ID,snpid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
P180_M31,rs10000018,rs10000018,P180_M31,-2.732361,0.006982,0.999996,-0.008236,P180_M31,4,100458448,rs10000018,...,0.850325,-0.047233,A,G,0.294417,A,-1.0,protein_coding,99304964.0,99321401.0
P180_M31,rs10000018,rs10000018,P180_M31,-2.732361,0.006982,0.999996,-0.008236,P180_M31,4,100458448,rs10000018,...,0.941563,0.017465,A,G,0.294417,G,-1.0,protein_coding,99070978.0,99088801.0
P180_M31,rs10000034,rs10000034,P180_M31,-1.207171,0.229115,0.999996,-0.005554,P180_M31,4,8174883,rs10000034,...,0.985055,0.008433,T,G,0.098137,G,-1.0,protein_coding,8366282.0,8440723.0
P180_M31,rs10000067,rs10000067,P180_M31,-0.960513,0.338220,0.999996,-0.003402,P180_M31,4,100296476,rs10000067,...,0.943883,-0.025708,T,C,0.194369,T,-1.0,protein_coding,99304964.0,99321401.0
P180_M31,rs10000067,rs10000067,P180_M31,-0.960513,0.338220,0.999996,-0.003402,P180_M31,4,100296476,rs10000067,...,0.797216,-0.046855,T,C,0.194369,T,-1.0,protein_coding,99070978.0,99088801.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
P180_M62,rs9999788,rs9999788,P180_M62,-0.118037,0.906183,0.999996,-0.053725,P180_M62,4,109921572,rs9999788,...,0.972345,0.011363,C,T,0.857350,T,-1.0,protein_coding,109709989.0,109730077.0
P180_M62,rs999979,rs999979,P180_M62,0.476060,0.634669,0.999996,0.159791,P180_M62,3,170832832,rs999979,...,0.696261,-0.048334,C,T,0.575631,C,-1.0,protein_coding,171600405.0,171810950.0
P180_M62,rs9999824,rs9999824,P180_M62,-1.308057,0.192696,0.999996,-0.454538,P180_M62,4,110935774,rs9999824,...,0.574103,-0.066085,A,C,0.299302,A,-1.0,protein_coding,109709989.0,109730077.0
P180_M62,rs9999830,rs9999830,P180_M62,1.294146,0.197445,0.999996,0.668025,P180_M62,4,109708724,rs9999830,...,0.994642,0.002756,A,G,0.138399,G,-1.0,protein_coding,109709989.0,109730077.0


In [113]:
fia_metabQTL_overlapping_eQTL_idxmin = fia_metabQTL_overlapping_eQTL.reset_index(drop=True)

In [114]:
fia_metabQTL_overlapping_eQTL_idxmin = fia_metabQTL_overlapping_eQTL_idxmin.loc[
    fia_metabQTL_overlapping_eQTL_idxmin.groupby(['snps','geneSymbol']).pvalue_x.idxmin()].reset_index(drop=True)

In [115]:
fia_metabQTL_overlapping_eQTL_idxmin

Unnamed: 0,snps,gene_x,statistic_x,pvalue_x,FDR_x,beta_x,ADNI_ID,chromosome,snpLocation,snpid,...,FDR_y,beta_y,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
0,rs10000005,P180_M59,-2.383190,0.018314,0.999996,-2.262149,P180_M59,4,85161558,rs10000005,...,0.860259,0.029990,G,A,0.544044,A,1.0,protein_coding,84582979.0,84651338.0
1,rs10000005,P180_M59,-2.383190,0.018314,0.999996,-2.262149,P180_M59,4,85161558,rs10000005,...,0.844547,-0.032557,G,A,0.544044,G,1.0,protein_coding,83535914.0,83605875.0
2,rs10000012,P180_M59,-1.645260,0.101843,0.999996,-2.239805,P180_M59,4,1357325,rs10000012,...,0.873740,-0.039266,C,G,0.141901,C,-1.0,protein_coding,958887.0,986895.0
3,rs10000015,P180_M62,1.681095,0.094660,0.999996,1.440465,P180_M62,4,84143987,rs10000015,...,0.446931,-0.161501,A,G,0.059053,A,1.0,protein_coding,83535914.0,83605875.0
4,rs10000017,P180_M62,-1.159114,0.248105,0.999996,-0.484103,P180_M62,4,84778125,rs10000017,...,0.777392,-0.049897,C,T,0.229813,C,1.0,protein_coding,84582979.0,84651338.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382880,rs9999909,P180_M62,-0.914158,0.361984,0.999996,-0.323681,P180_M62,4,84182715,rs9999909,...,0.868960,-0.029870,C,G,0.345358,C,1.0,protein_coding,83535914.0,83605875.0
382881,rs9999976,P180_M31,-0.686366,0.493458,0.999996,-0.002443,P180_M31,4,100296362,rs9999976,...,0.943843,-0.025792,T,C,0.193930,T,-1.0,protein_coding,99304964.0,99321401.0
382882,rs9999976,P180_M31,-0.686366,0.493458,0.999996,-0.002443,P180_M31,4,100296362,rs9999976,...,0.793215,-0.047583,T,C,0.193930,T,-1.0,protein_coding,99070978.0,99088801.0
382883,rs9999995,P180_M31,-0.942792,0.347182,0.999996,-0.005681,P180_M31,4,185171608,rs9999995,...,0.842803,-0.058542,A,G,0.078814,A,-1.0,protein_coding,184755595.0,184826818.0


In [116]:
fia_metabQTL_overlapping_eQTL_idxmin.drop(['gene_x','gene_y'], axis=1, inplace=True)

In [117]:
fia_metabQTL_overlapping_eQTL_idxmin

Unnamed: 0,snps,statistic_x,pvalue_x,FDR_x,beta_x,ADNI_ID,chromosome,snpLocation,snpid,snpLocId,...,FDR_y,beta_y,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
0,rs10000005,-2.383190,0.018314,0.999996,-2.262149,P180_M59,4,85161558,rs10000005,4:85161558_G_A,...,0.860259,0.029990,G,A,0.544044,A,1.0,protein_coding,84582979.0,84651338.0
1,rs10000005,-2.383190,0.018314,0.999996,-2.262149,P180_M59,4,85161558,rs10000005,4:85161558_G_A,...,0.844547,-0.032557,G,A,0.544044,G,1.0,protein_coding,83535914.0,83605875.0
2,rs10000012,-1.645260,0.101843,0.999996,-2.239805,P180_M59,4,1357325,rs10000012,4:1357325_C_G,...,0.873740,-0.039266,C,G,0.141901,C,-1.0,protein_coding,958887.0,986895.0
3,rs10000015,1.681095,0.094660,0.999996,1.440465,P180_M62,4,84143987,rs10000015,4:84143987_A_G,...,0.446931,-0.161501,A,G,0.059053,A,1.0,protein_coding,83535914.0,83605875.0
4,rs10000017,-1.159114,0.248105,0.999996,-0.484103,P180_M62,4,84778125,rs10000017,4:84778125_C_T,...,0.777392,-0.049897,C,T,0.229813,C,1.0,protein_coding,84582979.0,84651338.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382880,rs9999909,-0.914158,0.361984,0.999996,-0.323681,P180_M62,4,84182715,rs9999909,4:84182715_C_G,...,0.868960,-0.029870,C,G,0.345358,C,1.0,protein_coding,83535914.0,83605875.0
382881,rs9999976,-0.686366,0.493458,0.999996,-0.002443,P180_M31,4,100296362,rs9999976,4:100296362_T_C,...,0.943843,-0.025792,T,C,0.193930,T,-1.0,protein_coding,99304964.0,99321401.0
382882,rs9999976,-0.686366,0.493458,0.999996,-0.002443,P180_M31,4,100296362,rs9999976,4:100296362_T_C,...,0.793215,-0.047583,T,C,0.193930,T,-1.0,protein_coding,99070978.0,99088801.0
382883,rs9999995,-0.942792,0.347182,0.999996,-0.005681,P180_M31,4,185171608,rs9999995,4:185171608_A_G,...,0.842803,-0.058542,A,G,0.078814,A,-1.0,protein_coding,184755595.0,184826818.0


In [118]:
fia_metabQTL_overlapping_eQTL.to_csv("analysis_using_meta_eQTL/fia_metabQTL_overlapping_eQTL.csv", index=False)

In [None]:
uplc_metabQTL_all.set_index(['gene', 'snps'], drop=False, inplace=True)

In [None]:
uplc_metabQTL_all.index.names = ['ADNI_ID', 'snpid']

In [None]:
uplc_metabQTL_all.sort_index(inplace=True)

In [None]:
uplc_metabQTL_overlapping_eQTL = uplc_metabQTL_all.join(metabQTL_SNP_to_genes, 
           how='inner', lsuffix='_x', rsuffix='_y')

In [None]:
uplc_metabQTL_overlapping_eQTL

In [None]:
uplc_metabQTL_overlapping_eQTL_idxmin = uplc_metabQTL_overlapping_eQTL.reset_index(drop=True)

In [None]:
uplc_metabQTL_overlapping_eQTL_idxmin = uplc_metabQTL_overlapping_eQTL_idxmin.loc[
    uplc_metabQTL_overlapping_eQTL_idxmin.groupby(['snps','gene_x']).pvalue_x.idxmin()].reset_index(drop=True)

In [None]:
uplc_metabQTL_overlapping_eQTL_idxmin

In [None]:
uplc_metabQTL_overlapping_eQTL_idxmin.drop(['gene','gene_x','gene_y'], axis=1, inplace=True)

In [None]:
uplc_metabQTL_overlapping_eQTL_idxmin

In [None]:
uplc_metabQTL_overlapping_eQTL_idxmin.to_csv("analysis_using_meta_eQTL/uplc_metabQTL_overlapping_eQTL.csv", index=False)

In [127]:
uplc_metabQTL_overlapping_eQTL_idxmin = pd.read_csv("analysis_using_meta_eQTL/uplc_metabQTL_overlapping_eQTL.csv", header=0, index_col=None, delimiter=",")

In [128]:
metabQTL_overlapping_eQTL_idxmin = pd.concat(
    [uplc_metabQTL_overlapping_eQTL_idxmin, fia_metabQTL_overlapping_eQTL_idxmin],
    axis=0,
    ignore_index=True)

In [134]:
metabQTL_overlapping_eQTL_idxmin

Unnamed: 0,snps,statistic_x,pvalue_x,FDR_x,beta_x,ADNI_ID,chromosome,snpLocation,snpid,snpLocId,...,FDR_y,beta_y,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
0,rs10000003,2.179230,0.030750,0.999937,2.626286,P180_M14,4,57561647,rs10000003,4:57561647_A_G,...,0.639259,0.058436,A,G,0.700927,G,-1.0,protein_coding,56338287.0,56387508.0
1,rs10000003,1.329355,0.185588,0.999937,0.777456,P180_M193,4,57561647,rs10000003,4:57561647_A_G,...,0.782251,-0.043648,A,G,0.700927,A,1.0,protein_coding,56435741.0,56464579.0
2,rs10000003,1.329355,0.185588,0.999937,0.777456,P180_M193,4,57561647,rs10000003,4:57561647_A_G,...,0.779798,0.043499,A,G,0.700927,G,1.0,protein_coding,56977722.0,57031168.0
3,rs10000003,2.179230,0.030750,0.999937,2.626286,P180_M14,4,57561647,rs10000003,4:57561647_A_G,...,0.586852,0.064515,A,G,0.700927,G,-1.0,protein_coding,56393362.0,56435615.0
4,rs10000005,2.215504,0.028111,0.999937,7.594640,P180_M167,4,85161558,rs10000005,4:85161558_G_A,...,0.860259,0.029990,G,A,0.544044,A,1.0,protein_coding,84582979.0,84651338.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3225691,rs9999909,-0.914158,0.361984,0.999996,-0.323681,P180_M62,4,84182715,rs9999909,4:84182715_C_G,...,0.868960,-0.029870,C,G,0.345358,C,1.0,protein_coding,83535914.0,83605875.0
3225692,rs9999976,-0.686366,0.493458,0.999996,-0.002443,P180_M31,4,100296362,rs9999976,4:100296362_T_C,...,0.943843,-0.025792,T,C,0.193930,T,-1.0,protein_coding,99304964.0,99321401.0
3225693,rs9999976,-0.686366,0.493458,0.999996,-0.002443,P180_M31,4,100296362,rs9999976,4:100296362_T_C,...,0.793215,-0.047583,T,C,0.193930,T,-1.0,protein_coding,99070978.0,99088801.0
3225694,rs9999995,-0.942792,0.347182,0.999996,-0.005681,P180_M31,4,185171608,rs9999995,4:185171608_A_G,...,0.842803,-0.058542,A,G,0.078814,A,-1.0,protein_coding,184755595.0,184826818.0


In [131]:
metabQTL_overlapping_eQTL_idxmin = metabQTL_overlapping_eQTL_idxmin.loc[
    metabQTL_overlapping_eQTL_idxmin.groupby(['snps','geneSymbol']).pvalue_x.idxmin()].reset_index(drop=True)

In [132]:
metabQTL_overlapping_eQTL_idxmin

Unnamed: 0,snps,statistic_x,pvalue_x,FDR_x,beta_x,ADNI_ID,chromosome,snpLocation,snpid,snpLocId,...,FDR_y,beta_y,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
0,rs10000003,2.179230,0.030750,0.999937,2.626286,P180_M14,4,57561647,rs10000003,4:57561647_A_G,...,0.639259,0.058436,A,G,0.700927,G,-1.0,protein_coding,56338287.0,56387508.0
1,rs10000003,1.329355,0.185588,0.999937,0.777456,P180_M193,4,57561647,rs10000003,4:57561647_A_G,...,0.782251,-0.043648,A,G,0.700927,A,1.0,protein_coding,56435741.0,56464579.0
2,rs10000003,1.329355,0.185588,0.999937,0.777456,P180_M193,4,57561647,rs10000003,4:57561647_A_G,...,0.779798,0.043499,A,G,0.700927,G,1.0,protein_coding,56977722.0,57031168.0
3,rs10000003,2.179230,0.030750,0.999937,2.626286,P180_M14,4,57561647,rs10000003,4:57561647_A_G,...,0.586852,0.064515,A,G,0.700927,G,-1.0,protein_coding,56393362.0,56435615.0
4,rs10000005,-2.383190,0.018314,0.999996,-2.262149,P180_M59,4,85161558,rs10000005,4:85161558_G_A,...,0.860259,0.029990,G,A,0.544044,A,1.0,protein_coding,84582979.0,84651338.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2925854,rs9999976,2.321265,0.021511,0.999937,4.875438,P180_M196,4,100296362,rs9999976,4:100296362_T_C,...,0.793215,-0.047583,T,C,0.193930,T,-1.0,protein_coding,99070978.0,99088801.0
2925855,rs9999976,-2.565780,0.011195,0.999937,-3.551833,P180_M14,4,100296362,rs9999976,4:100296362_T_C,...,0.576729,-0.072251,T,C,0.193930,T,-1.0,protein_coding,98871684.0,98930637.0
2925856,rs9999981,2.779724,0.006081,0.999937,12.374678,P180_M26,4,139575905,rs9999981,4:139575905_A_G,...,0.715943,0.049620,A,G,0.624530,G,-1.0,protein_coding,139495941.0,139606699.0
2925857,rs9999995,-0.942792,0.347182,0.999996,-0.005681,P180_M31,4,185171608,rs9999995,4:185171608_A_G,...,0.842803,-0.058542,A,G,0.078814,A,-1.0,protein_coding,184755595.0,184826818.0


In [133]:
metabQTL_overlapping_eQTL_idxmin.to_csv("analysis_using_meta_eQTL/metabQTL_overlapping_eQTL.csv", index=False)

### methylQTL
- Used data from 209 ADNI 2/GO controls
- Used Bioconductor package data to match probes to genes
- Used an annotation file from Illumina which maps CpG’s to genes
- Aggregated probes to create gene-level signals (mean)
- methylQTL performed on a gene level

In [3]:
mQTL = pd.read_csv("Filtered eQTL and mQTL/controls.txt", header=0, index_col=None, delimiter=",")

In [4]:
mQTL

Unnamed: 0,snps,gene,statistic,pvalue,FDR,beta
0,rs2069978,GPX4;GPX4;GPX4,9.889386,6.223929e-19,1.847805e-08,0.078782
1,rs2069978,GPX4;GPX4;GPX4;GPX4;GPX4,9.607171,4.019592e-18,5.966828e-08,0.109791
2,rs2069984,GPX4;GPX4;GPX4,9.085744,1.201035e-16,5.942866e-07,0.070466
3,rs2069987,GPX4;GPX4;GPX4,9.085744,1.201035e-16,5.942866e-07,0.070466
4,rs60428566,GPX4;GPX4;GPX4,9.085744,1.201035e-16,5.942866e-07,0.070466
...,...,...,...,...,...,...
30888009,rs1355106,PDE4DIP;PDE4DIP;PDE4DIP;PDE4DIP;NBPF20;NBPF9,-3.341382,1.000000e-03,9.611729e-01,-0.013573
30888010,rs1355107,PDE4DIP;PDE4DIP;PDE4DIP;PDE4DIP;NBPF20;NBPF9,-3.341382,1.000000e-03,9.611729e-01,-0.013573
30888011,rs12499737,PDE4DIP;PDE4DIP;PDE4DIP;PDE4DIP;NBPF20;NBPF9,-3.341382,1.000000e-03,9.611729e-01,-0.013573
30888012,rs12504629,PDE4DIP;PDE4DIP;PDE4DIP;PDE4DIP;NBPF20;NBPF9,-3.341382,1.000000e-03,9.611729e-01,-0.013573


In [5]:
gene_group_to_gene = pd.DataFrame(columns=['gene', 'Symbol'])
unique_gene_lists = np.unique(mQTL.gene)
for gene_list in tqdm(unique_gene_lists):
    genes = list(set(gene_list.split(';')))
    for i in range(len(genes)):
        gene_group_to_gene = gene_group_to_gene.append(
            pd.DataFrame([[gene_list, genes[i]]], columns=['gene', 'Symbol']), ignore_index=True)
gene_group_to_gene

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4793.0), HTML(value='')))




Unnamed: 0,gene,Symbol
0,AA06;ASIC2,ASIC2
1,AA06;ASIC2,AA06
2,AACS,AACS
3,AACS;AACS,AACS
4,AACSL,AACSL
...,...,...
6763,ZPLD1,ZPLD1
6764,ZSCAN29;TUBGCP4,TUBGCP4
6765,ZSCAN29;TUBGCP4,ZSCAN29
6766,ZSCAN29;TUBGCP4;TUBGCP4,TUBGCP4


In [7]:
gene_group_to_gene.set_index('Symbol', inplace=True)

KeyError: "None of ['Symbol'] are in the columns"

In [56]:
mQTL_SNP_to_genes = gene_group_to_gene.join(eQTL,
                           how = 'inner', lsuffix='_x', rsuffix='_y')

In [57]:
mQTL_SNP_to_genes

Unnamed: 0,gene_x,chromosome,snpLocation,snpid,snpLocId,gene_y,geneSymbol,statistic,pvalue,FDR,beta,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
A1BG,A1BG,19,57856955,rs10406132,19:57856955_T_C,ENSG00000121410,A1BG,0.566644,0.570956,0.934629,0.031715,T,C,0.325755,C,-1.0,protein_coding,58345178.0,58353499.0
A1BG,A1BG,19,57857105,rs1544494,19:57857105_C_T,ENSG00000121410,A1BG,-0.716156,0.473895,0.908752,-0.037303,C,T,0.530138,C,-1.0,protein_coding,58345178.0,58353499.0
A1BG,A1BG,19,57857142,rs73063216,19:57857142_C_A,ENSG00000121410,A1BG,-1.603183,0.108894,0.642906,-0.169437,C,A,0.081109,C,-1.0,protein_coding,58345178.0,58353499.0
A1BG,A1BG,19,57857202,rs73063217,19:57857202_T_C,ENSG00000121410,A1BG,-1.463256,0.143397,0.698797,-0.153530,T,C,0.081111,T,-1.0,protein_coding,58345178.0,58353499.0
A1BG,A1BG,19,57857948,rs2159030,19:57857948_T_C,ENSG00000121410,A1BG,-0.610235,0.541706,0.927423,-0.037554,T,C,0.764907,T,-1.0,protein_coding,58345178.0,58353499.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZZZ3,ZZZ3;ZZZ3;ZZZ3,1,79147922,rs72935685,1:79147922_T_C,ENSG00000036549,ZZZ3,0.330917,0.740707,0.960569,0.023024,T,C,0.094893,C,-1.0,protein_coding,77562416.0,77683419.0
ZZZ3,ZZZ3;ZZZ3;ZZZ3,1,79147976,rs17102049,1:79147976_G_A,ENSG00000036549,ZZZ3,0.330749,0.740834,0.960602,0.023012,G,A,0.094874,A,-1.0,protein_coding,77562416.0,77683419.0
ZZZ3,ZZZ3;ZZZ3;ZZZ3,1,79148288,rs17398682,1:79148288_C_T,ENSG00000036549,ZZZ3,0.944410,0.344960,0.832257,0.036983,C,T,0.323603,T,-1.0,protein_coding,77562416.0,77683419.0
ZZZ3,ZZZ3;ZZZ3;ZZZ3,1,79148633,rs17102051,1:79148633_A_G,ENSG00000036549,ZZZ3,0.462890,0.643443,0.939693,0.032269,A,G,0.094833,G,-1.0,protein_coding,77562416.0,77683419.0


In [59]:
mQTL_SNP_to_genes.set_index(['gene_x', 'snpid'], drop=False, inplace=True)

In [60]:
mQTL.set_index(['gene', 'snps'], drop=False, inplace=True)

In [61]:
mQTL_SNP_to_genes.index.names = ['gene', 'snps']

In [63]:
mQTL_SNP_to_genes.sort_index(inplace=True)

In [64]:
mQTL.sort_index(inplace=True)

In [65]:
mQTL_overlapping_eQTL = mQTL.join(mQTL_SNP_to_genes, 
           how='inner', lsuffix='_x', rsuffix='_y')

In [66]:
mQTL_overlapping_eQTL

Unnamed: 0_level_0,Unnamed: 1_level_0,snps,gene,statistic_x,pvalue_x,FDR_x,beta_x,gene_x,chromosome,snpLocation,snpid,...,FDR_y,beta_y,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
gene,snps,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
A2ML1;A2ML1;A2ML1,rs61919512,rs61919512,A2ML1;A2ML1;A2ML1,-4.157133,0.000048,0.832931,-0.034387,A2ML1;A2ML1;A2ML1,12,9030678,rs61919512,...,0.833074,-0.060928,C,G,0.096253,C,1.0,protein_coding,8822472.0,8887001.0
AACS,rs1971321,rs1971321,AACS,-4.020785,0.000083,0.867863,-0.012643,AACS,12,124693529,rs1971321,...,0.243394,-0.170759,C,T,0.076277,C,1.0,protein_coding,125065379.0,125143333.0
AACS,rs75675338,rs75675338,AACS,-4.183230,0.000044,0.825360,-0.012431,AACS,12,124683876,rs75675338,...,0.143331,-0.196451,C,G,0.076026,C,1.0,protein_coding,125065379.0,125143333.0
AACS,rs78271556,rs78271556,AACS,-3.984283,0.000096,0.876034,-0.012156,AACS,12,124686868,rs78271556,...,0.150354,-0.194953,G,A,0.076246,G,1.0,protein_coding,125065379.0,125143333.0
AACS,rs78772671,rs78772671,AACS,-4.183230,0.000044,0.825360,-0.012431,AACS,12,124672811,rs78772671,...,0.275519,-0.170800,C,T,0.075854,C,1.0,protein_coding,125065379.0,125143333.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZXDC,rs7620198,rs7620198,ZXDC,-4.547077,0.000010,0.695064,-0.017365,ZXDC,3,127095546,rs7620198,...,0.899599,0.024481,A,G,0.727734,G,-1.0,protein_coding,126437601.0,126475919.0
ZXDC,rs77826798,rs77826798,ZXDC,-4.123829,0.000055,0.842099,-0.018881,ZXDC,3,127176591,rs77826798,...,0.826746,-0.042354,A,G,0.184010,A,-1.0,protein_coding,126437601.0,126475919.0
ZXDC,rs78007488,rs78007488,ZXDC,-4.123829,0.000055,0.842099,-0.018881,ZXDC,3,127176696,rs78007488,...,0.826896,-0.042332,T,G,0.184008,T,-1.0,protein_coding,126437601.0,126475919.0
ZXDC,rs9864330,rs9864330,ZXDC,-4.289584,0.000028,0.792448,-0.016432,ZXDC,3,127097657,rs9864330,...,0.863125,0.030669,G,C,0.727489,C,-1.0,protein_coding,126437601.0,126475919.0


In [109]:
mQTL_overlapping_eQTL_idxmin = mQTL_overlapping_eQTL.reset_index(drop=True)

In [110]:
mQTL_overlapping_eQTL_idxmin = mQTL_overlapping_eQTL_idxmin.loc[
    mQTL_overlapping_eQTL_idxmin.groupby(['snpid','geneSymbol']).pvalue_x.idxmin()].reset_index(drop=True)

In [111]:
mQTL_overlapping_eQTL_idxmin

Unnamed: 0,snps,gene,statistic_x,pvalue_x,FDR_x,beta_x,gene_x,chromosome,snpLocation,snpid,...,FDR_y,beta_y,A1,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition
0,rs10002677,MARCH1;ANP32C,-4.206334,0.000040,0.818639,-0.025389,MARCH1;ANP32C,4,165089161,rs10002677,...,0.911105,0.041795,T,C,0.071065,C,-1.0,protein_coding,163524298.0,164384050.0
1,rs10002697,MARCH1;ANP32C,-4.206334,0.000040,0.818639,-0.025389,MARCH1;ANP32C,4,165089209,rs10002697,...,0.911026,0.041834,T,C,0.071070,C,-1.0,protein_coding,163524298.0,164384050.0
2,rs10005414,NOP14;C4orf10,4.177991,0.000044,0.826790,0.017688,NOP14;C4orf10,4,3755589,rs10005414,...,0.987916,-0.004360,C,T,0.340987,C,-1.0,protein_coding,2937933.0,2963385.0
3,rs10006034,MARCH1;ANP32C,-4.206334,0.000040,0.818639,-0.025389,MARCH1;ANP32C,4,165090247,rs10006034,...,0.911105,0.041795,T,G,0.071075,G,-1.0,protein_coding,163524298.0,164384050.0
4,rs10007144,NPY5R,-4.776194,0.000004,0.588449,-0.009351,NPY5R,4,163795426,rs10007144,...,0.704918,-0.048108,A,C,0.513884,A,1.0,protein_coding,163343939.0,163351934.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24033,rs9992306,NFXL1;NFXL1;NFXL1;NFXL1;NFXL1;LOC101927157,-4.233884,0.000035,0.810542,-0.011278,NFXL1;NFXL1;NFXL1;NFXL1;NFXL1;LOC101927157,4,47403236,rs9992306,...,0.853084,-0.050074,T,C,0.099746,T,-1.0,protein_coding,47847233.0,47914667.0
24034,rs9994011,MARCH1;ANP32C,-4.172229,0.000045,0.828613,-0.023988,MARCH1;ANP32C,4,165090228,rs9994011,...,0.911105,0.041795,C,A,0.071080,A,-1.0,protein_coding,163524298.0,164384050.0
24035,rs999468,ZXDC,-4.123829,0.000055,0.842099,-0.018881,ZXDC,3,127173559,rs999468,...,0.818820,-0.043363,C,A,0.184226,C,-1.0,protein_coding,126437601.0,126475919.0
24036,rs999723,CRLF3;CRLF3;CRLF3,-4.121360,0.000056,0.842741,-0.003995,CRLF3;CRLF3;CRLF3,17,29836324,rs999723,...,0.962215,0.011547,G,A,0.643915,A,-1.0,protein_coding,30769388.0,30824776.0


In [136]:
mQTL_overlapping_eQTL_idxmin.to_csv("analysis_using_meta_eQTL/mQTL_overlapping_eQTL.csv", index=False)

### Put everything together

In [296]:
mQTL_overlapping_eQTL_idxmin.columns

Index(['snps', 'gene', 'statistic_x', 'pvalue_x', 'FDR_x', 'beta_x', 'gene_x',
       'chromosome', 'snpLocation', 'snpid', 'snpLocId', 'gene_y',
       'geneSymbol', 'statistic_y', 'pvalue_y', 'FDR_y', 'beta_y', 'A1', 'A2',
       'A2freq', 'expressionIncreasingAllele', 'strand', 'geneBiotype',
       'geneStartPosition', 'geneEndPosition'],
      dtype='object')

In [297]:
pQTL_overlapping_eQTL_idxmin.columns

Index(['CHR', 'POS', 'REF', 'ALT', 'UNIPROT_x', 'BETA', 'SE', 'P', 'FDR_x',
       'N', 'UNIPROT_y', 'geneSymbol_x', 'chromosome', 'snpLocation', 'snpid',
       'snpLocId', 'gene', 'geneSymbol_y', 'statistic', 'pvalue', 'FDR_y',
       'beta', 'A1', 'A2', 'A2freq', 'expressionIncreasingAllele', 'strand',
       'geneBiotype', 'geneStartPosition', 'geneEndPosition',
       'statistic_prot'],
      dtype='object')

In [298]:
metabQTL_overlapping_eQTL_idxmin.columns

Index(['snps', 'statistic_x', 'pvalue_x', 'FDR_x', 'beta_x', 'ADNI_ID',
       'chromosome', 'snpLocation', 'snpid', 'snpLocId', 'geneSymbol',
       'statistic_y', 'pvalue_y', 'FDR_y', 'beta_y', 'A1', 'A2', 'A2freq',
       'expressionIncreasingAllele', 'strand', 'geneBiotype',
       'geneStartPosition', 'geneEndPosition'],
      dtype='object')

We will use mQTL data frame as a start, use `snpid` and `geneSymbol` as the index, collect `pvalue`, `stat`, `beta` columns, and merge them into one data frame. Then we add GWAS information.

In [299]:
xQTLs = mQTL_overlapping_eQTL_idxmin.drop(
    columns=['snps', 'gene', 'FDR_x', 'gene_x', 'FDR_y']).rename(
    {'statistic_x':'statistic_methyl', 'pvalue_x':'pvalue_methyl', 'beta_x':'beta_methyl', 'gene_y':'gene', 
       'statistic_y':'statistic_expr', 'pvalue_y':'pvalue_expr', 'beta_y':'beta_expr'}, axis=1)

In [300]:
xQTLs.columns

Index(['statistic_methyl', 'pvalue_methyl', 'beta_methyl', 'chromosome',
       'snpLocation', 'snpid', 'snpLocId', 'gene', 'geneSymbol',
       'statistic_expr', 'pvalue_expr', 'beta_expr', 'A1', 'A2', 'A2freq',
       'expressionIncreasingAllele', 'strand', 'geneBiotype',
       'geneStartPosition', 'geneEndPosition'],
      dtype='object')

In [301]:
xQTLs.set_index(['geneSymbol', 'snpid'], drop=False, inplace=True)

In [302]:
pQTL_overlapping_eQTL_idxmin['statistic_prot'] = pQTL_overlapping_eQTL_idxmin['BETA'] / pQTL_overlapping_eQTL_idxmin['SE']

In [303]:
pQTL_formatted = pQTL_overlapping_eQTL_idxmin[['statistic_prot', 'P', 'BETA', 'snpid', 'geneSymbol_x']].rename(
    {'BETA':'beta_prot', 'P':'pvalue_prot', 'geneSymbol_x':'geneSymbol'}, axis=1)

In [304]:
pQTL_formatted.set_index(['geneSymbol', 'snpid'], inplace=True)

In [350]:
xQTLs_prot = pQTL_overlapping_eQTL_idxmin.drop(
    columns=['CHR', 'POS', 'REF', 'ALT', 'UNIPROT_x', 'SE', 'FDR_x', 'N', 'UNIPROT_y', 'geneSymbol_y', 'FDR_y']).rename(
    {'BETA':'beta_prot', 'P':'pvalue_prot', 'geneSymbol_x':'geneSymbol',
     'statistic':'statistic_expr', 'pvalue':'pvalue_expr', 'beta':'beta_expr'}, axis=1)

In [351]:
xQTLs_prot.columns

Index(['beta_prot', 'pvalue_prot', 'geneSymbol', 'chromosome', 'snpLocation',
       'snpid', 'snpLocId', 'gene', 'statistic_expr', 'pvalue_expr',
       'beta_expr', 'A1', 'A2', 'A2freq', 'expressionIncreasingAllele',
       'strand', 'geneBiotype', 'geneStartPosition', 'geneEndPosition',
       'statistic_prot'],
      dtype='object')

In [305]:
metabQTL_formatted = metabQTL_overlapping_eQTL_idxmin[['statistic_x', 'pvalue_x', 'beta_x', 'snpid', 'geneSymbol']].rename(
    {'statistic_x':'statistic_metab', 'pvalue_x':'pvalue_metab', 'beta_x':'beta_metab'}, axis=1)

In [306]:
metabQTL_formatted.set_index(['geneSymbol', 'snpid'], inplace=True)

In [307]:
pQTL_formatted

Unnamed: 0_level_0,Unnamed: 1_level_0,statistic_prot,pvalue_prot,beta_prot
geneSymbol,snpid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NOC2L,rs2980300,2.263495,0.024436,0.029488
NOC2L,rs4970383,-0.078362,0.937601,-0.000840
NOC2L,rs4475691,-0.021986,0.982476,-0.000272
ISG15,rs1806509,-1.087520,0.277683,-0.030620
NOC2L,rs1806509,1.445340,0.149576,0.013592
...,...,...,...,...
SHANK3,rs3810648,-0.974206,0.330724,-0.010342
RABL2B,rs2285395,-0.010566,0.991581,-0.000572
SHANK3,rs2285395,-0.861260,0.389769,-0.009642
RABL2B,rs3865766,1.599020,0.111460,0.041947


In [308]:
metabQTL_formatted

Unnamed: 0_level_0,Unnamed: 1_level_0,statistics_metab,pvalue_metab,beta_metab
geneSymbol,snpid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AASDH,rs10000003,2.179230,0.030750,2.626286
PAICS,rs10000003,1.329355,0.185588,0.777456
POLR2B,rs10000003,1.329355,0.185588,0.777456
PPAT,rs10000003,2.179230,0.030750,2.626286
CDS1,rs10000005,-2.383190,0.018314,-2.262149
...,...,...,...,...
ADH5,rs9999976,2.321265,0.021511,4.875438
EIF4E,rs9999976,-2.565780,0.011195,-3.551833
SETD7,rs9999981,2.779724,0.006081,12.374678
ACSL1,rs9999995,-0.942792,0.347182,-0.005681


In [309]:
xQTLs.index.intersection(pQTL_formatted.index)

MultiIndex([(  'USP20', 'rs10119739'),
            ( 'COL4A1', 'rs10161783'),
            ( 'COL4A2', 'rs10161783'),
            ('JAKMIP1', 'rs10433662'),
            (  'RMDN2', 'rs10490621'),
            ( 'CTNNA2', 'rs10496231'),
            (  'APBB1',  'rs1050239'),
            (    'HPX',  'rs1050239'),
            (  'TRIM3',  'rs1050239'),
            ( 'CAMK1D', 'rs10508449'),
            ...
            (  'TUBG1',   'rs938671'),
            (  'MYH10',   'rs960206'),
            (   'SPEN',  'rs9633365'),
            ( 'SUCLG1',   'rs969647'),
            ( 'PLSCR4',  'rs9838866'),
            ( 'PLSCR4',  'rs9863647'),
            (   'ERC2',  'rs9873381'),
            ( 'CUEDC1',  'rs9893497'),
            ( 'DNAJC7',  'rs9911974'),
            ('NKIRAS2',  'rs9911974')],
           names=['geneSymbol', 'snpid'], length=285)

In [310]:
xQTLs.index.intersection(metabQTL_formatted.index)

MultiIndex([(   'PFAS',  'rs1001958'),
            ( 'POLR2A',  'rs1001958'),
            (  'HMOX2',  'rs1005345'),
            ( 'DNMT3B',  'rs1007123'),
            ( 'DNMT3B',  'rs1007124'),
            (    'GLS', 'rs10176621'),
            (  'HIBCH', 'rs10176621'),
            (    'GLS', 'rs10176649'),
            (  'HIBCH', 'rs10176649'),
            ( 'NT5C3A', 'rs10224734'),
            ...
            (   'NME2',  'rs9896780'),
            (  'COX10',  'rs9905722'),
            (  'MLST8',  'rs9921808'),
            (  'HMOX2',  'rs9930342'),
            (  'MLST8',  'rs9933986'),
            (  'ABCC3',   'rs994469'),
            ('B4GALT6',  'rs9944721'),
            ('B4GALT6',  'rs9948669'),
            ( 'ABCB10',  'rs9970564'),
            ( 'DNMT3B',   'rs998382')],
           names=['geneSymbol', 'snpid'], length=1264)

#### add GWAS summary statistic to xQTL

In [311]:
GWAS.columns

Index(['snps', 'pvalue', 'chr', 'BP', 'effect_allele', 'other_allele',
       'effect_allele_frequency', 'beta', 'SE', 'SNP_ID', 'GWAS_BETA',
       'GWAS_SE', 'GWAS_P', 'GWAX_UKBB_BETA', 'GWAX_UKBB_SE', 'GWAX_UKBB_P',
       'DIRECT', 'I2', 'HET_P', 'INFO', 'statistic_GWAS'],
      dtype='object')

In [312]:
GWAS['statistic_GWAS'] = GWAS['beta'] / GWAS['SE']

In [313]:
GWAS_formatted = GWAS[['statistic_GWAS', 'pvalue', 'beta', 'SNP_ID']].rename(
    {'pvalue':'pvalue_GWAS', 'beta':'beta_GWAS', 'SNP_ID':'snpid'}, axis=1)

In [314]:
GWAS_formatted

Unnamed: 0,statistic_GWAS,pvalue_GWAS,beta_GWAS,snpid
0,0.624550,0.532266,0.012680,rs61769339
1,-0.163141,0.870407,-0.006036,rs190214723
2,0.208924,0.834508,0.003903,rs12238997
3,-1.442129,0.149266,-0.039365,rs72631875
4,-1.255844,0.209173,-0.064142,rs181440659
...,...,...,...,...
10687072,-0.768985,0.441902,-0.015827,rs9616985
10687073,0.321851,0.747566,0.005819,rs144549712
10687074,0.686054,0.492679,0.008820,rs9616839
10687075,-1.661472,0.096619,-0.023551,rs200507571


In [315]:
xQTLs = xQTLs.reset_index(drop=True).merge(GWAS_formatted, on = 'snpid', how = 'left')

In [316]:
xQTLs

Unnamed: 0,statistic_methyl,pvalue_methyl,beta_methyl,chromosome,snpLocation,snpid,snpLocId,gene,geneSymbol,statistic_expr,...,A2,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition,statistic_GWAS,pvalue_GWAS,beta_GWAS
0,-4.206334,0.000040,-0.025389,4,165089161,rs10002677,4:165089161_T_C,ENSG00000145416,MARCH1,0.574238,...,C,0.071065,C,-1.0,protein_coding,163524298.0,164384050.0,-0.410766,0.681244,-0.007669
1,-4.206334,0.000040,-0.025389,4,165089209,rs10002697,4:165089209_T_C,ENSG00000145416,MARCH1,0.574779,...,C,0.071070,C,-1.0,protein_coding,163524298.0,164384050.0,-0.405764,0.684916,-0.007576
2,4.177991,0.000044,0.017688,4,3755589,rs10005414,4:3755589_C_T,ENSG00000087269,NOP14,-0.105813,...,T,0.340987,C,-1.0,protein_coding,2937933.0,2963385.0,-0.247638,0.804415,-0.002647
3,-4.206334,0.000040,-0.025389,4,165090247,rs10006034,4:165090247_T_G,ENSG00000145416,MARCH1,0.574240,...,G,0.071075,G,-1.0,protein_coding,163524298.0,164384050.0,-0.422018,0.673012,-0.007879
4,-4.776194,0.000004,-0.009351,4,163795426,rs10007144,4:163795426_A_C,ENSG00000164129,NPY5R,-1.267741,...,C,0.513884,A,1.0,protein_coding,163343939.0,163351934.0,-1.360252,0.173750,-0.012887
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24033,-4.233884,0.000035,-0.011278,4,47403236,rs9992306,4:47403236_T_C,ENSG00000170448,NFXL1,-0.814665,...,C,0.099746,T,-1.0,protein_coding,47847233.0,47914667.0,1.093150,0.274328,0.017488
24034,-4.172229,0.000045,-0.023988,4,165090228,rs9994011,4:165090228_C_A,ENSG00000145416,MARCH1,0.574237,...,A,0.071080,A,-1.0,protein_coding,163524298.0,164384050.0,0.030237,0.975878,0.000561
24035,-4.123829,0.000055,-0.018881,3,127173559,rs999468,3:127173559_C_A,ENSG00000070476,ZXDC,-0.912964,...,A,0.184226,C,-1.0,protein_coding,126437601.0,126475919.0,0.404713,0.685688,0.004863
24036,-4.121360,0.000056,-0.003995,17,29836324,rs999723,17:29836324_G_A,ENSG00000176390,CRLF3,0.293276,...,A,0.643915,A,-1.0,protein_coding,30769388.0,30824776.0,-1.383767,0.166430,-0.013639


In [317]:
xQTLs.set_index(['geneSymbol', 'snpid'], drop=False, inplace=True)

In [352]:
xQTLs_prot = xQTLs_prot.reset_index(drop=True).merge(GWAS_formatted, on = 'snpid', how = 'left')

In [353]:
xQTLs_prot

Unnamed: 0,beta_prot,pvalue_prot,geneSymbol,chromosome,snpLocation,snpid,snpLocId,gene,statistic_expr,pvalue_expr,...,A2freq,expressionIncreasingAllele,strand,geneBiotype,geneStartPosition,geneEndPosition,statistic_prot,statistic_GWAS,pvalue_GWAS,beta_GWAS
0,0.029488,0.024436,NOC2L,1,785989,rs2980300,1:785989_T_C,ENSG00000188976,-1.011814,3.116270e-01,...,0.836686,T,-1.0,protein_coding,944204.0,959309.0,2.263495,1.143132,0.252984,0.018185
1,-0.000840,0.937601,NOC2L,1,838555,rs4970383,1:838555_C_A,ENSG00000188976,-0.371254,7.104481e-01,...,0.267010,C,-1.0,protein_coding,944204.0,959309.0,-0.078362,0.709779,0.477841,0.009305
2,-0.000272,0.982476,NOC2L,1,846808,rs4475691,1:846808_C_T,ENSG00000188976,1.301034,1.932469e-01,...,0.204360,T,-1.0,protein_coding,944204.0,959309.0,-0.021986,0.303449,0.761548,0.004117
3,-0.030620,0.277683,ISG15,1,853954,rs1806509,1:853954_C_A,ENSG00000187608,2.833641,4.602094e-03,...,0.578350,A,1.0,protein_coding,1001138.0,1014541.0,-1.087520,-0.322354,0.747185,-0.003690
4,0.013592,0.149576,NOC2L,1,853954,rs1806509,1:853954_C_A,ENSG00000188976,-2.369994,1.778839e-02,...,0.578350,C,-1.0,protein_coding,944204.0,959309.0,1.445340,-0.322354,0.747185,-0.003690
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
730518,-0.010342,0.330724,SHANK3,22,51175626,rs3810648,22:51175626_A_G,ENSG00000251322,0.093193,9.257500e-01,...,0.068315,G,1.0,protein_coding,50674415.0,50733298.0,-0.974206,0.118489,0.905680,0.002345
730519,-0.000572,0.991581,RABL2B,22,51178090,rs2285395,22:51178090_G_A,ENSG00000079974,10.901612,1.132329e-27,...,0.060391,A,-1.0,protein_coding,50767501.0,50783663.0,-0.010566,-0.367780,0.713037,-0.007806
730520,-0.009642,0.389769,SHANK3,22,51178090,rs2285395,22:51178090_G_A,ENSG00000251322,0.233557,8.153286e-01,...,0.060391,A,1.0,protein_coding,50674415.0,50733298.0,-0.861260,-0.367780,0.713037,-0.007806
730521,0.041947,0.111460,RABL2B,22,51186228,rs3865766,22:51186228_C_T,ENSG00000079974,2.521462,1.168683e-02,...,0.450743,T,-1.0,protein_coding,50767501.0,50783663.0,1.599020,0.852352,0.394019,0.009133


In [354]:
xQTLs_prot.set_index(['geneSymbol', 'snpid'], drop=False, inplace=True)

#### generate files for xQTLs

In [318]:
xQTLs_expr_methyl_metab = xQTLs.join(metabQTL_formatted, how = 'inner')

In [319]:
xQTLs_expr_methyl_metab

Unnamed: 0_level_0,Unnamed: 1_level_0,statistic_methyl,pvalue_methyl,beta_methyl,chromosome,snpLocation,snpid,snpLocId,gene,geneSymbol,statistic_expr,...,strand,geneBiotype,geneStartPosition,geneEndPosition,statistic_GWAS,pvalue_GWAS,beta_GWAS,statistics_metab,pvalue_metab,beta_metab
geneSymbol,snpid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
PFAS,rs1001958,-3.981757,0.000097,-0.003156,17,7998809,rs1001958,17:7998809_G_A,ENSG00000178921,PFAS,0.128417,...,1.0,protein_coding,8247618.0,8270491.0,-1.190558,0.233827,-0.012508,2.506006,0.013191,7.944735
POLR2A,rs1001958,-4.004933,0.000088,-0.002794,17,7998809,rs1001958,17:7998809_G_A,ENSG00000181222,POLR2A,-0.763430,...,1.0,protein_coding,7484366.0,7514616.0,-1.190558,0.233827,-0.012508,2.506006,0.013191,7.944735
HMOX2,rs1005345,-4.137063,0.000052,-0.003982,16,5071284,rs1005345,16:5071284_G_A,ENSG00000103415,HMOX2,-0.735617,...,1.0,protein_coding,4474690.0,4510347.0,0.805447,0.420561,0.009750,-1.327270,0.186276,-12.562561
DNMT3B,rs1007123,4.079764,0.000066,0.004115,20,31345840,rs1007123,20:31345840_G_A,ENSG00000088305,DNMT3B,11.374587,...,1.0,protein_coding,32762385.0,32809356.0,-1.487540,0.136872,-0.014430,-2.261264,0.025065,-11.769479
DNMT3B,rs1007124,4.079764,0.000066,0.004115,20,31345752,rs1007124,20:31345752_G_C,ENSG00000088305,DNMT3B,11.348318,...,1.0,protein_coding,32762385.0,32809356.0,-1.500543,0.133474,-0.014557,-2.261264,0.025065,-11.769479
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ABCC3,rs994469,4.495444,0.000012,0.012815,17,49751383,rs994469,17:49751383_G_A,ENSG00000108846,ABCC3,0.351243,...,1.0,protein_coding,50634777.0,50692252.0,0.601550,0.547474,0.009508,-2.283497,0.023692,-53.403566
B4GALT6,rs9944721,5.051007,0.000001,0.014299,18,30248753,rs9944721,18:30248753_T_C,ENSG00000118276,B4GALT6,0.152788,...,-1.0,protein_coding,31622247.0,31685836.0,0.486996,0.626261,0.005584,-1.458831,0.146536,-0.915852
B4GALT6,rs9948669,5.051007,0.000001,0.014299,18,30246217,rs9948669,18:30246217_G_A,ENSG00000118276,B4GALT6,0.153225,...,-1.0,protein_coding,31622247.0,31685836.0,-0.426808,0.669519,-0.004897,-1.458831,0.146536,-0.915852
ABCB10,rs9970564,-4.100027,0.000061,-0.013120,1,228854703,rs9970564,1:228854703_A_C,ENSG00000135776,ABCB10,1.192923,...,-1.0,protein_coding,229516582.0,229558695.0,-1.043692,0.296628,-0.017854,1.634609,0.104061,0.121443


In [320]:
xQTLs_expr_methyl_metab.to_csv("analysis_using_meta_eQTL/xQTLs_expr_methyl_metab.csv", index=False)

In [321]:
xQTLs_expr_methyl_metab_prot = xQTLs_expr_methyl_metab.join(pQTL_formatted, how = 'inner')

In [322]:
xQTLs_expr_methyl_metab_prot

Unnamed: 0_level_0,Unnamed: 1_level_0,statistic_methyl,pvalue_methyl,beta_methyl,chromosome,snpLocation,snpid,snpLocId,gene,geneSymbol,statistic_expr,...,geneEndPosition,statistic_GWAS,pvalue_GWAS,beta_GWAS,statistics_metab,pvalue_metab,beta_metab,statistic_prot,pvalue_prot,beta_prot
geneSymbol,snpid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
CHKA,rs105147,-3.975225,9.9e-05,-0.007487,11,67793014,rs105147,11:67793014_T_C,ENSG00000110721,CHKA,-0.10572,...,68121444.0,-0.939465,0.347492,-0.009042,1.215765,0.225832,0.049534,-0.281282,0.778726,-0.003666
PLCB4,rs11697318,4.123416,5.5e-05,0.010511,20,9132036,rs11697318,20:9132036_A_G,ENSG00000101333,PLCB4,0.06888,...,9481242.0,-1.928319,0.053815,-0.024873,-1.88999,0.060534,-11.112412,0.186505,0.852172,0.001548
CAMK2D,rs11724156,4.076895,6.7e-05,0.011657,4,114478482,rs11724156,4:114478482_T_C,ENSG00000145349,CAMK2D,-1.262335,...,113761927.0,1.547257,0.121801,0.017211,1.107513,0.269704,2.333487,-1.011263,0.31269,-0.006693
PLCB4,rs13044386,4.123416,5.5e-05,0.010511,20,9116155,rs13044386,20:9116155_G_A,ENSG00000101333,PLCB4,0.137961,...,9481242.0,1.880562,0.060032,0.023036,-1.88999,0.060534,-11.112412,0.159889,0.873074,0.001329
ACAA1,rs1384006,4.217958,3.8e-05,0.004121,3,38261037,rs1384006,3:38261037_G_A,ENSG00000060971,ACAA1,-2.160702,...,38137242.0,0.611855,0.540634,0.005834,1.528305,0.128375,6.86477,-1.709794,0.088318,-0.013714
PDE8B,rs251419,-4.042899,7.6e-05,-0.008941,5,76572671,rs251419,5:76572671_T_G,ENSG00000113231,PDE8B,-2.666045,...,77429807.0,-0.451519,0.651615,-0.00505,-1.70008,0.091023,-0.125062,0.407349,0.68405,0.00377
PDE8B,rs251421,-4.042899,7.6e-05,-0.008941,5,76572970,rs251421,5:76572970_A_G,ENSG00000113231,PDE8B,-2.721243,...,77429807.0,-0.460147,0.645411,-0.005134,-1.70008,0.091023,-0.125062,0.407349,0.68405,0.00377
PDE8B,rs251425,-3.983895,9.6e-05,-0.008817,5,76575548,rs251425,5:76575548_G_A,ENSG00000113231,PDE8B,-2.711736,...,77429807.0,0.449987,0.65272,0.005028,-1.70008,0.091023,-0.125062,0.296682,0.766921,0.00275
PDE8B,rs251427,-3.983895,9.6e-05,-0.008817,5,76576396,rs251427,5:76576396_T_G,ENSG00000113231,PDE8B,-2.482978,...,77429807.0,-0.503056,0.614925,-0.005618,-1.70008,0.091023,-0.125062,0.296682,0.766921,0.00275
GPD1L,rs4078487,-3.977665,9.8e-05,-0.01032,3,32245255,rs4078487,3:32245255_C_A,ENSG00000152642,GPD1L,0.798511,...,32168713.0,1.655144,0.097895,0.025642,2.383818,0.018284,12.304887,-0.085245,0.932123,-0.000977


In [323]:
xQTLs_expr_methyl_metab_prot.to_csv("analysis_using_meta_eQTL/xQTLs_expr_methyl_metab_prot.csv", index=False)

In [324]:
xQTLs_expr_methyl_prot = xQTLs.join(pQTL_formatted, how = 'inner')

In [325]:
xQTLs_expr_methyl_prot

Unnamed: 0_level_0,Unnamed: 1_level_0,statistic_methyl,pvalue_methyl,beta_methyl,chromosome,snpLocation,snpid,snpLocId,gene,geneSymbol,statistic_expr,...,strand,geneBiotype,geneStartPosition,geneEndPosition,statistic_GWAS,pvalue_GWAS,beta_GWAS,statistic_prot,pvalue_prot,beta_prot
geneSymbol,snpid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
USP20,rs10119739,4.016134,0.000085,0.011001,9,132690613,rs10119739,9:132690613_A_G,ENSG00000136878,USP20,-0.602034,...,1.0,protein_coding,129834698.0,129881838.0,1.435506,0.151143,0.018719,3.404031,0.000754,0.043824
COL4A1,rs10161783,4.090714,0.000063,0.023471,13,111009055,rs10161783,13:111009055_C_T,ENSG00000187498,COL4A1,0.420458,...,-1.0,protein_coding,110148963.0,110307149.0,-2.333160,0.019640,-0.032043,0.707546,0.479775,0.011586
COL4A2,rs10161783,4.090714,0.000063,0.023471,13,111009055,rs10161783,13:111009055_C_T,ENSG00000134871,COL4A2,-0.359141,...,1.0,protein_coding,110305812.0,110513027.0,-2.333160,0.019640,-0.032043,0.533788,0.593876,0.007906
JAKMIP1,rs10433662,-4.370089,0.000020,-0.011709,4,5980110,rs10433662,4:5980110_T_C,ENSG00000152969,JAKMIP1,-0.922036,...,-1.0,protein_coding,6026199.0,6200591.0,1.093275,0.274273,0.012774,-2.095968,0.036906,-0.013818
RMDN2,rs10490621,-4.238548,0.000035,-0.014109,2,38336730,rs10490621,2:38336730_C_A,ENSG00000115841,RMDN2,-1.800133,...,1.0,protein_coding,37923187.0,38067142.0,-2.680925,0.007342,-0.050149,-0.977312,0.329187,-0.010999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
PLSCR4,rs9863647,4.031429,0.000080,0.013837,3,145967173,rs9863647,3:145967173_T_C,ENSG00000114698,PLSCR4,2.522625,...,-1.0,protein_coding,146192339.0,146251179.0,-0.536428,0.591663,-0.007354,3.303193,0.001069,0.062313
ERC2,rs9873381,4.214607,0.000038,0.011314,3,56489776,rs9873381,3:56489776_G_A,ENSG00000187672,ERC2,-0.092490,...,-1.0,protein_coding,55508308.0,56468363.0,0.075112,0.940125,0.000735,-0.711123,0.477549,-0.002640
CUEDC1,rs9893497,-4.091561,0.000063,-0.008658,17,56002077,rs9893497,17:56002077_A_G,ENSG00000180891,CUEDC1,0.024448,...,-1.0,protein_coding,57861243.0,57955323.0,0.168083,0.866518,0.001614,-0.343261,0.731821,-0.005733
DNAJC7,rs9911974,-4.292643,0.000028,-0.037424,17,40199535,rs9911974,17:40199535_T_G,ENSG00000168259,DNAJC7,-1.538995,...,-1.0,protein_coding,41976433.0,42021376.0,-2.348787,0.018835,-0.031045,-1.322625,0.186948,-0.007050


In [326]:
xQTLs_expr_methyl_prot.to_csv("analysis_using_meta_eQTL/xQTLs_expr_methyl_prot.csv", index=False)

In [327]:
xQTLs_expr_methyl_metab_filtered = xQTLs_expr_methyl_metab.loc[xQTLs_expr_methyl_metab.pvalue_expr < 1e-4]

In [328]:
xQTLs_expr_methyl_metab_filtered

Unnamed: 0_level_0,Unnamed: 1_level_0,statistic_methyl,pvalue_methyl,beta_methyl,chromosome,snpLocation,snpid,snpLocId,gene,geneSymbol,statistic_expr,...,strand,geneBiotype,geneStartPosition,geneEndPosition,statistic_GWAS,pvalue_GWAS,beta_GWAS,statistics_metab,pvalue_metab,beta_metab
geneSymbol,snpid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
DNMT3B,rs1007123,4.079764,0.000066,0.004115,20,31345840,rs1007123,20:31345840_G_A,ENSG00000088305,DNMT3B,11.374587,...,1.0,protein_coding,32762385.0,32809356.0,-1.487540,0.136872,-0.014430,-2.261264,0.025065,-11.769479
DNMT3B,rs1007124,4.079764,0.000066,0.004115,20,31345752,rs1007124,20:31345752_G_C,ENSG00000088305,DNMT3B,11.348318,...,1.0,protein_coding,32762385.0,32809356.0,-1.500543,0.133474,-0.014557,-2.261264,0.025065,-11.769479
ACADS,rs10774565,4.028481,0.000081,0.009894,12,121099149,rs10774565,12:121099149_T_C,ENSG00000122971,ACADS,9.775675,...,1.0,protein_coding,120725735.0,120740008.0,-1.118446,0.263376,-0.015004,-2.414911,0.016846,-5.564403
ADK,rs11001051,-4.054027,0.000073,-0.007499,10,76243549,rs11001051,10:76243549_C_A,ENSG00000156110,ADK,-4.916910,...,1.0,protein_coding,74151185.0,74709303.0,1.507212,0.131756,0.016219,1.784702,0.076170,0.203476
ACADS,rs111409007,4.075141,0.000067,0.010207,12,121119095,rs111409007,12:121119095_C_T,ENSG00000122971,ACADS,8.880935,...,1.0,protein_coding,120725735.0,120740008.0,0.879022,0.379389,0.011735,-3.018829,0.002946,-7.002376
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DNMT3B,rs910084,4.075227,0.000067,0.004177,20,31379665,rs910084,20:31379665_C_T,ENSG00000088305,DNMT3B,10.777169,...,1.0,protein_coding,32762385.0,32809356.0,-1.666355,0.095643,-0.016302,-1.817513,0.070975,-2.784711
DNMT3B,rs910085,4.075227,0.000067,0.004177,20,31383353,rs910085,20:31383353_T_G,ENSG00000088305,DNMT3B,10.572637,...,1.0,protein_coding,32762385.0,32809356.0,1.609339,0.107542,0.015739,-1.875935,0.062452,-2.878097
DNMT3B,rs927239,4.329370,0.000024,0.004873,20,31268924,rs927239,20:31268924_G_A,ENSG00000088305,DNMT3B,-6.027501,...,1.0,protein_coding,32762385.0,32809356.0,0.615395,0.538294,0.006062,-1.801808,0.073424,-10.596858
HEMK1,rs9838283,-4.023699,0.000082,-0.010633,3,50820486,rs9838283,3:50820486_G_A,ENSG00000114735,HEMK1,9.200060,...,1.0,protein_coding,50569152.0,50596168.0,0.500765,0.616536,0.007118,3.017857,0.002955,50.013139


In [329]:
xQTLs_expr_methyl_metab_filtered.to_csv("analysis_using_meta_eQTL/xQTLs_expr_methyl_metab_filtered_1e-4.csv", index=False)

In [355]:
xQTLs_expr_metab_prot = xQTLs_prot.join(metabQTL_formatted, how = 'inner')

In [356]:
xQTLs_expr_metab_prot

Unnamed: 0_level_0,Unnamed: 1_level_0,beta_prot,pvalue_prot,geneSymbol,chromosome,snpLocation,snpid,snpLocId,gene,statistic_expr,pvalue_expr,...,geneBiotype,geneStartPosition,geneEndPosition,statistic_prot,statistic_GWAS,pvalue_GWAS,beta_GWAS,statistics_metab,pvalue_metab,beta_metab
geneSymbol,snpid,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
DVL1,rs7528416,0.000419,0.963906,DVL1,1,1171249,rs7528416,1:1171249_T_C,ENSG00000107404,1.338474,1.807421e-01,...,protein_coding,1335276.0,1349350.0,0.045290,-1.994677,0.046078,-0.032855,0.979838,0.328618,2.975937
DVL1,rs6697886,0.000403,0.960245,DVL1,1,1173611,rs6697886,1:1173611_G_A,ENSG00000107404,1.940041,5.237470e-02,...,protein_coding,1335276.0,1349350.0,0.049888,0.441435,0.658898,0.006154,-0.908323,0.365049,-7.354410
DVL1,rs4970364,-0.005612,0.397592,DVL1,1,1174282,rs4970364,1:1174282_T_C,ENSG00000107404,-0.105954,9.156187e-01,...,protein_coding,1335276.0,1349350.0,-0.847144,0.167264,0.867162,0.001886,-1.305791,0.193464,-8.278414
DVL1,rs6675798,-0.006418,0.474499,DVL1,1,1176597,rs6675798,1:1176597_T_C,ENSG00000107404,2.395601,1.659316e-02,...,protein_coding,1335276.0,1349350.0,-0.716084,0.240721,0.809771,0.003889,-0.712314,0.477289,-6.761995
DVL1,rs2297864,-0.006418,0.474499,DVL1,1,1177630,rs2297864,1:1177630_A_G,ENSG00000107404,2.237343,2.526396e-02,...,protein_coding,1335276.0,1349350.0,-0.716084,0.291528,0.770647,0.004730,-0.712314,0.477289,-6.761995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ARSA,rs6010061,0.042923,0.000769,ARSA,22,51151724,rs6010061,22:51151724_C_T,ENSG00000100299,6.310465,2.781990e-10,...,protein_coding,50622754.0,50628173.0,3.399442,-0.208219,0.835058,-0.002049,2.199838,0.029226,3.765515
ARSA,rs6010063,0.011650,0.401975,ARSA,22,51156933,rs6010063,22:51156933_A_G,ENSG00000100299,5.127080,2.942710e-07,...,protein_coding,50622754.0,50628173.0,0.839318,-1.637657,0.101493,-0.015725,2.191276,0.029851,0.082448
ARSA,rs10451,0.023319,0.120453,ARSA,22,51162059,rs10451,22:51162059_G_A,ENSG00000100299,4.196230,2.713946e-05,...,protein_coding,50622754.0,50628173.0,1.557408,0.500160,0.616962,0.005208,1.918129,0.056843,11.259549
ARSA,rs715586,-0.006474,0.757270,ARSA,22,51163138,rs715586,22:51163138_C_T,ENSG00000100299,0.930321,3.522047e-01,...,protein_coding,50622754.0,50628173.0,-0.309357,1.978756,0.047843,0.026554,2.571247,0.011027,0.150297


In [357]:
xQTLs_expr_metab_prot.columns

Index(['beta_prot', 'pvalue_prot', 'geneSymbol', 'chromosome', 'snpLocation',
       'snpid', 'snpLocId', 'gene', 'statistic_expr', 'pvalue_expr',
       'beta_expr', 'A1', 'A2', 'A2freq', 'expressionIncreasingAllele',
       'strand', 'geneBiotype', 'geneStartPosition', 'geneEndPosition',
       'statistic_prot', 'statistic_GWAS', 'pvalue_GWAS', 'beta_GWAS',
       'statistics_metab', 'pvalue_metab', 'beta_metab'],
      dtype='object')

In [358]:
xQTLs_expr_metab_prot.to_csv("analysis_using_meta_eQTL/xQTLs_expr_metab_prot.csv", index=False)

#### add a patch of methylQTL to xQTLs_expr_metab_prot

In [11]:
xQTLs_expr_metab_prot = pd.read_csv("analysis_using_meta_eQTL/xQTLs_expr_metab_prot.csv")

In [12]:
xQTLs_expr_metab_prot

Unnamed: 0,beta_prot,pvalue_prot,geneSymbol,chromosome,snpLocation,snpid,snpLocId,gene,statistic_expr,pvalue_expr,...,geneBiotype,geneStartPosition,geneEndPosition,statistic_prot,statistic_GWAS,pvalue_GWAS,beta_GWAS,statistic_metab,pvalue_metab,beta_metab
0,0.000419,0.963906,DVL1,1,1171249,rs7528416,1:1171249_T_C,ENSG00000107404,1.338474,1.807421e-01,...,protein_coding,1335276.0,1349350.0,0.045290,-1.994677,0.046078,-0.032855,0.979838,0.328618,2.975937
1,0.000403,0.960245,DVL1,1,1173611,rs6697886,1:1173611_G_A,ENSG00000107404,1.940041,5.237470e-02,...,protein_coding,1335276.0,1349350.0,0.049888,0.441435,0.658898,0.006154,-0.908323,0.365049,-7.354410
2,-0.005612,0.397592,DVL1,1,1174282,rs4970364,1:1174282_T_C,ENSG00000107404,-0.105954,9.156187e-01,...,protein_coding,1335276.0,1349350.0,-0.847144,0.167264,0.867162,0.001886,-1.305791,0.193464,-8.278414
3,-0.006418,0.474499,DVL1,1,1176597,rs6675798,1:1176597_T_C,ENSG00000107404,2.395601,1.659316e-02,...,protein_coding,1335276.0,1349350.0,-0.716084,0.240721,0.809771,0.003889,-0.712314,0.477289,-6.761995
4,-0.006418,0.474499,DVL1,1,1177630,rs2297864,1:1177630_A_G,ENSG00000107404,2.237343,2.526396e-02,...,protein_coding,1335276.0,1349350.0,-0.716084,0.291528,0.770647,0.004730,-0.712314,0.477289,-6.761995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57092,0.042923,0.000769,ARSA,22,51151724,rs6010061,22:51151724_C_T,ENSG00000100299,6.310465,2.781990e-10,...,protein_coding,50622754.0,50628173.0,3.399442,-0.208219,0.835058,-0.002049,2.199838,0.029226,3.765515
57093,0.011650,0.401975,ARSA,22,51156933,rs6010063,22:51156933_A_G,ENSG00000100299,5.127080,2.942710e-07,...,protein_coding,50622754.0,50628173.0,0.839318,-1.637657,0.101493,-0.015725,2.191276,0.029851,0.082448
57094,0.023319,0.120453,ARSA,22,51162059,rs10451,22:51162059_G_A,ENSG00000100299,4.196230,2.713946e-05,...,protein_coding,50622754.0,50628173.0,1.557408,0.500160,0.616962,0.005208,1.918129,0.056843,11.259549
57095,-0.006474,0.757270,ARSA,22,51163138,rs715586,22:51163138_C_T,ENSG00000100299,0.930321,3.522047e-01,...,protein_coding,50622754.0,50628173.0,-0.309357,1.978756,0.047843,0.026554,2.571247,0.011027,0.150297


In [13]:
mQTL

Unnamed: 0,snps,gene,statistic,pvalue,FDR,beta
0,rs2069978,GPX4;GPX4;GPX4,9.889386,6.223929e-19,1.847805e-08,0.078782
1,rs2069978,GPX4;GPX4;GPX4;GPX4;GPX4,9.607171,4.019592e-18,5.966828e-08,0.109791
2,rs2069984,GPX4;GPX4;GPX4,9.085744,1.201035e-16,5.942866e-07,0.070466
3,rs2069987,GPX4;GPX4;GPX4,9.085744,1.201035e-16,5.942866e-07,0.070466
4,rs60428566,GPX4;GPX4;GPX4,9.085744,1.201035e-16,5.942866e-07,0.070466
...,...,...,...,...,...,...
30888009,rs1355106,PDE4DIP;PDE4DIP;PDE4DIP;PDE4DIP;NBPF20;NBPF9,-3.341382,1.000000e-03,9.611729e-01,-0.013573
30888010,rs1355107,PDE4DIP;PDE4DIP;PDE4DIP;PDE4DIP;NBPF20;NBPF9,-3.341382,1.000000e-03,9.611729e-01,-0.013573
30888011,rs12499737,PDE4DIP;PDE4DIP;PDE4DIP;PDE4DIP;NBPF20;NBPF9,-3.341382,1.000000e-03,9.611729e-01,-0.013573
30888012,rs12504629,PDE4DIP;PDE4DIP;PDE4DIP;PDE4DIP;NBPF20;NBPF9,-3.341382,1.000000e-03,9.611729e-01,-0.013573


In [15]:
gene_group_to_gene

Unnamed: 0_level_0,gene
Symbol,Unnamed: 1_level_1
ASIC2,AA06;ASIC2
AA06,AA06;ASIC2
AACS,AACS
AACS,AACS;AACS
AACSL,AACSL
...,...
ZPLD1,ZPLD1
TUBGCP4,ZSCAN29;TUBGCP4
ZSCAN29,ZSCAN29;TUBGCP4
TUBGCP4,ZSCAN29;TUBGCP4;TUBGCP4


In [21]:
xQTLs_expr_metab_prot.geneSymbol

0        DVL1
1        DVL1
2        DVL1
3        DVL1
4        DVL1
         ... 
57092    ARSA
57093    ARSA
57094    ARSA
57095    ARSA
57096    ARSA
Name: geneSymbol, Length: 57097, dtype: object

In [24]:
gene_group_to_gene_filtered = gene_group_to_gene[
    gene_group_to_gene.index.isin(np.unique(xQTLs_expr_metab_prot.geneSymbol))]

In [25]:
gene_group_to_gene_filtered

Unnamed: 0_level_0,gene
Symbol,Unnamed: 1_level_1
ASIC2,AA06;ASIC2
AACS,AACS
AACS,AACS;AACS
AADAT,AADAT;AADAT
AADAT,AADAT;AADAT;AADAT
...,...
CSAD,ZNF740;ZNF740;CSAD;CSAD
PLOD3,ZNHIT1;PLOD3
PLOD3,ZNHIT1;PLOD3;PLOD3
PLOD3,ZNHIT1;PLOD3;ZNHIT1


In [29]:
xQTLs_expr_metab_prot_with_gene_groups = xQTLs_expr_metab_prot.merge(gene_group_to_gene_filtered,
                                                                     left_on = ['geneSymbol'], 
                                                                     right_on = ['Symbol'],
                                                                     how = 'inner',
                                                                     suffixes=('', '_y'))

In [30]:
xQTLs_expr_metab_prot_with_gene_groups

Unnamed: 0,beta_prot,pvalue_prot,geneSymbol,chromosome,snpLocation,snpid,snpLocId,gene,statistic_expr,pvalue_expr,...,geneStartPosition,geneEndPosition,statistic_prot,statistic_GWAS,pvalue_GWAS,beta_GWAS,statistic_metab,pvalue_metab,beta_metab,gene_y
0,0.000419,0.963906,DVL1,1,1171249,rs7528416,1:1171249_T_C,ENSG00000107404,1.338474,0.180742,...,1335276.0,1349350.0,0.045290,-1.994677,0.046078,-0.032855,0.979838,0.328618,2.975937,DVL1
1,0.000403,0.960245,DVL1,1,1173611,rs6697886,1:1173611_G_A,ENSG00000107404,1.940041,0.052375,...,1335276.0,1349350.0,0.049888,0.441435,0.658898,0.006154,-0.908323,0.365049,-7.354410,DVL1
2,-0.005612,0.397592,DVL1,1,1174282,rs4970364,1:1174282_T_C,ENSG00000107404,-0.105954,0.915619,...,1335276.0,1349350.0,-0.847144,0.167264,0.867162,0.001886,-1.305791,0.193464,-8.278414,DVL1
3,-0.006418,0.474499,DVL1,1,1176597,rs6675798,1:1176597_T_C,ENSG00000107404,2.395601,0.016593,...,1335276.0,1349350.0,-0.716084,0.240721,0.809771,0.003889,-0.712314,0.477289,-6.761995,DVL1
4,-0.006418,0.474499,DVL1,1,1177630,rs2297864,1:1177630_A_G,ENSG00000107404,2.237343,0.025264,...,1335276.0,1349350.0,-0.716084,0.291528,0.770647,0.004730,-0.712314,0.477289,-6.761995,DVL1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235567,-0.006474,0.757270,ARSA,22,51163138,rs715586,22:51163138_C_T,ENSG00000100299,0.930321,0.352205,...,50622754.0,50628173.0,-0.309357,1.978756,0.047843,0.026554,2.571247,0.011027,0.150297,ARSA;ARSA;ARSA;ARSA;ARSA;ARSA;ARSA;ARSA;ARSA
235568,-0.006474,0.757270,ARSA,22,51163138,rs715586,22:51163138_C_T,ENSG00000100299,0.930321,0.352205,...,50622754.0,50628173.0,-0.309357,1.978756,0.047843,0.026554,2.571247,0.011027,0.150297,ARSA;ARSA;ARSA;ARSA;ARSA;ARSA;ARSA;ARSA;ARSA;ARSA
235569,0.024978,0.095344,ARSA,22,51165664,rs8137951,22:51165664_G_A,ENSG00000100299,4.353607,0.000013,...,50622754.0,50628173.0,1.673256,0.504564,0.613865,0.005228,2.074059,0.039646,12.160708,ARSA;ARSA;ARSA;ARSA;ARSA
235570,0.024978,0.095344,ARSA,22,51165664,rs8137951,22:51165664_G_A,ENSG00000100299,4.353607,0.000013,...,50622754.0,50628173.0,1.673256,0.504564,0.613865,0.005228,2.074059,0.039646,12.160708,ARSA;ARSA;ARSA;ARSA;ARSA;ARSA;ARSA;ARSA;ARSA


In [31]:
xQTLs_expr_metab_prot_methyl = xQTLs_expr_metab_prot_with_gene_groups.merge(mQTL,
                                             left_on = ['gene_y', 'snpid'], 
                                             right_on = ['gene', 'snps'],
                                             how = 'inner',
                                             suffixes=('', '_mQTL'))

In [32]:
xQTLs_expr_metab_prot_methyl

Unnamed: 0,beta_prot,pvalue_prot,geneSymbol,chromosome,snpLocation,snpid,snpLocId,gene,statistic_expr,pvalue_expr,...,statistic_metab,pvalue_metab,beta_metab,gene_y,snps,gene_mQTL,statistic,pvalue,FDR,beta
0,-0.054173,1.791532e-02,CDA,1,20884941,rs12134554,1:20884941_T_C,ENSG00000158825,0.542508,0.587469,...,-2.118558,0.035644,-32.344288,CDA,rs12134554,CDA,-3.508221,0.000561,0.944759,-0.017089
1,-0.011035,4.910495e-01,CDA,1,20893965,rs472055,1:20893965_G_A,ENSG00000158825,-2.480295,0.013127,...,-2.476995,0.014270,-10.452803,CDA,rs472055,CDA,3.968124,0.000102,0.866868,0.013397
2,-0.009748,2.573151e-01,PRKAA2,1,57216376,rs6696458,1:57216376_G_A,ENSG00000162409,-0.245578,0.806009,...,2.341422,0.020420,0.131697,PRKAA2,rs6696458,PRKAA2,3.792661,0.000199,0.901952,0.014877
3,-0.007009,3.718588e-01,PRKAA2,1,57221878,rs2298127,1:57221878_A_C,ENSG00000162409,-1.790089,0.073440,...,-2.102151,0.037077,-37.214072,PRKAA2,rs2298127,PRKAA2,3.344187,0.000990,0.960962,0.011546
4,-0.007009,3.718588e-01,PRKAA2,1,57231991,rs12410615,1:57231991_G_A,ENSG00000162409,-1.818491,0.068989,...,-2.102151,0.037077,-37.214072,PRKAA2,rs12410615,PRKAA2,3.344187,0.000990,0.960962,0.011546
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
302,0.079164,2.429989e-13,GART,21,34821372,rs2834216,21:34821372_A_G,ENSG00000159131,3.346643,0.000818,...,1.735614,0.084522,2.304386,SON;GART;SON;GART;GART;GART;GART;GART,rs2834216,SON;GART;SON;GART;GART;GART;GART;GART,-3.546968,0.000489,0.939947,-0.003990
303,0.133763,4.788447e-30,GART,21,34892163,rs2409495,21:34892163_A_G,ENSG00000159131,3.298330,0.000973,...,2.272207,0.024381,3.322802,SON;GART;SON;GART;GART;GART;GART;GART,rs2409495,SON;GART;SON;GART;GART;GART;GART;GART,-3.473863,0.000633,0.948285,-0.004264
304,0.125477,5.285556e-30,GART,21,34894623,rs2834234,21:34894623_T_C,ENSG00000159131,3.600479,0.000318,...,1.772149,0.078238,2.620384,SON;GART;SON;GART;GART;GART;GART;GART,rs2834234,SON;GART;SON;GART;GART;GART;GART;GART,-3.375746,0.000889,0.958191,-0.004038
305,0.122274,3.252704e-29,GART,21,34986862,rs6517183,21:34986862_G_A,ENSG00000159131,3.935685,0.000083,...,-1.739548,0.083826,-8.396733,SON;GART;SON;GART;GART;GART;GART;GART,rs6517183,SON;GART;SON;GART;GART;GART;GART;GART,-3.858935,0.000155,0.889446,-0.004245


In [33]:
xQTLs_expr_metab_prot_methyl.columns

Index(['beta_prot', 'pvalue_prot', 'geneSymbol', 'chromosome', 'snpLocation',
       'snpid', 'snpLocId', 'gene', 'statistic_expr', 'pvalue_expr',
       'beta_expr', 'A1', 'A2', 'A2freq', 'expressionIncreasingAllele',
       'strand', 'geneBiotype', 'geneStartPosition', 'geneEndPosition',
       'statistic_prot', 'statistic_GWAS', 'pvalue_GWAS', 'beta_GWAS',
       'statistic_metab', 'pvalue_metab', 'beta_metab', 'gene_y', 'snps',
       'gene_mQTL', 'statistic', 'pvalue', 'FDR', 'beta'],
      dtype='object')

In [35]:
xQTLs_expr_metab_prot_methyl = xQTLs_expr_metab_prot_methyl.rename({'statistic': 'statistic_methyl',
                                                                    'pvalue': 'pvalue_methyl',
                                                                    'beta': 'beta_methyl'},
            axis=1).drop(['gene_y', 'snps', 'gene_mQTL', 'FDR'], axis=1)

In [36]:
xQTLs_expr_metab_prot_methyl

Unnamed: 0,beta_prot,pvalue_prot,geneSymbol,chromosome,snpLocation,snpid,snpLocId,gene,statistic_expr,pvalue_expr,...,statistic_prot,statistic_GWAS,pvalue_GWAS,beta_GWAS,statistic_metab,pvalue_metab,beta_metab,statistic_methyl,pvalue_methyl,beta_methyl
0,-0.054173,1.791532e-02,CDA,1,20884941,rs12134554,1:20884941_T_C,ENSG00000158825,0.542508,0.587469,...,-2.383621,0.949027,0.342607,0.014007,-2.118558,0.035644,-32.344288,-3.508221,0.000561,-0.017089
1,-0.011035,4.910495e-01,CDA,1,20893965,rs472055,1:20893965_G_A,ENSG00000158825,-2.480295,0.013127,...,-0.689691,-1.990737,0.046510,-0.018911,-2.476995,0.014270,-10.452803,3.968124,0.000102,0.013397
2,-0.009748,2.573151e-01,PRKAA2,1,57216376,rs6696458,1:57216376_G_A,ENSG00000162409,-0.245578,0.806009,...,-1.134877,2.153163,0.031306,0.027700,2.341422,0.020420,0.131697,3.792661,0.000199,0.014877
3,-0.007009,3.718588e-01,PRKAA2,1,57221878,rs2298127,1:57221878_A_C,ENSG00000162409,-1.790089,0.073440,...,-0.894310,-1.250113,0.211258,-0.014205,-2.102151,0.037077,-37.214072,3.344187,0.000990,0.011546
4,-0.007009,3.718588e-01,PRKAA2,1,57231991,rs12410615,1:57231991_G_A,ENSG00000162409,-1.818491,0.068989,...,-0.894310,1.217821,0.223292,0.013833,-2.102151,0.037077,-37.214072,3.344187,0.000990,0.011546
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
302,0.079164,2.429989e-13,GART,21,34821372,rs2834216,21:34821372_A_G,ENSG00000159131,3.346643,0.000818,...,7.662269,1.042058,0.297385,0.010693,1.735614,0.084522,2.304386,-3.546968,0.000489,-0.003990
303,0.133763,4.788447e-30,GART,21,34892163,rs2409495,21:34892163_A_G,ENSG00000159131,3.298330,0.000973,...,12.720786,1.025291,0.305226,0.012303,2.272207,0.024381,3.322802,-3.473863,0.000633,-0.004264
304,0.125477,5.285556e-30,GART,21,34894623,rs2834234,21:34894623_T_C,ENSG00000159131,3.600479,0.000318,...,12.708979,1.173989,0.240400,0.013047,1.772149,0.078238,2.620384,-3.375746,0.000889,-0.004038
305,0.122274,3.252704e-29,GART,21,34986862,rs6517183,21:34986862_G_A,ENSG00000159131,3.935685,0.000083,...,12.491291,-0.946790,0.343746,-0.010173,-1.739548,0.083826,-8.396733,-3.858935,0.000155,-0.004245


In [37]:
np.unique(xQTLs_expr_metab_prot_methyl.geneSymbol)

array(['AADAT', 'AASS', 'ABCB8', 'ABCG1', 'ACAA1', 'ACAD8', 'ACADS',
       'ADI1', 'ADK', 'AGPS', 'ALDH3A2', 'ALDH9A1', 'AMACR', 'AMD1',
       'AQP1', 'ARG2', 'ASIC2', 'ASL', 'ASPA', 'CACNA1B', 'CAMK2D', 'CDA',
       'CHDH', 'CHKA', 'CMPK2', 'COASY', 'DGKB', 'DGKE', 'DLD', 'EGFR',
       'ENOPH1', 'ENPP2', 'FZD7', 'GART', 'GNAS', 'GPD1L', 'GRM5', 'HAAO',
       'HNMT', 'HTR2A', 'IARS2', 'IDH1', 'ITPA', 'L2HGDH', 'MAP2K2',
       'MGST3', 'MTHFD1L', 'NADSYN1', 'NEU3', 'NME3', 'OGDH', 'PAFAH1B3',
       'PAICS', 'PDE10A', 'PDE4B', 'PDE4D', 'PDE8B', 'PDGFRB', 'PLCB2',
       'PLCB4', 'PLD1', 'PPAT', 'PRKAA2', 'PRKCB', 'PRKG1', 'RPTOR',
       'SAT2', 'SETMAR', 'TK2', 'TSC1', 'TUBA4A', 'TUBB6', 'UCKL1'],
      dtype=object)

In [38]:
xQTLs_expr_metab_prot_methyl.to_csv("analysis_using_meta_eQTL/xQTLs_expr_metab_prot_methyl.csv", index=False)