# GWAS Locus Browser Psychencode QTL and GWAS Gene Data
- **Author(s)** - Frank Grenn
- **Date Started** - March 2020
- **Quick Description:** collect Psychencode QTL and GWAS data for genes
- **Data:** [psychENCODE](http://resource.psychencode.org/)

In [1]:
library(data.table)
library(dplyr)
library("EnsDb.Hsapiens.v86")


Attaching package: 'dplyr'


The following objects are masked from 'package:data.table':

    between, first, last


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


Loading required package: ensembldb

Loading required package: BiocGenerics

Loading required package: parallel


Attaching package: 'BiocGenerics'


The following objects are masked from 'package:parallel':

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB


The following objects are masked from 'package:dplyr':

    combine, intersect, setdiff, union


The following objects are masked from 'package:stats':

    IQR, mad, sd, var, xtabs


The following objects are masked from 'package:base':

    Filter, Find, Map, Position, Reduce, anyDuplicated, append,
    as.data.frame, bas

## 1) Gene List

In [107]:
evidence <- fread("$PATH1/genes_by_locus.csv")


evidence_genes <- evidence$Gene

length(evidence_genes)
head(evidence_genes)


## 2) get the gwas data for (a) meta5 or (b) progression1 or (c) progression2

#### (a) meta5

In [36]:
#META5
gwas_in = paste0('$PATH2/resultsForSmr_filtered.tab.gz')


gwas_smr = fread(gwas_in)
dim(gwas_smr)
head(gwas_smr)

#only want the meta5 genes
evidence_genes <- evidence[which(evidence$Locusnumber!="prog1" & evidence$Locusnumber!="prog2"),]$Gene
length(evidence_genes)

SNP,A1,A2,freq,b,se,p,N
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<int>
rs7899632,A,G,0.5665,0.011,0.0095,0.2476,1474097
rs61875309,A,C,0.7953,-0.0091,0.0116,0.4295,1474097
rs150203744,T,C,0.014,-0.0152,0.0649,0.8147,1351069
rs111551711,T,C,0.9868,0.0347,0.0742,0.6396,777210
rs12258651,T,G,0.8819,-0.0011,0.0149,0.9423,1474097
rs72828461,A,G,0.9605,-0.0018,0.0325,0.9569,1365107


In [48]:
#ignore SNPs with no rsid
gwas_smr_rsid <- gwas_smr %>% dplyr::filter(SNP!=".")
dim(gwas_smr_rsid)
head(gwas_smr_rsid)

Unnamed: 0_level_0,SNP,A1,A2,freq,b,se,p,N
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<int>
1,rs7899632,A,G,0.5665,0.011,0.0095,0.2476,1474097
2,rs61875309,A,C,0.7953,-0.0091,0.0116,0.4295,1474097
3,rs150203744,T,C,0.014,-0.0152,0.0649,0.8147,1351069
4,rs111551711,T,C,0.9868,0.0347,0.0742,0.6396,777210
5,rs12258651,T,G,0.8819,-0.0011,0.0149,0.9423,1474097
6,rs72828461,A,G,0.9605,-0.0018,0.0325,0.9569,1365107


In [49]:
#the file has duplicates for some reason, so remove them
gwas_smr_unique <- unique(gwas_smr_rsid)
dim(gwas_smr_unique)
head(gwas_smr_unique)

Unnamed: 0_level_0,SNP,A1,A2,freq,b,se,p,N
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<int>
1,rs7899632,A,G,0.5665,0.011,0.0095,0.2476,1474097
2,rs61875309,A,C,0.7953,-0.0091,0.0116,0.4295,1474097
3,rs150203744,T,C,0.014,-0.0152,0.0649,0.8147,1351069
4,rs111551711,T,C,0.9868,0.0347,0.0742,0.6396,777210
5,rs12258651,T,G,0.8819,-0.0011,0.0149,0.9423,1474097
6,rs72828461,A,G,0.9605,-0.0018,0.0325,0.9569,1365107


In [52]:
snp_info = fread("$PATH3/HRC_RS_conversion_final_new_imputation_server.txt")
dim(snp_info)
head(snp_info)

ID,ID2
<chr>,<chr>
rs571093408,1:13380:C:G
rs541172944,1:16071:G:A
rs529651976,1:16141:C:T
rs200943160,1:49298:T:C
rs140052487,1:54353:C:A
rs558796213,1:54564:G:T


we need chr:bp:ref:alt for merging later

In [59]:
#merging the two will increase the row count because rsids in snp_info have multiple chr:bp:ref:alt but this shouldn't be an issue in the end
gwas = merge(x=gwas_smr_unique, y=snp_info, by.x = "SNP", by.y = "ID", all.x=T)
dim(gwas)
head(gwas)

Unnamed: 0_level_0,SNP,A1,A2,freq,b,se,p,N,ID2
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
1,rs10,A,C,0.0544,0.0018,0.0259,0.9443,1460059,7:92383888:A:C
2,rs1000000,A,G,0.2175,-0.0248,0.0121,0.04058,1460059,12:126890980:G:A
3,rs10000000,A,T,0.9316,-0.0194,0.0252,0.4409,1364108,4:40088896:A:T
4,rs10000003,A,G,0.2997,0.0151,0.0105,0.1515,1474097,4:57561647:A:G
5,rs10000005,A,G,0.5504,0.0046,0.0101,0.6517,1460059,4:85161558:G:A
6,rs10000006,T,C,0.9625,0.0561,0.0284,0.0482,1460059,4:108826383:T:C


#### (b) progression1

read in progression gwas data

In [79]:
#Progression Loci
##rs61863020, 10:112956055, prog1
gwas_in = paste0('$PATH1/locuszoom/base_INS.txt')
evidence_genes <- evidence[which(evidence$Locusnumber=="prog1"),]$Gene

print(length(evidence_genes))
print(evidence_genes)

gwas = fread(gwas_in)

# change the headers a bit
if('ID' %in% names(gwas)){
  gwas = gwas %>% mutate(SNP = ID) %>% mutate(p = P)
}

if('P' %in% names(gwas)){
    gwas = gwas %>% mutate(p = P)
}

head(gwas)
dim(gwas)

[1] 15
 [1] "ADRA2A"    "BBIP1"     "DUSP5"     "GPAM"      "HEAT2"     "MIR4680"  
 [7] "MIR548E"   "MXI1"      "PDCD4"     "PDCD4-AS1" "RBM20"     "RPL13AP6" 
[13] "SHOC2"     "SMC3"      "SMNDC1"   


Unnamed: 0_level_0,SNP,BETA,SE,P,N,NSTUDY,Isq,p
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<int>,<int>,<dbl>,<dbl>
1,5:85928892,-0.0963,0.2209,0.6629,1033,5,0.0,0.6629
2,2:170966953,0.4332,0.37,0.2417,1736,5,40.8,0.2417
3,10:128341232,-0.0573,0.107,0.5919,1033,5,0.0,0.5919
4,3:62707519,-0.3537,0.2601,0.1738,1033,5,0.0,0.1738
5,2:80464120,-0.1522,0.5069,0.7641,931,4,0.0,0.7641
6,18:51112281,0.0078,0.1181,0.9476,1033,5,0.0,0.9476


merge progression loci summary stats with reference.txt to get rsids

In [80]:
reference = fread("$PATH1/locuszoom/reference.txt")
dim(reference)
head(reference)


SNP,RSID,CHR,START,REF,ALT,MAF,FUNC,NearGENE
<chr>,<chr>,<int>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<chr>
1:14470,,1,14470,G,A,0.0263,ncRNA_exonic,WASH7P
1:14671,rs201055865,1,14671,G,C,0.0156,ncRNA_exonic,WASH7P
1:14773,rs878915777,1,14773,C,T,0.0178,ncRNA_exonic,WASH7P
1:16841,rs62636368,1,16841,G,T,0.0725,ncRNA_intronic,WASH7P
1:16856,rs3891260,1,16856,A,G,0.0199,ncRNA_splicing,WASH7P
1:17147,rs867691030,1,17147,G,A,0.0448,ncRNA_intronic,WASH7P


In [81]:
gwas = merge(x = gwas, y = reference, by = "SNP", all.x = TRUE)

gwas$ID2 <- paste0(gwas$SNP,":",gwas$REF,":",gwas$ALT)
gwas$SNP <- NULL
gwas$P <- NULL


colnames(gwas)[colnames(gwas)=="RSID"] <- "SNP"

head(gwas)
dim(gwas)

Unnamed: 0_level_0,BETA,SE,N,NSTUDY,Isq,p,SNP,CHR,START,REF,ALT,MAF,FUNC,NearGENE,ID2
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<int>,<dbl>,<dbl>,<chr>,<int>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>
1,-0.041,0.1768,376,1,0,0.8168,rs6602381,10,10000018,A,G,0.4459,intergenic,LOC101928272;LOC101928298,10:10000018:A:G
2,-0.0029,0.0715,2220,8,0,0.9676,rs7899632,10,100000625,A,G,0.4289,intronic,R3HCC1L,10:100000625:A:G
3,-0.0997,0.0896,2220,8,0,0.2659,rs61875309,10,100000645,A,C,0.1999,intronic,R3HCC1L,10:100000645:A:C
4,-0.0129,0.5899,555,3,0,0.9825,rs150203744,10,100001867,C,T,0.0163,intronic,R3HCC1L,10:100001867:C:T
5,-0.3516,0.5684,769,3,0,0.5362,rs111551711,10,100002464,T,C,0.0166,intronic,R3HCC1L,10:100002464:T:C
6,-0.0228,0.1204,2220,8,0,0.8501,rs12258651,10,100003242,T,G,0.1278,intronic,R3HCC1L,10:100003242:T:G


#### (c) progression2

read in progression gwas data

In [108]:
#Progression Loci
##rs382940, 9:108058562, prog2
gwas_in = paste0('$PATH1/locuszoom/surv_HY3.txt')
evidence_genes <- evidence[which(evidence$Locusnumber=="prog2"),]$Gene




print(length(evidence_genes))
print(evidence_genes)

gwas = fread(gwas_in)

# change the headers a bit
if('ID' %in% names(gwas)){
  gwas = gwas %>% mutate(SNP = ID) %>% mutate(p = P)
}

if('P' %in% names(gwas)){
    gwas = gwas %>% mutate(p = P)
}

head(gwas)
dim(gwas)

[1] 17
 [1] "ABCA1"      "FKTN"       "FSD1L"      "NIPSNAP3A"  "NIPSNAP3B" 
 [6] "OR13C2"     "OR13C3"     "OR13C4"     "OR13C5"     "OR13C8"    
[11] "OR13C9"     "OR13D1"     "OR13F1"     "RALGAPA1P1" "SLC44A1"   
[16] "TAL2"       "TMEM38B"   


Unnamed: 0_level_0,SNP,BETA,SE,P,N,NSTUDY,Isq,p
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<int>,<int>,<dbl>,<dbl>
1,5:29439275,-0.0322,0.0657,0.6241,2582,9,14.3,0.6241
2,5:85928892,0.2634,0.1526,0.08427,1299,5,0.0,0.08427
3,2:170966953,0.4025,0.287,0.1608,2265,8,0.0,0.1608
4,10:128341232,-0.1408,0.0783,0.07199,1299,5,46.5,0.07199
5,3:62707519,-0.1344,0.1723,0.4355,1299,5,0.0,0.4355
6,2:80464120,0.2888,0.2789,0.3004,1299,5,0.0,0.3004


merge progression loci summary stats with reference.txt to get rsids

In [109]:
reference = fread("$PATH1/locuszoom/reference.txt")
dim(reference)
head(reference)


SNP,RSID,CHR,START,REF,ALT,MAF,FUNC,NearGENE
<chr>,<chr>,<int>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<chr>
1:14470,,1,14470,G,A,0.0263,ncRNA_exonic,WASH7P
1:14671,rs201055865,1,14671,G,C,0.0156,ncRNA_exonic,WASH7P
1:14773,rs878915777,1,14773,C,T,0.0178,ncRNA_exonic,WASH7P
1:16841,rs62636368,1,16841,G,T,0.0725,ncRNA_intronic,WASH7P
1:16856,rs3891260,1,16856,A,G,0.0199,ncRNA_splicing,WASH7P
1:17147,rs867691030,1,17147,G,A,0.0448,ncRNA_intronic,WASH7P


In [110]:
gwas = merge(x = gwas, y = reference, by = "SNP", all.x = TRUE)

gwas$ID2 <- paste0(gwas$SNP,":",gwas$REF,":",gwas$ALT)
gwas$SNP <- NULL
gwas$P <- NULL

colnames(gwas)[colnames(gwas)=="RSID"] <- "SNP"

head(gwas)
dim(gwas)

Unnamed: 0_level_0,BETA,SE,N,NSTUDY,Isq,p,SNP,CHR,START,REF,ALT,MAF,FUNC,NearGENE,ID2
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<int>,<dbl>,<dbl>,<chr>,<int>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>
1,-0.1558,0.1759,363,1,0.0,0.3759,rs6602381,10,10000018,A,G,0.4459,intergenic,LOC101928272;LOC101928298,10:10000018:A:G
2,0.0032,0.0655,2582,9,13.2,0.9606,rs7899632,10,100000625,A,G,0.4289,intronic,R3HCC1L,10:100000625:A:G
3,0.015,0.0765,2582,9,0.0,0.8447,rs61875309,10,100000645,A,C,0.1999,intronic,R3HCC1L,10:100000645:A:C
4,0.2703,0.3834,936,4,26.6,0.4809,rs150203744,10,100001867,C,T,0.0163,intronic,R3HCC1L,10:100001867:C:T
5,0.8956,0.3042,1352,5,15.5,0.003241,rs111551711,10,100002464,T,C,0.0166,intronic,R3HCC1L,10:100002464:T:C
6,0.0426,0.1108,2582,9,14.1,0.7008,rs12258651,10,100003242,T,G,0.1278,intronic,R3HCC1L,10:100003242:T:G


## 3) Get Risk Variant Data

In [25]:
#evidence <- fread("$PATH1/genes_by_locus.csv")
meta5_data <- fread("$PATH1/GWAS_loci_overview.csv")

meta5_data <- meta5_data %>% dplyr::select("Locus Number", "SNP", "CHR","CHR:BP (hg19)")
colnames(meta5_data) <- c("Locus Number", "SNP", "CHR", "CHR:BP")

prog_data <- fread("$PATH1/ProgressionLoci.csv")

prog_data <- prog_data %>% dplyr::select("Locus Number", "RSID", "CHR", "SNP")

colnames(prog_data) <- c("Locus Number", "SNP", "CHR", "CHR:BP")


In [26]:
#combine all the rsids to one df
variant_data <- rbind(meta5_data, prog_data)
dim(variant_data)
head(variant_data)

Locus Number,SNP,CHR,CHR:BP
<chr>,<chr>,<int>,<chr>
1,rs114138760,1,1:154898185
1,rs35749011,1,1:155135036
1,rs76763715,1,1:155205634
2,rs6658353,1,1:161469054
3,rs11578699,1,1:171719769
4,rs823118,1,1:205723572


## 4) eQTL
read the psychencode eQTL data  
should be similar to the blood and brain eQTL from before

In [111]:
#make a output folder depending on the gwas
#out_dir = "$PATH1/qtl/psychencode/eqtl_tsv/meta5"
#out_dir = "$PATH1/qtl/psychencode/eqtl_tsv/prog1"
out_dir = "$PATH1/qtl/psychencode/eqtl_tsv/prog2"
dir.create(out_dir)

In [112]:
eqtl = fread('$PATH1/qtl/psychencode/DER-08a_hg19_eQTL.significant.txt')

dim(eqtl)
head(eqtl)


gene_id,gene_chr,gene_start,gene_end,strand,number_of_SNPs_tested,SNP_distance_to_TSS,SNP_id,SNP_chr,SNP_start,SNP_end,nominal_pval,regression_slope,top_SNP,FDR
<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<int>,<dbl>
ENSG00000215004.3,chr3,29128887,29128887,+,4558,-365581,3:28763306,chr3,28763306,28763306,0.000270335,0.0886898,1,0.0197905
ENSG00000215004.3,chr3,29128887,29128887,+,4558,-364465,3:28764422,chr3,28764422,28764422,0.00034422,0.0868007,0,0.02424459
ENSG00000215004.3,chr3,29128887,29128887,+,4558,-364376,3:28764511,chr3,28764511,28764511,0.000523559,0.084103,0,0.0343167
ENSG00000215004.3,chr3,29128887,29128887,+,4558,-357186,3:28771701,chr3,28771701,28771701,0.000606794,0.0861309,0,0.03870841
ENSG00000215004.3,chr3,29128887,29128887,+,4558,276595,3:29405482,chr3,29405482,29405482,0.000509309,-0.507667,0,0.03355055
ENSG00000215004.3,chr3,29128887,29128887,+,4558,753189,3:29882076,chr3,29882076,29882076,0.000525076,0.0807715,0,0.0343985


#### we need to get the gene names that match the gene_id

In [113]:
#add a gene id col without the version number to join on later
eqtl$gene_id_no_version <- gsub("\\..*","",eqtl$gene_id)
dim(eqtl)
head(eqtl)

gene_id,gene_chr,gene_start,gene_end,strand,number_of_SNPs_tested,SNP_distance_to_TSS,SNP_id,SNP_chr,SNP_start,SNP_end,nominal_pval,regression_slope,top_SNP,FDR,gene_id_no_version
<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<int>,<dbl>,<chr>
ENSG00000215004.3,chr3,29128887,29128887,+,4558,-365581,3:28763306,chr3,28763306,28763306,0.000270335,0.0886898,1,0.0197905,ENSG00000215004
ENSG00000215004.3,chr3,29128887,29128887,+,4558,-364465,3:28764422,chr3,28764422,28764422,0.00034422,0.0868007,0,0.02424459,ENSG00000215004
ENSG00000215004.3,chr3,29128887,29128887,+,4558,-364376,3:28764511,chr3,28764511,28764511,0.000523559,0.084103,0,0.0343167,ENSG00000215004
ENSG00000215004.3,chr3,29128887,29128887,+,4558,-357186,3:28771701,chr3,28771701,28771701,0.000606794,0.0861309,0,0.03870841,ENSG00000215004
ENSG00000215004.3,chr3,29128887,29128887,+,4558,276595,3:29405482,chr3,29405482,29405482,0.000509309,-0.507667,0,0.03355055,ENSG00000215004
ENSG00000215004.3,chr3,29128887,29128887,+,4558,753189,3:29882076,chr3,29882076,29882076,0.000525076,0.0807715,0,0.0343985,ENSG00000215004


In [114]:
edb <- EnsDb.Hsapiens.v86
tx <- transcripts(edb, columns=c("tx_id", "gene_id", "gene_name"))

In [115]:
mapping <- cbind(gene_id=tx$gene_id, name=tx$gene_name)
dim(mapping)
head(mapping)

gene_id,name
ENSG00000223972,DDX11L1
ENSG00000223972,DDX11L1
ENSG00000227232,WASH7P
ENSG00000278267,MIR6859-1
ENSG00000243485,MIR1302-2
ENSG00000243485,MIR1302-2


In [116]:
mapping <-mapping[!duplicated(mapping),]
dim(mapping)
head(mapping)

gene_id,name
ENSG00000223972,DDX11L1
ENSG00000227232,WASH7P
ENSG00000278267,MIR6859-1
ENSG00000243485,MIR1302-2
ENSG00000237613,FAM138A
ENSG00000268020,OR4G4P


In [117]:
qtl <- merge(x=eqtl, y=mapping, by.x='gene_id_no_version',by.y='gene_id', all.x=TRUE)
dim(qtl)
head(qtl)

gene_id_no_version,gene_id,gene_chr,gene_start,gene_end,strand,number_of_SNPs_tested,SNP_distance_to_TSS,SNP_id,SNP_chr,SNP_start,SNP_end,nominal_pval,regression_slope,top_SNP,FDR,name
<chr>,<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<int>,<dbl>,<chr>
ENSG00000000419,ENSG00000000419.8,chr20,49575087,49575087,+,3822,646933,20:50222020,chr20,50222020,50222020,0.000324956,-0.143149,0,0.02310444,DPM1
ENSG00000000419,ENSG00000000419.8,chr20,49575087,49575087,+,3822,664972,20:50240059,chr20,50240059,50240059,0.000451591,-0.148017,0,0.03039076,DPM1
ENSG00000000419,ENSG00000000419.8,chr20,49575087,49575087,+,3822,664977,20:50240064,chr20,50240064,50240064,0.000453964,-0.148063,0,0.0305231,DPM1
ENSG00000000419,ENSG00000000419.8,chr20,49575087,49575087,+,3822,685503,20:50260590,chr20,50260590,50260590,0.000673931,-0.178445,0,0.04215094,DPM1
ENSG00000000419,ENSG00000000419.8,chr20,49575087,49575087,+,3822,813581,20:50388668,chr20,50388668,50388668,0.000624824,0.0601779,0,0.03964026,DPM1
ENSG00000000419,ENSG00000000419.8,chr20,49575087,49575087,+,3822,820811,20:50395898,chr20,50395898,50395898,0.000664205,0.059573,0,0.0416544,DPM1


now we need to get the rsids for the SNP_id

In [118]:
psych_snps <- fread("$PATH1/qtl/psychencode/SNP_Information_Table_with_Alleles.txt")
dim(psych_snps)
head(psych_snps)

PEC_id,Rsid,chr,position,REF,ALT
<chr>,<chr>,<chr>,<int>,<chr>,<chr>
1:927741,rs6665587,chr1,927741,G,A
1:928416,rs111754459,chr1,928416,G,A
1:930377,rs3128109,chr1,930377,C,T
1:931548,rs2799062,chr1,931548,G,T
1:935715,rs3128114,chr1,935715,C,T
1:938125,rs2710868,chr1,938125,G,A


In [119]:

qtl <- merge(x=qtl, y=psych_snps, by.x = "SNP_id", by.y = "PEC_id",all.x=TRUE)
class(qtl)
typeof(qtl)
dim(qtl)
head(qtl)


SNP_id,gene_id_no_version,gene_id,gene_chr,gene_start,gene_end,strand,number_of_SNPs_tested,SNP_distance_to_TSS,SNP_chr,⋯,nominal_pval,regression_slope,top_SNP,FDR,name,Rsid,chr,position,REF,ALT
<chr>,<chr>,<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,⋯,<dbl>,<dbl>,<int>,<dbl>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>
10:100000645,ENSG00000095713,ENSG00000095713.9,chr10,99696035,99696035,+,3430,304610,chr10,⋯,6.23505e-06,-0.0581551,0,0.0007121564,CRTAC1,rs61875309,chr10,100000645,A,C
10:100003785,ENSG00000166024,ENSG00000166024.9,chr10,99894428,99894428,+,3529,109357,chr10,⋯,0.00016317,-0.0393197,0,0.0128647633,R3HCC1L,rs1359508,chr10,100003785,T,C
10:100004996,ENSG00000166024,ENSG00000166024.9,chr10,99894428,99894428,+,3529,110568,chr10,⋯,0.000163502,-0.0393159,0,0.0128872297,R3HCC1L,rs2025625,chr10,100004996,G,A
10:100007362,ENSG00000095713,ENSG00000095713.9,chr10,99696035,99696035,+,3430,311327,chr10,⋯,7.17745e-06,-0.0582244,0,0.0008087173,CRTAC1,rs11816998,chr10,100007362,G,C
10:100010186,ENSG00000095713,ENSG00000095713.9,chr10,99696035,99696035,+,3430,314151,chr10,⋯,0.000629913,-0.0348725,0,0.0399014977,CRTAC1,rs4919190,chr10,100010186,A,G
10:100010186,ENSG00000269783,ENSG00000269783.1,chr10,99627889,99627889,+,3444,382297,chr10,⋯,0.000320544,0.0555388,0,0.0228401854,,rs4919190,chr10,100010186,A,G


In [120]:
#add a chr:bp:ref:alt col
qtl$'var_id' <- paste0(qtl$SNP_id,":",qtl$REF,":",qtl$ALT)
dim(qtl)
head(qtl)

SNP_id,gene_id_no_version,gene_id,gene_chr,gene_start,gene_end,strand,number_of_SNPs_tested,SNP_distance_to_TSS,SNP_chr,⋯,regression_slope,top_SNP,FDR,name,Rsid,chr,position,REF,ALT,var_id
<chr>,<chr>,<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,⋯,<dbl>,<int>,<dbl>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>
10:100000645,ENSG00000095713,ENSG00000095713.9,chr10,99696035,99696035,+,3430,304610,chr10,⋯,-0.0581551,0,0.0007121564,CRTAC1,rs61875309,chr10,100000645,A,C,10:100000645:A:C
10:100003785,ENSG00000166024,ENSG00000166024.9,chr10,99894428,99894428,+,3529,109357,chr10,⋯,-0.0393197,0,0.0128647633,R3HCC1L,rs1359508,chr10,100003785,T,C,10:100003785:T:C
10:100004996,ENSG00000166024,ENSG00000166024.9,chr10,99894428,99894428,+,3529,110568,chr10,⋯,-0.0393159,0,0.0128872297,R3HCC1L,rs2025625,chr10,100004996,G,A,10:100004996:G:A
10:100007362,ENSG00000095713,ENSG00000095713.9,chr10,99696035,99696035,+,3430,311327,chr10,⋯,-0.0582244,0,0.0008087173,CRTAC1,rs11816998,chr10,100007362,G,C,10:100007362:G:C
10:100010186,ENSG00000095713,ENSG00000095713.9,chr10,99696035,99696035,+,3430,314151,chr10,⋯,-0.0348725,0,0.0399014977,CRTAC1,rs4919190,chr10,100010186,A,G,10:100010186:A:G
10:100010186,ENSG00000269783,ENSG00000269783.1,chr10,99627889,99627889,+,3444,382297,chr10,⋯,0.0555388,0,0.0228401854,,rs4919190,chr10,100010186,A,G,10:100010186:A:G


In [121]:

print(length(evidence_genes))
for(gene in evidence_genes)
{
    print(gene)
    eqtl_fn = paste0(out_dir,'/', gene, '_eqtl.tsv')
    gwasout_fn = paste0(out_dir,'/',  gene, '_gwas.tsv')

    eqtl_gene = qtl %>% dplyr::filter(name == gene) %>% 
    mutate(rsid = Rsid, pval = nominal_pval) %>% dplyr::select(rsid, var_id, pval)
    gwas %>% dplyr::filter(ID2 %in% eqtl_gene$var_id) %>% 
    mutate(rsid = SNP, var_id = ID2, pval = p) %>% dplyr::select(rsid, var_id, pval) %>%
    fwrite(gwasout_fn, sep='\t')
    fwrite(eqtl_gene, eqtl_fn, sep='\t')
}
 

[1] 17
[1] "ABCA1"
[1] "FKTN"
[1] "FSD1L"
[1] "NIPSNAP3A"
[1] "NIPSNAP3B"
[1] "OR13C2"
[1] "OR13C3"
[1] "OR13C4"
[1] "OR13C5"
[1] "OR13C8"
[1] "OR13C9"
[1] "OR13D1"
[1] "OR13F1"
[1] "RALGAPA1P1"
[1] "SLC44A1"
[1] "TAL2"
[1] "TMEM38B"


## 5) cQTL (chromatin QTL)
no gene or transcript id available in the data so we will just check for risk snps

In [50]:
cqtl = fread('$PATH1/qtl/psychencode/DER-09_hg19_cQTL.significant.txt')

dim(cqtl)
head(cqtl)


Peak_id,Peak_chr,Peak_center,Peak_center,strand,number_of_SNPs_tested,SNP_distance_to_Peak_center,SNP_id,SNP_chr,SNP_start,SNP_end,nominal_pval,regression_slope,top_SNP,FDR
<chr>,<chr>,<int>,<int>.1,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<int>,<dbl>
chr4:16598007:16599006,chr4,16598507,16598507,+,4244,-363578,4:16234929,chr4,16234929,16234929,2.89904e-06,-1.19062,1,0.028843758
chr4:16618386:16619385,chr4,16618887,16618887,+,4242,-87801,4:16531086,chr4,16531086,16531086,1.27775e-06,-0.56562,1,0.014892876
chr4:128884229:128885228,chr4,128884729,128884729,+,2727,744005,4:129628734,chr4,129628734,129628734,2.22706e-06,-0.592201,1,0.02335893
chr4:77129698:77130697,chr4,77130199,77130199,+,4355,-325157,4:76805042,chr4,76805042,76805042,4.13701e-06,0.42569,1,0.038300799
chr4:77130641:77131640,chr4,77131141,77131141,+,4356,-328501,4:76802640,chr4,76802640,76802640,3.50633e-06,0.427748,1,0.033632003
chr22:37572225:37573224,chr22,37572725,37572725,+,3895,-682143,22:36890582,chr22,36890582,36890582,6.28878e-07,-0.591706,1,0.008213661


In [51]:

cqtl <- cqtl[,c(1,2,5:15)]
head(cqtl)

Peak_id,Peak_chr,strand,number_of_SNPs_tested,SNP_distance_to_Peak_center,SNP_id,SNP_chr,SNP_start,SNP_end,nominal_pval,regression_slope,top_SNP,FDR
<chr>,<chr>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<int>,<dbl>
chr4:16598007:16599006,chr4,+,4244,-363578,4:16234929,chr4,16234929,16234929,2.89904e-06,-1.19062,1,0.028843758
chr4:16618386:16619385,chr4,+,4242,-87801,4:16531086,chr4,16531086,16531086,1.27775e-06,-0.56562,1,0.014892876
chr4:128884229:128885228,chr4,+,2727,744005,4:129628734,chr4,129628734,129628734,2.22706e-06,-0.592201,1,0.02335893
chr4:77129698:77130697,chr4,+,4355,-325157,4:76805042,chr4,76805042,76805042,4.13701e-06,0.42569,1,0.038300799
chr4:77130641:77131640,chr4,+,4356,-328501,4:76802640,chr4,76802640,76802640,3.50633e-06,0.427748,1,0.033632003
chr22:37572225:37573224,chr22,+,3895,-682143,22:36890582,chr22,36890582,36890582,6.28878e-07,-0.591706,1,0.008213661


In [54]:
print(variant_data$"CHR:BP"[0:10])
print(cqtl$"SNP_id"[0:10])

 [1] "1:154898185" "1:155135036" "1:155205634" "1:161469054" "1:171719769"
 [6] "1:205723572" "1:205737739" "1:226916078" "1:232664611" "2:18147848" 
 [1] "4:16234929"  "4:16531086"  "4:129628734" "4:76805042"  "4:76802640" 
 [6] "22:36890582" "22:38316657" "22:38631848" "5:169968987" "5:172101839"


In [55]:
#merge the risk variants with the cQTL data to see if they are in the data
risk_cQTL_snps <- merge(x=variant_data, y=cqtl, by.x="CHR:BP", by.y="SNP_id")
print(dim(risk_cQTL_snps))
print(head(risk_cQTL_snps))

[1]  0 16
Empty data.table (0 rows and 16 cols): CHR:BP,Locus Number,SNP,CHR,Peak_id,Peak_chr...


## 6) isoQTL (isoform QTL)
need to separate isoforms into separate plots

In [122]:
#make a output folder depending on the gwas
#out_dir = "$PATH1/qtl/psychencode/isoqtl_tsv/meta5"
#out_dir = "$PATH1/qtl/psychencode/isoqtl_tsv/prog1"
out_dir = "$PATH1/qtl/psychencode/isoqtl_tsv/prog2"
dir.create(out_dir)

In [123]:
iqtl = fread('$PATH1/qtl/psychencode/DER-10b_hg19_isoQTL.FPKM5.all.txt')

dim(iqtl)
head(iqtl)


transcript_id,gene_chr,gene_start,gene_end,strand,number_of_SNPs_tested,SNP_distance_to_TSS,SNP_id,SNP_chr,SNP_start,SNP_end,nominal_pval,regression_slope,top_SNP,FDR
<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<int>,<dbl>
ENST00000544594.1,chr11,71814292,71814292,+,3967,-946115,11:70868177,chr11,70868177,70868177,3.32938e-06,-0.143849,0,0.0008643359
ENST00000544594.1,chr11,71814292,71814292,+,3967,-945637,11:70868655,chr11,70868655,70868655,3.7429e-06,-0.142585,0,0.0009606373
ENST00000544594.1,chr11,71814292,71814292,+,3967,-945004,11:70869288,chr11,70869288,70869288,3.76844e-06,-0.142499,0,0.0009664706
ENST00000544594.1,chr11,71814292,71814292,+,3967,-944750,11:70869542,chr11,70869542,70869542,3.79628e-06,-0.142419,0,0.0009728959
ENST00000544594.1,chr11,71814292,71814292,+,3967,-944543,11:70869749,chr11,70869749,70869749,2.96835e-06,-0.141641,1,0.0007793781
ENST00000502597.2,chr11,71823750,71823750,+,3946,-181711,11:71642039,chr11,71642039,71642039,3.34871e-06,0.218074,0,0.0008689127


In [124]:
#add a tx id col without the version number to join on later
iqtl$tx_id_no_version <- gsub("\\..*","",iqtl$transcript_id)
dim(iqtl)
head(iqtl)

transcript_id,gene_chr,gene_start,gene_end,strand,number_of_SNPs_tested,SNP_distance_to_TSS,SNP_id,SNP_chr,SNP_start,SNP_end,nominal_pval,regression_slope,top_SNP,FDR,tx_id_no_version
<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<int>,<dbl>,<chr>
ENST00000544594.1,chr11,71814292,71814292,+,3967,-946115,11:70868177,chr11,70868177,70868177,3.32938e-06,-0.143849,0,0.0008643359,ENST00000544594
ENST00000544594.1,chr11,71814292,71814292,+,3967,-945637,11:70868655,chr11,70868655,70868655,3.7429e-06,-0.142585,0,0.0009606373,ENST00000544594
ENST00000544594.1,chr11,71814292,71814292,+,3967,-945004,11:70869288,chr11,70869288,70869288,3.76844e-06,-0.142499,0,0.0009664706,ENST00000544594
ENST00000544594.1,chr11,71814292,71814292,+,3967,-944750,11:70869542,chr11,70869542,70869542,3.79628e-06,-0.142419,0,0.0009728959,ENST00000544594
ENST00000544594.1,chr11,71814292,71814292,+,3967,-944543,11:70869749,chr11,70869749,70869749,2.96835e-06,-0.141641,1,0.0007793781,ENST00000544594
ENST00000502597.2,chr11,71823750,71823750,+,3946,-181711,11:71642039,chr11,71642039,71642039,3.34871e-06,0.218074,0,0.0008689127,ENST00000502597


In [125]:
edb <- EnsDb.Hsapiens.v86
tx <- transcripts(edb, columns=c("tx_id", "gene_id", "gene_name"))

In [126]:
mapping <- cbind(tx_id=tx$tx_id, name=tx$gene_name)
dim(mapping)
head(mapping)

tx_id,name
ENST00000456328,DDX11L1
ENST00000450305,DDX11L1
ENST00000488147,WASH7P
ENST00000619216,MIR6859-1
ENST00000473358,MIR1302-2
ENST00000469289,MIR1302-2


In [127]:
mapping <-mapping[!duplicated(mapping),]
dim(mapping)
head(mapping)

tx_id,name
ENST00000456328,DDX11L1
ENST00000450305,DDX11L1
ENST00000488147,WASH7P
ENST00000619216,MIR6859-1
ENST00000473358,MIR1302-2
ENST00000469289,MIR1302-2


In [128]:
qtl <- merge(x=iqtl, y=mapping, by.x='tx_id_no_version',by.y='tx_id', all.x=TRUE)
dim(qtl)
head(qtl)

tx_id_no_version,transcript_id,gene_chr,gene_start,gene_end,strand,number_of_SNPs_tested,SNP_distance_to_TSS,SNP_id,SNP_chr,SNP_start,SNP_end,nominal_pval,regression_slope,top_SNP,FDR,name
<chr>,<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<int>,<dbl>,<chr>
ENST00000000412,ENST00000000412.3,chr12,9096016,9096016,+,3599,-61643,12:9034373,chr12,9034373,9034373,2.36669e-07,-0.120444,0,7.729793e-05,M6PR
ENST00000000412,ENST00000000412.3,chr12,9096016,9096016,+,3599,-59754,12:9036262,chr12,9036262,9036262,1.9499e-07,-0.121873,0,6.464181e-05,M6PR
ENST00000000412,ENST00000000412.3,chr12,9096016,9096016,+,3599,-58425,12:9037591,chr12,9037591,9037591,3.178e-08,-0.12582,0,1.198125e-05,M6PR
ENST00000000412,ENST00000000412.3,chr12,9096016,9096016,+,3599,-57676,12:9038340,chr12,9038340,9038340,2.43235e-07,-0.120175,0,7.927935e-05,M6PR
ENST00000000412,ENST00000000412.3,chr12,9096016,9096016,+,3599,-55774,12:9040242,chr12,9040242,9040242,1.04313e-08,0.162043,0,4.248245e-06,M6PR
ENST00000000412,ENST00000000412.3,chr12,9096016,9096016,+,3599,-54950,12:9041066,chr12,9041066,9041066,3.16729e-08,-0.125535,0,1.194344e-05,M6PR


In [129]:
psych_snps <- fread("$PATH1/qtl/psychencode/SNP_Information_Table_with_Alleles.txt")
dim(psych_snps)
head(psych_snps)

PEC_id,Rsid,chr,position,REF,ALT
<chr>,<chr>,<chr>,<int>,<chr>,<chr>
1:927741,rs6665587,chr1,927741,G,A
1:928416,rs111754459,chr1,928416,G,A
1:930377,rs3128109,chr1,930377,C,T
1:931548,rs2799062,chr1,931548,G,T
1:935715,rs3128114,chr1,935715,C,T
1:938125,rs2710868,chr1,938125,G,A


In [130]:

qtl <- merge(x=qtl, y=psych_snps, by.x = "SNP_id", by.y = "PEC_id",all.x=TRUE)
class(qtl)
typeof(qtl)
dim(qtl)
head(qtl)


SNP_id,tx_id_no_version,transcript_id,gene_chr,gene_start,gene_end,strand,number_of_SNPs_tested,SNP_distance_to_TSS,SNP_chr,⋯,nominal_pval,regression_slope,top_SNP,FDR,name,Rsid,chr,position,REF,ALT
<chr>,<chr>,<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,⋯,<dbl>,<dbl>,<int>,<dbl>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>
10:1005381,ENST00000381344,ENST00000381344.3,chr10,1095110,1095110,+,4321,-89729,chr10,⋯,6.939020000000001e-18,-0.29296,0,7.383384e-15,IDI1,rs71494943,chr10,1005381,A,G
10:1005381,ENST00000491735,ENST00000491735.1,chr10,1095110,1095110,+,4321,-89729,chr10,⋯,1.64236e-18,0.304983,0,1.823801e-15,IDI1,rs71494943,chr10,1005381,A,G
10:1005908,ENST00000381344,ENST00000381344.3,chr10,1095110,1095110,+,4321,-89202,chr10,⋯,1.59129e-23,0.371512,0,2.380915e-20,IDI1,rs72478233,chr10,1005908,C,T
10:1005908,ENST00000491735,ENST00000491735.1,chr10,1095110,1095110,+,4321,-89202,chr10,⋯,2.18675e-22,-0.370207,0,3.096439e-19,IDI1,rs72478233,chr10,1005908,C,T
10:1012487,ENST00000381344,ENST00000381344.3,chr10,1095110,1095110,+,4321,-82623,chr10,⋯,1.0714e-19,0.327537,0,1.294643e-16,IDI1,rs11599238,chr10,1012487,A,G
10:1012487,ENST00000491735,ENST00000491735.1,chr10,1095110,1095110,+,4321,-82623,chr10,⋯,5.546689999999999e-20,-0.337368,0,6.827092e-17,IDI1,rs11599238,chr10,1012487,A,G


In [131]:
print(length(unique(qtl$tx_id_no_version)))
print(length(unique(qtl$transcript_id)))
print(length(qtl$transcript_id))
print(length(unique(qtl$name)))

[1] 8245
[1] 8245
[1] 604286
[1] 2588


In [132]:
gene_transcript_df <- qtl %>% dplyr::select(tx_id_no_version, name) %>% mutate(transcript = tx_id_no_version, gene = name) %>% dplyr::select(transcript, gene) %>% distinct
print(head(gene_transcript_df))
print(nrow(unique(gene_transcript_df)))
print(nrow(gene_transcript_df))

       transcript    gene
1 ENST00000381344    IDI1
2 ENST00000491735    IDI1
3 ENST00000441611 BLOC1S2
4 ENST00000370372 BLOC1S2
5 ENST00000299179  MRPL43
6 ENST00000318325  MRPL43
[1] 8245
[1] 8245


In [133]:
#now filter down gene_transcript_df to only include genes in our list
filtered_gene_transcript_df <- gene_transcript_df %>% dplyr::filter(gene %in% evidence_genes)
print(dim(filtered_gene_transcript_df))
print(head(filtered_gene_transcript_df))

[1] 5 2
       transcript      gene
1 ENST00000374767 NIPSNAP3A
2 ENST00000471001 NIPSNAP3A
3 ENST00000374720   SLC44A1
4 ENST00000374724   SLC44A1
5 ENST00000436716   SLC44A1


In [134]:
#add a chr:bp:ref:alt col
qtl$'var_id' <- paste0(qtl$SNP_id,":",qtl$REF,":",qtl$ALT)
dim(qtl)
head(qtl)

SNP_id,tx_id_no_version,transcript_id,gene_chr,gene_start,gene_end,strand,number_of_SNPs_tested,SNP_distance_to_TSS,SNP_chr,⋯,regression_slope,top_SNP,FDR,name,Rsid,chr,position,REF,ALT,var_id
<chr>,<chr>,<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,⋯,<dbl>,<int>,<dbl>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>,<chr>
10:1005381,ENST00000381344,ENST00000381344.3,chr10,1095110,1095110,+,4321,-89729,chr10,⋯,-0.29296,0,7.383384e-15,IDI1,rs71494943,chr10,1005381,A,G,10:1005381:A:G
10:1005381,ENST00000491735,ENST00000491735.1,chr10,1095110,1095110,+,4321,-89729,chr10,⋯,0.304983,0,1.823801e-15,IDI1,rs71494943,chr10,1005381,A,G,10:1005381:A:G
10:1005908,ENST00000381344,ENST00000381344.3,chr10,1095110,1095110,+,4321,-89202,chr10,⋯,0.371512,0,2.380915e-20,IDI1,rs72478233,chr10,1005908,C,T,10:1005908:C:T
10:1005908,ENST00000491735,ENST00000491735.1,chr10,1095110,1095110,+,4321,-89202,chr10,⋯,-0.370207,0,3.096439e-19,IDI1,rs72478233,chr10,1005908,C,T,10:1005908:C:T
10:1012487,ENST00000381344,ENST00000381344.3,chr10,1095110,1095110,+,4321,-82623,chr10,⋯,0.327537,0,1.294643e-16,IDI1,rs11599238,chr10,1012487,A,G,10:1012487:A:G
10:1012487,ENST00000491735,ENST00000491735.1,chr10,1095110,1095110,+,4321,-82623,chr10,⋯,-0.337368,0,6.827092e-17,IDI1,rs11599238,chr10,1012487,A,G,10:1012487:A:G


In [135]:
nrow(filtered_gene_transcript_df)
for(i in 1:nrow(filtered_gene_transcript_df))
{
    row <- filtered_gene_transcript_df[i,]
    print(paste0(row$gene, " ", row$transcript))
    isoqtl_fn = paste0(out_dir,'/', row$gene, "_", row$transcript, '_isoqtl.tsv')
    gwasout_fn = paste0(out_dir,'/', row$gene, "_", row$transcript, '_gwas.tsv')

    isoqtl_gene = qtl %>% dplyr::filter(name == row$gene) %>% dplyr::filter(tx_id_no_version == row$transcript) %>%
    mutate(rsid = Rsid, pval = nominal_pval) %>% dplyr::select(rsid, var_id, pval)
    gwas %>% dplyr::filter(ID2 %in% isoqtl_gene$var_id) %>% 
    mutate(rsid = SNP, var_id = ID2, pval = p) %>% dplyr::select(rsid, var_id, pval) %>%
    fwrite(gwasout_fn, sep='\t')
    fwrite(isoqtl_gene, isoqtl_fn, sep='\t')
}
 

[1] "NIPSNAP3A ENST00000374767"
[1] "NIPSNAP3A ENST00000471001"
[1] "SLC44A1 ENST00000374720"
[1] "SLC44A1 ENST00000374724"
[1] "SLC44A1 ENST00000436716"


## 7) fQTL (cell fraction QTL)
no gene or transcript id available in the data so we will just check for risk variants

In [14]:
fqtl = fread('$PATH1/qtl/psychencode/DER-11_hg19_fQTL.significant.txt')

dim(fqtl)
head(fqtl)


#Cell_Type,Chromosome_of_variant,Locus_of_variant,Nominal_p_val_of_association,Bonferroni_corrected_p_val,Regression_slope
<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>
Ex3,chr1,1307638,6.34747e-11,0.003372099,0.6212
Ex3,chr1,4026068,2.51291e-23,1.334986e-15,-0.340486
Ex3,chr1,8188316,8.03655e-11,0.004269424,-0.259089
Ex4,chr1,1288583,5.65267e-13,3.002986e-05,-0.346556
Ex4,chr1,3360036,4.65523e-15,2.473095e-07,-0.340727
Ex4,chr1,4026068,2.1921e-34,1.164555e-26,-0.416653


In [61]:
#need to add CHR:BP to the fQTL data
fqtl$"CHR:BP" <- paste0(gsub("chr","",fqtl$"Chromosome_of_variant"),":", fqtl$"Locus_of_variant")
print(head(fqtl))

   #Cell_Type Chromosome_of_variant Locus_of_variant
1:        Ex3                  chr1          1307638
2:        Ex3                  chr1          4026068
3:        Ex3                  chr1          8188316
4:        Ex4                  chr1          1288583
5:        Ex4                  chr1          3360036
6:        Ex4                  chr1          4026068
   Nominal_p_val_of_association Bonferroni_corrected_p_val Regression_slope
1:                  6.34747e-11               3.372099e-03         0.621200
2:                  2.51291e-23               1.334986e-15        -0.340486
3:                  8.03655e-11               4.269424e-03        -0.259089
4:                  5.65267e-13               3.002986e-05        -0.346556
5:                  4.65523e-15               2.473095e-07        -0.340727
6:                  2.19210e-34               1.164555e-26        -0.416653
      CHR:BP
1: 1:1307638
2: 1:4026068
3: 1:8188316
4: 1:1288583
5: 1:3360036
6: 1:4026068


In [76]:
testrow <- data.frame("#Cell_Type"="Ex3","Chromosome_of_variant"="chr1","Locus_of_variant"=154898185,"Nominal_p_val_of_association"=0.05,"Bonferroni_corrected_p_val"=0.005,"Regression_slope"=0.5,"CHR:BP"="1:154898185")
colnames(testrow) <- colnames(fqtl)
print(testrow)

  #Cell_Type Chromosome_of_variant Locus_of_variant
1        Ex3                  chr1        154898185
  Nominal_p_val_of_association Bonferroni_corrected_p_val Regression_slope
1                         0.05                      0.005              0.5
       CHR:BP
1 1:154898185


In [77]:
fqtl <- rbind(fqtl, testrow)

In [78]:
#merge the risk variants with the fQTL data to see if they are in the data
risk_fQTL_snps <- merge(x=variant_data, y=fqtl, by.x="CHR:BP", by.y="CHR:BP")
print(dim(risk_fQTL_snps))
print(head(risk_fQTL_snps))

[1]  1 10
        CHR:BP Locus Number         SNP CHR #Cell_Type Chromosome_of_variant
1: 1:154898185            1 rs114138760   1        Ex3                  chr1
   Locus_of_variant Nominal_p_val_of_association Bonferroni_corrected_p_val
1:        154898185                         0.05                      0.005
   Regression_slope
1:              0.5
