In [2]:
require('tidyverse')

Loading required package: tidyverse
── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 2.2.1     ✔ purrr   0.2.4
✔ tibble  1.4.2     ✔ dplyr   0.7.4
✔ tidyr   0.8.1     ✔ stringr 1.3.0
✔ readr   1.1.1     ✔ forcats 0.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [3]:


phe_codes <- read.csv("../phe_extraction/ListPheCodes.csv", stringsAsFactors=FALSE)
head(phe_codes)

trait,category,variable_name,readable_name,X
3140,sex specific,pregnant,pregnant,
2724,sex specific,menopause,menopause,
3581,sex specific,menopause_age,age menopause,
3591,sex specific,hysterectomy,hysterectomy,
2834,sex specific,oophorectomy,oophorectomy,
3700,sex specific,time_since_period,time since last menstrual period,


In [8]:
phe_to_run <- select(filter(phe_codes, ! category %in% c("biomarker", "sex specific", "general")), trait, category, variable_name)

phe_to_run$derived <- ifelse(phe_to_run$trait %in% c("whr", "FEV_FVC", "trunk_fp", "leg_fp", "arm_fp"), TRUE, FALSE)
head(phe_to_run)


trait,category,variable_name,derived
whr,anthropometric traits,waist-hip-ratio,True
21001,anthropometric traits,BMI,False
50,anthropometric traits,height,False
48,anthropometric traits,waist_circum,False
49,anthropometric traits,hip_circum,False
4079,lab test,BP-diastolic,False


In [40]:
# add the INI to the trait mames that are not derived
addINI <- function(trait){
    x <- strsplit(trait, "I")[[1]]
    y <- paste("INI", x[[length(x)]], sep="")
    return(y)
}

addINI("21001")
addINI("INI151")

phe_to_find <- filter(phe_to_run, !derived)
phe_to_find$trait <- sapply(phe_to_find$trait, addINI)
head(phe_to_find)

phe_to_run2 <- rbind(phe_to_find, filter(phe_to_run, derived))
dim(phe_to_run)
dim(phe_to_find)

write.table(phe_to_run2, file="../phe_extraction/phe_to_run.txt", sep="\t", quote=FALSE, row.names=FALSE, col.names=FALSE)

trait,category,variable_name,derived
INI21001,anthropometric traits,BMI,False
INI50,anthropometric traits,height,False
INI48,anthropometric traits,waist_circum,False
INI49,anthropometric traits,hip_circum,False
INI4079,lab test,BP-diastolic,False
INI4080,lab test,BP-systolic,False


In [None]:
require('data.table')

phe_loc <- fread("/oak/stanford/groups/mrivas/ukbb/24983/phenotypedata/most_recent_phenos_20190422.txt", header=FALSE, data.table=FALSE)


In [26]:
colnames(phe_loc) <- "path"
phe_loc$phe_id <- sapply(phe_loc$path, function(x) {y <- strsplit(x, "/")[[1]]; strsplit(y[[length(y)]], ".", fixed=TRUE)[[1]][[1]]})

In [31]:
phe_paths <- left_join(phe_to_find, phe_loc, by=c("trait"="phe_id"))
head(phe_paths)
table(is.na(phe_paths$path)) # ALL PRESENT - yay!
write.table(phe_paths, file="../phe_extraction/list_traits_w_paths.txt", sep="\t", row.names=FALSE, quote=FALSE)


trait,category,variable_name,derived,path
INI21001,anthropometric traits,BMI,False,/oak/stanford/groups/mrivas/ukbb24983/phenotypedata/9796/24611/phe/INI21001.phe
INI50,anthropometric traits,height,False,/oak/stanford/groups/mrivas/ukbb24983/phenotypedata/9796/24611/phe/INI50.phe
INI48,anthropometric traits,waist_circum,False,/oak/stanford/groups/mrivas/ukbb24983/phenotypedata/9796/24611/phe/INI48.phe
INI49,anthropometric traits,hip_circum,False,/oak/stanford/groups/mrivas/ukbb24983/phenotypedata/9796/24611/phe/INI49.phe
INI4079,lab test,BP-diastolic,False,/oak/stanford/groups/mrivas/ukbb24983/phenotypedata/9796/24611/phe/INI4079.phe
INI4080,lab test,BP-systolic,False,/oak/stanford/groups/mrivas/ukbb24983/phenotypedata/9796/24611/phe/INI4080.phe



FALSE 
   74 

In [32]:
# TODO:
# - visualize all the ranges?


# - load all the quant files

#### DERIVED PHENOS

In [38]:
dim(phe48)
phe48 <- read.table("../phefiles/INI48.phe") # WC
phe49 <- read.table("../phefiles/INI49.phe") # HC
pheWhr <- inner_join(phe48, select(phe49, c("V1", "V3")), by="V1")

pheWhr$V3 <- pheWhr$V3.x/pheWhr$V3.y
head(pheWhr)
write.table(select(pheWhr,c("V1", "V2", "V3")), file="../phefiles/whr.phe", col.names=FALSE, row.names=FALSE, quote=FALSE)


V1,V2,V3.x,V3.y,V3
4646215,4646215,98.0,101.0,0.970297
5007659,5007659,100.0,108.0,0.9259259
1038854,1038854,75.5,99.5,0.758794
5441319,5441319,106.0,104.0,1.0192308
5743544,5743544,87.5,93.5,0.9358289
3266897,3266897,118.0,113.0,1.0442478


In [39]:
phe20150 <- read.table("../phefiles/INI20150.phe") # FEV-1
phe3063 <- read.table("../phefiles/INI3063.phe") # FVC
pheFP <- inner_join(phe20150, select(phe3063, c("V1", "V3")), by="V1")
pheFP$V3 <- pheFP$V3.x/pheFP$V3.y

head(pheFP)

write.table(select(pheFP,c("V1", "V2", "V3")), file="../phefiles/FEV_FVC.phe", col.names=FALSE, row.names=FALSE, quote=FALSE)


V1,V2,V3.x,V3.y,V3
4646215,4646215,2.65,2.65,1.0
5007659,5007659,3.87,3.79,1.021108
5441319,5441319,1.93,1.92,1.005208
3266897,3266897,2.76,2.75,1.003636
3990694,3990694,2.56,2.56,1.0
1080519,1080519,2.24,2.24,1.0
