## HERMES - furosemide phenotype

#### Clone HeRmes repository

In [None]:
#system("git clone https://github.com/nicksunderland/heRmes.git")

#### Pushing changes to Github

In [None]:
# In the terminal, run:
# cd /opt/notebooks
# dx download git_push.sh
# bash git_push.sh

#### Project & record ID

In [1]:
# Identifiers of the RAP project and of the dataset record inside it. They are
# joined as "<projectid>:<recordid>" to address the dataset in `dx` calls.
projectid <- "project-GvZyZ20J81vgPJGbJy8pgpyq"
recordid  <- "record-Gvb0Bg0Jfxfv0q8Fb2pXqKjg"

#### Libraries

In [2]:
library(glue)
library(data.table)
library(yaml)
source("/opt/notebooks/heRmes/R/ukbb_extraction_utils.R")

## Extract data
Given the large datasets, we use `dx run table-exporter` to extract the required phenotype data. The extraction function below will create a table-exporter job, which you can track in the 'Monitor' tab on your RAP's homepage. The data will be extracted to your project into a folder called `hermes3_data`. The data is not immediately loaded into this session; we will import it later. 

To get the small data dictionaries locally in this session, we use the `dx extract_dataset` command. 

#### Download data dictionary

In [3]:
# Download the dataset's dictionary files and locate the data dictionary.
#
# The dictionary files are looked up by bare file name in the working
# directory, and `data_dict_file` is later used as a relative path, so the
# working directory is fixed first.
setwd("/opt/notebooks")

# Address the dataset as "<project>:<record>" and request only the
# dictionaries (-ddd), not the data itself.
dataset <- glue("{projectid}:{recordid}")
cmd <- glue("dx extract_dataset {dataset} -ddd")

# system() returns the command's exit status; do not ignore a failure. Only
# warn here, because usable dictionary files from an earlier run may still be
# present in the working directory.
dx_status <- system(cmd)
if (dx_status != 0L) {
    warning("`", cmd, "` exited with status ", dx_status,
            "; looking for existing dictionary files in ", getwd(),
            call. = FALSE)
}

dict_files <- list.files(pattern = "codings|data_dictionary|entity_dictionary")
data_dict_file <- dict_files[grepl("data_dictionary", dict_files)]

# Fail early with a clear message instead of passing an empty file name to
# the downstream dictionary filter.
if (length(data_dict_file) == 0L) {
    stop("No data dictionary file found in ", getwd(),
         "; check that `", cmd, "` completed successfully.",
         call. = FALSE)
}
if (length(data_dict_file) > 1L) {
    warning("Multiple data dictionary files found in ", getwd(), ": ",
            paste(data_dict_file, collapse = ", "),
            call. = FALSE)
}

#### Read the extraction config file

In [9]:
# Load the extraction configuration: one named entry per table to export.
config <- read_yaml("/opt/notebooks/heRmes/scripts/ukbb_extraction_config.yml")

# For every configured table, look up its entity and requested columns in the
# data dictionary. The result keeps the names of `config`.
extraction_template <- lapply(
    config,
    function(spec) {
        filter_ukbb_data_dict(data_dict_file, spec$entity, spec$columns)
    }
)

# Display every row of each template (replace Inf with e.g. 3 for a preview).
lapply(extraction_template, head, Inf)

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,participant,eid,string,global,,,,Participant Information,,,,,,,Participant ID,
assessment_date,participant,p53_i0,date,,,,,Assessment centre > Recruitment > Reception,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=53,,,,Date of attending assessment centre | Instance 0,
assessment_date,participant,p53_i1,date,,,,,Assessment centre > Recruitment > Reception,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=53,,,,Date of attending assessment centre | Instance 1,
assessment_date,participant,p53_i2,date,,,,,Assessment centre > Recruitment > Reception,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=53,,,,Date of attending assessment centre | Instance 2,
assessment_date,participant,p53_i3,date,,,,,Assessment centre > Recruitment > Reception,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=53,,,,Date of attending assessment centre | Instance 3,
assessment_age,participant,p21003_i0,integer,,,,,Assessment centre > Recruitment > Reception,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=21003,,,,Age when attended assessment centre | Instance 0,years
assessment_age,participant,p21003_i1,integer,,,,,Assessment centre > Recruitment > Reception,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=21003,,,,Age when attended assessment centre | Instance 1,years
assessment_age,participant,p21003_i2,integer,,,,,Assessment centre > Recruitment > Reception,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=21003,,,,Age when attended assessment centre | Instance 2,years
assessment_age,participant,p21003_i3,integer,,,,,Assessment centre > Recruitment > Reception,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=21003,,,,Age when attended assessment centre | Instance 3,years
reason_lost_fu,participant,p190,integer,,data_coding_1965,,,Population characteristics > Ongoing characteristics,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=190,,,,Reason lost to follow-up,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,hesin,eid,string,,,,,,,,,,participant:eid,many_to_one,Participant ID,
ins_index,hesin,ins_index,integer,,,,,,,,,,,,Instance index,
epistart,hesin,epistart,date,,,,,,,,,,,,Episode start date,
admidate,hesin,admidate,date,,,,,,,,,,,,Date of admission to hospital,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,hesin_diag,eid,string,,,,,,,,,,,,Participant ID,
ins_index,hesin_diag,ins_index,integer,,,,,,,,,,,,Instance index,
diag_icd9,hesin_diag,diag_icd9,string,,data_coding_87,,,,,,,,,,Diagnoses - ICD9,
diag_icd10,hesin_diag,diag_icd10,string,,data_coding_19,,,,,,,,,,Diagnoses - ICD10,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,hesin_oper,eid,string,,,,,,,,,,,,Participant ID,
ins_index,hesin_oper,ins_index,integer,,,,,,,,,,,,Instance index,
oper3,hesin_oper,oper3,string,,data_coding_259,,,,,,,,,,Operative procedures - OPCS3,
oper4,hesin_oper,oper4,string,,data_coding_240,,,,,,,,,,Operative procedures - OPCS4,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,gp_clinical,eid,string,,,,,,,,,,participant:eid,many_to_one,Participant ID,
data_provider,gp_clinical,data_provider,string,,data_coding_626,,,,,,,,,,Data provider,
date,gp_clinical,event_dt,date,,data_coding_819,,,,,yes,,,,,Date clinical code was entered,
read_2,gp_clinical,read_2,string,,,,,,,,,,,,Read v2 code,
read_3,gp_clinical,read_3,string,,,,,,,,,,,,CTV3 (Read v3) code,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,gp_scripts,eid,string,,,,,,,,,,participant:eid,many_to_one,Participant ID,
data_provider,gp_scripts,data_provider,string,,data_coding_626,,,,,,,,,,Data provider,
date,gp_scripts,issue_date,date,,data_coding_819,,,,,yes,,,,,Date prescription was issued,
read_2,gp_scripts,read_2,string,,,,,,,,,,,,Read v2 code,
bnf_code,gp_scripts,bnf_code,string,,,,,,,,,,,,BNF code,
dmd_code,gp_scripts,dmd_code,string,,,,,,,,,,,,dm+d code,
drug_name,gp_scripts,drug_name,string,,,,,,,,,,,,Drug name,
quantity,gp_scripts,quantity,string,,,,,,,,,,,,Quantity issued,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,olink_instance_0,eid,string,local,,,,,,,,,participant:eid,one_to_one,Participant ID,
ntprobnp,olink_instance_0,ntprobnp,float,,,,,,,,,,,,NTproBNP;N-terminal prohormone of brain natriuretic peptide,
glp1r,olink_instance_0,glp1r,float,,,,,,,,,,,,GLP1R;Glucagon-like peptide 1 receptor,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,olink_instance_2,eid,string,local,,,,,,,,,participant:eid,one_to_one,Participant ID,
ntprobnp,olink_instance_2,ntprobnp,float,,,,,,,,,,,,NTproBNP;N-terminal prohormone of brain natriuretic peptide,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,olink_instance_3,eid,string,local,,,,,,,,,participant:eid,one_to_one,Participant ID,
ntprobnp,olink_instance_3,ntprobnp,float,,,,,,,,,,,,NTproBNP;N-terminal prohormone of brain natriuretic peptide,


#### Run Table-Exporter extraction
Check the 'Monitor' tab in the project to confirm that the file extraction jobs have completed.

In [10]:
# Walk over the configured tables and request an extraction for each one
# whose output file is not yet visible under the mounted project folder.
for (table in names(config)) {

    table_cfg <- config[[table]]
    filepath  <- file.path("/mnt/project", table_cfg[["output"]])

    # Output already present: report it and move on to the next table.
    if (file.exists(filepath)) {
        cat("File:", filepath, "- exists\n")
        next
    }

    # Otherwise hand the dictionary field names for this table to the
    # extraction helper.
    extract_ukbb_data(dataset = dataset,
                      fields  = extraction_template[[table]][["name"]],
                      entity  = table_cfg[["entity"]],
                      output  = table_cfg[["output"]])
}

File: /mnt/project/hermes3_data/data_participant.tsv - exists
File: /mnt/project/hermes3_data/data_hesin.tsv - exists
File: /mnt/project/hermes3_data/data_hesin_diag.tsv - exists
File: /mnt/project/hermes3_data/data_hesin_oper.tsv - exists
File: /mnt/project/hermes3_data/data_gp_clinical.tsv - exists
File: /mnt/project/hermes3_data/data_gp_scripts.tsv - exists
File: /mnt/project/hermes3_data/data_olink_instance_0.tsv - exists
File: /mnt/project/hermes3_data/data_olink_instance_2.tsv - exists
File: /mnt/project/hermes3_data/data_olink_instance_3.tsv - exists
