## HERMES - furosemide phenotype

#### Clone HeRmes repository

In [None]:
#system("git clone https://github.com/nicksunderland/heRmes.git")

#### Pushing changes to Github

In [None]:
# in ther terminal run
# cd /opt/notebooks
# dx download git_push.sh
# run bash git_push.sh 

#### Project & record ID

In [1]:
projectid <- "project-GvZyZ20J81vgPJGbJy8pgpyq"
recordid  <- "record-Gvb0Bg0Jfxfv0q8Fb2pXqKjg"

#### Libraries

In [17]:
library(glue)
library(data.table)
library(yaml)
source("/opt/notebooks/heRmes/R/ukbb_extraction_utils.R")

## Extract data

#### Download data dictionary

In [13]:
setwd("/opt/notebooks")
dataset <- glue("{projectid}:{recordid}")
cmd <- glue("dx extract_dataset {dataset} -ddd")
system(cmd)
dict_files <- list.files(pattern="codings|data_dictionary|entity_dictionary")
data_dict_file <- dict_files[grepl("data_dictionary", dict_files)]

#### Read the extraction config file

In [24]:
config <- read_yaml("/opt/notebooks/heRmes/scripts/extraction_config.yml")

extraction_template <- lapply(config, function(table) filter_data_dict(data_dict_file, table$entity, table$columns))
                       
lapply(extraction_template, head, 3)               

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,participant,eid,string,global,,,,Participant Information,,,,,,,Participant ID,
reason_lost_fu,participant,p190,integer,,data_coding_1965,,,Population characteristics > Ongoing characteristics,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=190,,,,Reason lost to follow-up,
sex,participant,p31,integer,,data_coding_9,,,Population characteristics > Baseline characteristics,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=31,,,,Sex,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,hesin,eid,string,,,,,,,,,,participant:eid,many_to_one,Participant ID,
ins_index,hesin,ins_index,integer,,,,,,,,,,,,Instance index,
epistart,hesin,epistart,date,,,,,,,,,,,,Episode start date,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,hesin_diag,eid,string,,,,,,,,,,,,Participant ID,
ins_index,hesin_diag,ins_index,integer,,,,,,,,,,,,Instance index,
diag_icd9,hesin_diag,diag_icd9,string,,data_coding_87,,,,,,,,,,Diagnoses - ICD9,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,hesin_oper,eid,string,,,,,,,,,,,,Participant ID,
ins_index,hesin_oper,ins_index,integer,,,,,,,,,,,,Instance index,
oper3,hesin_oper,oper3,string,,data_coding_259,,,,,,,,,,Operative procedures - OPCS3,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,gp_clinical,eid,string,,,,,,,,,,participant:eid,many_to_one,Participant ID,
data_provider,gp_clinical,data_provider,string,,data_coding_626,,,,,,,,,,Data provider,
date,gp_clinical,event_dt,date,,data_coding_819,,,,,yes,,,,,Date clinical code was entered,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,gp_scripts,eid,string,,,,,,,,,,participant:eid,many_to_one,Participant ID,
data_provider,gp_scripts,data_provider,string,,data_coding_626,,,,,,,,,,Data provider,
date,gp_scripts,issue_date,date,,data_coding_819,,,,,yes,,,,,Date prescription was issued,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,olink_instance_0,eid,string,local,,,,,,,,,participant:eid,one_to_one,Participant ID,
ntprobnp,olink_instance_0,ntprobnp,float,,,,,,,,,,,,NTproBNP;N-terminal prohormone of brain natriuretic peptide,
glp1r,olink_instance_0,glp1r,float,,,,,,,,,,,,GLP1R;Glucagon-like peptide 1 receptor,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,olink_instance_2,eid,string,local,,,,,,,,,participant:eid,one_to_one,Participant ID,
ntprobnp,olink_instance_2,ntprobnp,float,,,,,,,,,,,,NTproBNP;N-terminal prohormone of brain natriuretic peptide,

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,olink_instance_3,eid,string,local,,,,,,,,,participant:eid,one_to_one,Participant ID,
ntprobnp,olink_instance_3,ntprobnp,float,,,,,,,,,,,,NTproBNP;N-terminal prohormone of brain natriuretic peptide,


#### Run Table-Exporter extraction

In [34]:
for (table in names(config)) {
    
    filepath <- file.path("/mnt/project", config[[table]][["output"]]) 
    if (!file.exists(filepath)) {
        extract_data(dataset = dataset, 
                     fields  = extraction_template[[table]][["name"]],
                     entity  = config[[table]][["entity"]],
                     output  = config[[table]][["output"]])
    } else {
        cat("File:", filepath, "- exists\n")
    }
    
}


Using input JSON:
{
    "output": "data_participant",
    "output_format": "TSV",
    "header_style": "FIELD-NAME",
    "coding_option": "RAW",
    "field_names": [
        "eid",
        "p190",
        "p31",
        "p21022",
        "p21842_i0",
        "p21842_i1",
        "p21842_i2",
        "p21842_i3",
        "p21842_i0",
        "p21842_i1",
        "p21842_i2",
        "p21842_i3",
        "p21000_i0",
        "p21000_i1",
        "p21000_i2",
        "p21000_i3",
        "p22001",
        "p22006",
        "p22009_a1",
        "p22009_a2",
        "p22009_a3",
        "p22009_a4",
        "p22009_a5",
        "p20002_i0_a0",
        "p20002_i0_a1",
        "p20002_i0_a2",
        "p20002_i0_a3",
        "p20002_i0_a4",
        "p20002_i0_a5",
        "p20002_i0_a6",
        "p20002_i0_a7",
        "p20002_i0_a8",
        "p20002_i0_a9",
        "p20002_i0_a10",
        "p20002_i0_a11",
        "p20002_i0_a12",
        "p20002_i0_a13",
        "p20002_i0_a14",
        "p20