## HERMES - furosemide phenotype

#### Clone heRmes repository

In [5]:
#system("git clone https://github.com/nicksunderland/heRmes.git")

#### Pushing changes to Github

In [6]:
# in the terminal run
# cd /opt/notebooks
# dx download git_push.sh
# run bash git_push.sh 

#### Project & record ID

In [4]:
# Project id and dataset record id. They are joined further down as
# "{projectid}:{recordid}" to form the dataset reference handed to the dx
# command line tools.
projectid <- "project-GvZyZ20J81vgPJGbJy8pgpyq"
recordid  <- "record-Gvb0Bg0Jfxfv0q8Fb2pXqKjg"

#### Libraries

In [14]:
library(glue)
library(data.table)
if (!requireNamespace("bit64", quietly = TRUE)) {
  install.packages("bit64")
}
library(bit64)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependency ‘bit’


Loading required package: bit


Attaching package: ‘bit’


The following object is masked from ‘package:data.table’:

    setattr


The following object is masked from ‘package:base’:

    xor


Attaching package bit64

package:bit64 (c) 2011-2017 Jens Oehlschlaegel

creators: integer64 runif64 seq :

coercion: as.integer64 as.vector as.logical as.integer as.double as.character as.bitstring

logical operator: ! & | xor != == < <= >= >

arithmetic operator: + - * / %/% %% ^

math: sign abs sqrt log log2 log10

math: floor ceiling trunc round

querying: is.integer64 is.vector [is.atomic} [length] format print str

values: is.na is.nan is.finite is.infinite

aggregation: any all min max range sum prod

cumulation: diff cummin cummax cumsum cumprod

access: length<- [ [<- [[ [[<-

combine: c rep cbind rbind as.data.frame



for more help type ?bit64


Attaching package:

## Extract GP data

#### Download data dictionary

In [6]:
# Work from the notebook directory: the dictionary files are written to, and
# then listed from, the current working directory.
setwd("/opt/notebooks")
dataset <- glue("{projectid}:{recordid}")
cmd <- glue("dx extract_dataset {dataset} -ddd")
# system() returns the command's exit status. Only warn on failure, because
# dictionary files left by an earlier run may still be present and usable.
if (system(cmd) != 0) {
    warning("Command failed: ", cmd, call. = FALSE)
}
dict_files <- list.files(pattern="codings|data_dictionary|entity_dictionary")
data_dict_file <- dict_files[grepl("data_dictionary", dict_files)]
# Everything downstream reads this single file, so fail here with a clear
# message rather than later inside fread().
if (length(data_dict_file) != 1) {
    stop("Expected exactly one data_dictionary file in ", getwd(),
         " but found ", length(data_dict_file), call. = FALSE)
}

#### Data dictionary filter function

In [7]:
#' @title filter_data_dict
#'
#' @param dict_path, str, path to the dataset.data_dictionary.csv
#' @param codes_struc, list, named list of lists, one per item to look up, each of the form
#'   list(name=, entity=, search=). name is the column name to look for in the data dictionary,
#'   entity the entity it belongs to, and search either "matches" for an exact match on name, or
#'   "startswith" to match every dictionary name beginning with name (cases of multiple
#'   instances, repeated measures usually)
#'
#' @returns the matching rows of the data dictionary, stacked into one table with a leading
#'   `item` column holding the corresponding name from codes_struc. Stops with an error if an
#'   item has an unknown search strategy or matches no rows.
#'
filter_data_dict <- function(dict_path, codes_struc) {

    data_dict <- fread(dict_path)

    d <- lapply(codes_struc, function(x) {

        # An unrecognised strategy is reported as such, instead of falling
        # through to a misleading "not found" error.
        d0 <- switch(
            x$search,
            matches    = data_dict[entity==x$entity & name==x$name],
            startswith = data_dict[entity==x$entity & grepl(paste0("^", x$name), name)],
            stop(sprintf("Unknown search strategy [%s] for code [%s]; use \"matches\" or \"startswith\"",
                         x$search, x$name), call. = FALSE)
        )

        # Carry the offending code in the error itself so it is not lost when
        # console output is not being watched.
        if (nrow(d0)==0) {
            stop(sprintf("Code [%s] not found in data dictionary (entity [%s])",
                         x$name, x$entity), call. = FALSE)
        }

        d0

    }) |> rbindlist(idcol = "item")

    return(d)
}

#### Data extraction function

In [8]:
#' @title extract_data
#'
#' Builds and runs a `dx run table-exporter` command for the requested fields.
#'
#' @param dataset, str, a valid dataset id - format "{projectid}:{recordid}"
#' @param fields, str, vector of UK-BB format column names e.g. p31; each one is passed as its own
#'   -ifield_names argument
#' @param entity, str, string of length one - the entity to extract from e.g. participants
#' @param output, str, the base name for the output file, no extension
#'
#' @returns NULL; called for its side effect of launching a table-exporter job with destination
#'   hermes3_data/ (TSV output, field-name headers, raw coding) and echoing the command's output
#'
extract_data <- function(dataset, fields, entity, output) {

    # One quoted -ifield_names flag per field, separated by single spaces.
    field_flags <- paste0('-ifield_names="', fields, '"')
    field_str   <- paste(field_flags, collapse = " ")

    # Assemble the command template first, then let glue fill the {placeholders}
    # from this function's arguments and locals.
    template <- paste0(
        "dx run table-exporter ",
        "-idataset_or_cohort_or_dashboard={dataset} ",
        "-ioutput={output} ",
        "-ioutput_format=TSV ",
        "-iheader_style=FIELD-NAME ",
        "-icoding_option=RAW ",
        "{field_str} ",
        "-ientity={entity} ",
        "--destination hermes3_data/"
    )
    cmd <- glue(template)

    # Capture what the command prints and echo it, one line per element.
    system(cmd, intern = TRUE) |> cat(sep = "\n")
}

### Define participant data

In [17]:
# Participant-level items. Each entry maps the item name used in this notebook
# to a data-dictionary column name, its entity and the search strategy.
participant_codes <- list(
    eid               = list(name = "eid",       entity = "participant", search = "matches"),
    reason_lost_fu    = list(name = "p190",      entity = "participant", search = "matches"),
    sex               = list(name = "p31",       entity = "participant", search = "matches"),
    age               = list(name = "p21022",    entity = "participant", search = "matches"),
    ethnicity         = list(name = "p21000",    entity = "participant", search = "startswith"),
    genetic_sex       = list(name = "p22001",    entity = "participant", search = "matches"),
    genetic_ethnicity = list(name = "p22006",    entity = "participant", search = "matches"),
    pc1               = list(name = "p22009_a1", entity = "participant", search = "matches"),
    pc2               = list(name = "p22009_a2", entity = "participant", search = "matches"),
    pc3               = list(name = "p22009_a3", entity = "participant", search = "matches"),
    pc4               = list(name = "p22009_a4", entity = "participant", search = "matches"),
    pc5               = list(name = "p22009_a5", entity = "participant", search = "matches")
)

# Resolve the items against the data dictionary and preview the result.
participant_data_dict <- filter_data_dict(data_dict_file, participant_codes)
head(participant_data_dict, 3)

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,participant,eid,string,global,,,,Participant Information,,,,,,,Participant ID,
reason_lost_fu,participant,p190,integer,,data_coding_1965,,,Population characteristics > Ongoing characteristics,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=190,,,,Reason lost to follow-up,
sex,participant,p31,integer,,data_coding_9,,,Population characteristics > Baseline characteristics,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=31,,,,Sex,


### Define self-report illness data

In [18]:
# Self-reported illness / procedure items. The "startswith" entries expand to
# every instance/array column sharing the prefix.
self_illness_codes <- list(
    eid                = list(name = "eid",    entity = "participant", search = "matches"),
    self_rep_ill       = list(name = "p20002", entity = "participant", search = "startswith"), # 0:3 instances
    self_rep_ill_year  = list(name = "p20008", entity = "participant", search = "startswith"), # 0:3 instances
    self_rep_proc      = list(name = "p20004", entity = "participant", search = "startswith"), # 0:3 instances
    self_rep_proc_year = list(name = "p20010", entity = "participant", search = "startswith")  # 0:3 instances
)

# Resolve the items against the data dictionary and preview the result.
self_rep_data_dict <- filter_data_dict(data_dict_file, self_illness_codes)
head(self_rep_data_dict, 3)

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,participant,eid,string,global,,,,Participant Information,,,,,,,Participant ID,
self_rep_ill,participant,p20002_i0_a0,integer,,data_coding_6,,,Assessment centre > Verbal interview > Medical conditions,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=20002,,,,"Non-cancer illness code, self-reported | Instance 0 | Array 0",
self_rep_ill,participant,p20002_i0_a1,integer,,data_coding_6,,,Assessment centre > Verbal interview > Medical conditions,,,http://biobank.ctsu.ox.ac.uk/crystal/field.cgi?id=20002,,,,"Non-cancer illness code, self-reported | Instance 0 | Array 1",


### Define HES inpatient data

In [19]:
# Columns to pull from the `hesin` entity.
hesin_to_extract <- list(
    eid       = list(name = "eid",       entity = "hesin", search = "matches"),
    ins_index = list(name = "ins_index", entity = "hesin", search = "matches"),
    epistart  = list(name = "epistart",  entity = "hesin", search = "matches"),
    admidate  = list(name = "admidate",  entity = "hesin", search = "matches")
)

# Resolve the items against the data dictionary and preview the result.
hes_data_dict <- filter_data_dict(data_dict_file, hesin_to_extract)
head(hes_data_dict, 4)

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,hesin,eid,string,,,,,,,,,,participant:eid,many_to_one,Participant ID,
ins_index,hesin,ins_index,integer,,,,,,,,,,,,Instance index,
epistart,hesin,epistart,date,,,,,,,,,,,,Episode start date,
admidate,hesin,admidate,date,,,,,,,,,,,,Date of admission to hospital,


### Define HES diagnoses data

In [20]:
# Columns to pull from the `hesin_diag` entity.
hesdiag_to_extract <- list(
    eid        = list(name = "eid",        entity = "hesin_diag", search = "matches"),
    ins_index  = list(name = "ins_index",  entity = "hesin_diag", search = "matches"),
    diag_icd9  = list(name = "diag_icd9",  entity = "hesin_diag", search = "matches"),
    diag_icd10 = list(name = "diag_icd10", entity = "hesin_diag", search = "matches")
)

# Resolve the items against the data dictionary and preview the result.
hesdiag_data_dict <- filter_data_dict(data_dict_file, hesdiag_to_extract)
head(hesdiag_data_dict, 4)

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,hesin_diag,eid,string,,,,,,,,,,,,Participant ID,
ins_index,hesin_diag,ins_index,integer,,,,,,,,,,,,Instance index,
diag_icd9,hesin_diag,diag_icd9,string,,data_coding_87,,,,,,,,,,Diagnoses - ICD9,
diag_icd10,hesin_diag,diag_icd10,string,,data_coding_19,,,,,,,,,,Diagnoses - ICD10,


### Define HES procedures data

In [21]:
# Columns to pull from the `hesin_oper` entity.
hesproc_to_extract <- list(
    eid       = list(name = "eid",       entity = "hesin_oper", search = "matches"),
    ins_index = list(name = "ins_index", entity = "hesin_oper", search = "matches"),
    oper3     = list(name = "oper3",     entity = "hesin_oper", search = "matches"),
    oper4     = list(name = "oper4",     entity = "hesin_oper", search = "matches")
)

# Resolve the items against the data dictionary and preview the result.
hesoper_data_dict <- filter_data_dict(data_dict_file, hesproc_to_extract)
head(hesoper_data_dict, 4)

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,hesin_oper,eid,string,,,,,,,,,,,,Participant ID,
ins_index,hesin_oper,ins_index,integer,,,,,,,,,,,,Instance index,
oper3,hesin_oper,oper3,string,,data_coding_259,,,,,,,,,,Operative procedures - OPCS3,
oper4,hesin_oper,oper4,string,,data_coding_240,,,,,,,,,,Operative procedures - OPCS4,


#### Define GP clinical data

In [22]:
# Columns to pull from the `gp_clinical` entity. The item name `date` is the
# notebook's name for the `event_dt` column.
gp_clinical_codes <- list(
    eid           = list(name = "eid",           entity = "gp_clinical", search = "matches"),
    data_provider = list(name = "data_provider", entity = "gp_clinical", search = "matches"),
    date          = list(name = "event_dt",      entity = "gp_clinical", search = "matches"),
    read_2        = list(name = "read_2",        entity = "gp_clinical", search = "matches"),
    read_3        = list(name = "read_3",        entity = "gp_clinical", search = "matches"),
    value1        = list(name = "value1",        entity = "gp_clinical", search = "matches"),
    value2        = list(name = "value2",        entity = "gp_clinical", search = "matches"),
    value3        = list(name = "value3",        entity = "gp_clinical", search = "matches")
)

# Resolve the items against the data dictionary and preview the result.
gp_clinical_data_dict <- filter_data_dict(data_dict_file, gp_clinical_codes)
head(gp_clinical_data_dict, 3)

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,gp_clinical,eid,string,,,,,,,,,,participant:eid,many_to_one,Participant ID,
data_provider,gp_clinical,data_provider,string,,data_coding_626,,,,,,,,,,Data provider,
date,gp_clinical,event_dt,date,,data_coding_819,,,,,yes,,,,,Date clinical code was entered,


#### Define GP medications data

In [23]:
# Columns to pull from the `gp_scripts` entity. The item name `date` is the
# notebook's name for the `issue_date` column.
gp_medication_codes <- list(
    eid           = list(name = "eid",           entity = "gp_scripts", search = "matches"),
    data_provider = list(name = "data_provider", entity = "gp_scripts", search = "matches"),
    date          = list(name = "issue_date",    entity = "gp_scripts", search = "matches"),
    read_2        = list(name = "read_2",        entity = "gp_scripts", search = "matches"),
    bnf_code      = list(name = "bnf_code",      entity = "gp_scripts", search = "matches"),
    dmd_code      = list(name = "dmd_code",      entity = "gp_scripts", search = "matches"),
    drug_name     = list(name = "drug_name",     entity = "gp_scripts", search = "matches"),
    quantity      = list(name = "quantity",      entity = "gp_scripts", search = "matches")
)

# Resolve the items against the data dictionary and preview the result.
gp_medication_data_dict <- filter_data_dict(data_dict_file, gp_medication_codes)
head(gp_medication_data_dict, 3)

item,entity,name,type,primary_key_type,coding_name,concept,description,folder_path,is_multi_select,is_sparse_coding,linkout,longitudinal_axis_type,referenced_entity_field,relationship,title,units
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<chr>,<chr>,<chr>,<chr>,<lgl>,<chr>,<chr>,<chr>,<chr>
eid,gp_scripts,eid,string,,,,,,,,,,participant:eid,many_to_one,Participant ID,
data_provider,gp_scripts,data_provider,string,,data_coding_626,,,,,,,,,,Data provider,
date,gp_scripts,issue_date,date,,data_coding_819,,,,,yes,,,,,Date prescription was issued,


#### Run Table-Exporter extraction

In [24]:
# Where each exported table is expected to appear once its table-exporter job
# has finished. The file base names correspond to the `output` values below.
data_file_paths <- list(
    demog = "/mnt/project/hermes3_data/data_participant.tsv",
    self  = "/mnt/project/hermes3_data/data_selfreportedillness.tsv",
    hesin = "/mnt/project/hermes3_data/data_hesin.tsv",
    diag  = "/mnt/project/hermes3_data/data_hesin_diag.tsv",
    oper  = "/mnt/project/hermes3_data/data_hesin_oper.tsv",
    gp    = "/mnt/project/hermes3_data/data_gp_clinical.tsv",
    med   = "/mnt/project/hermes3_data/data_gp_medication.tsv"
)

# Launch an export only for tables whose file is not already present. The
# guards are kept as separate `if`s so a data dictionary is only touched when
# its export is actually needed.
if (!file.exists(data_file_paths[["demog"]])) {
    extract_data(
        dataset = dataset,
        fields  = participant_data_dict[["name"]],
        entity  = "participant",
        output  = "data_participant"
    )
}
if (!file.exists(data_file_paths[["self"]])) {
    extract_data(
        dataset = dataset,
        fields  = self_rep_data_dict[["name"]],
        entity  = "participant",
        output  = "data_selfreportedillness"
    )
}
if (!file.exists(data_file_paths[["hesin"]])) {
    extract_data(
        dataset = dataset,
        fields  = hes_data_dict[["name"]],
        entity  = "hesin",
        output  = "data_hesin"
    )
}
if (!file.exists(data_file_paths[["diag"]])) {
    extract_data(
        dataset = dataset,
        fields  = hesdiag_data_dict[["name"]],
        entity  = "hesin_diag",
        output  = "data_hesin_diag"
    )
}
if (!file.exists(data_file_paths[["oper"]])) {
    extract_data(
        dataset = dataset,
        fields  = hesoper_data_dict[["name"]],
        entity  = "hesin_oper",
        output  = "data_hesin_oper"
    )
}
if (!file.exists(data_file_paths[["gp"]])) {
    extract_data(
        dataset = dataset,
        fields  = gp_clinical_data_dict[["name"]],
        entity  = "gp_clinical",
        output  = "data_gp_clinical"
    )
}
if (!file.exists(data_file_paths[["med"]])) {
    extract_data(
        dataset = dataset,
        fields  = gp_medication_data_dict[["name"]],
        entity  = "gp_scripts",
        output  = "data_gp_medication"
    )
}

## Read in extracted data

In [15]:
# Read every exported table into a named list, keyed like data_file_paths.
data_files <- list()

for (i in seq_along(data_file_paths)) {
    f <- data_file_paths[[i]]
    n <- names(data_file_paths)[i]
    if (!file.exists(f)) {
        # The folder named in the hint is derived from the path itself so it
        # cannot drift from the real export destination. The whole explanation
        # travels in the error rather than in a separate cat().
        stop(glue("File {basename(f)} not found, ",
                  "check the Monitor tab for the status of the Table-exporter ",
                  "and the '{basename(dirname(f))}' folder. If this has finished try ",
                  "launching another Notebook session/instance (I'm not sure why ",
                  "the mounted /mnt/project/ file structure doesn't refresh when ",
                  "files are added externally)."),
             call. = FALSE)
    }
    # Progress line, flushed immediately so it shows before the read starts.
    cat(glue('...{n}: {f}\n'), sep="\n")
    flush.console()
    data_files[[n]] <- fread(f)
}

# Preview the first rows of each table.
lapply(data_files, head, n = 5)

...demog: /mnt/project/hermes3_data/data_participant.tsv
...self: /mnt/project/hermes3_data/data_selfreportedillness.tsv
...hesin: /mnt/project/hermes3_data/data_hesin.tsv
...diag: /mnt/project/hermes3_data/data_hesin_diag.tsv
...oper: /mnt/project/hermes3_data/data_hesin_oper.tsv
...gp: /mnt/project/hermes3_data/data_gp_clinical.tsv
...med: /mnt/project/hermes3_data/data_gp_medication.tsv




eid,p190,p31,p21022,p21000_i0,p21000_i1,p21000_i2,p21000_i3,p22001,p22006,p22009_a1,p22009_a2,p22009_a3,p22009_a4,p22009_a5
<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1062757,,0,62,1001,,,,0.0,1.0,-12.6693,4.30928,-2.23438,-0.109226,-7.364
2217356,,1,57,1001,,,,,,,,,,
3712401,,1,56,1001,,,,1.0,1.0,-13.7558,5.64773,-3.58187,7.74165,21.0285
1011090,,0,49,1001,,,,0.0,1.0,-12.3794,2.03865,-0.837131,-0.562303,2.60283
2874739,,0,59,1001,,,,0.0,1.0,-12.6667,4.18819,-1.96996,2.85875,0.6597

eid,p20002_i0_a0,p20002_i0_a1,p20002_i0_a2,p20002_i0_a3,p20002_i0_a4,p20002_i0_a5,p20002_i0_a6,p20002_i0_a7,p20002_i0_a8,⋯,p20010_i3_a22,p20010_i3_a23,p20010_i3_a24,p20010_i3_a25,p20010_i3_a26,p20010_i3_a27,p20010_i3_a28,p20010_i3_a29,p20010_i3_a30,p20010_i3_a31
<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>
1000074,1351,,,,,,,,,⋯,,,,,,,,,,
1000194,1086,,,,,,,,,⋯,,,,,,,,,,
1000258,1065,1265.0,1465.0,,,,,,,⋯,,,,,,,,,,
1000280,1436,,,,,,,,,⋯,,,,,,,,,,
1000299,1154,,,,,,,,,⋯,,,,,,,,,,

eid,ins_index,epistart,admidate
<int>,<int>,<IDate>,<IDate>
3026745,17,2009-04-23,2009-04-23
5469223,105,2013-06-18,2013-06-18
2099936,32,2021-02-18,2021-02-18
5152448,0,1997-05-07,1997-05-07
5944709,19,2017-12-13,2017-12-12

eid,ins_index,diag_icd9,diag_icd10
<int>,<int>,<chr>,<chr>
2097360,0,,D140
2622045,9,,Z800
4723574,4,,N183
5697013,17,,Z888
5858023,42,,M059

eid,ins_index,oper3,oper4
<int>,<int>,<int>,<chr>
2940276,4,,W822
3772102,12,,K634
5994223,4,,E492
4168836,0,,Y819
4239222,0,,Y767

eid,data_provider,event_dt,read_2,read_3,value1,value2,value3
<int>,<int>,<IDate>,<chr>,<chr>,<chr>,<chr>,<chr>
1545611,3,2001-07-11,,XE2JU,,,
3261273,3,2005-02-08,,XaF6J,,,
5677515,3,2015-05-12,,42N..,0.4,,
4823906,3,2014-03-26,,XaFsp,,,
2656445,3,2008-04-16,,2469.,174.0,,

eid,data_provider,issue_date,read_2,bnf_code,dmd_code,drug_name,quantity
<int>,<int>,<IDate>,<chr>,<chr>,<int64>,<chr>,<chr>
2894391,3,2001-11-08,,04.07.02.00.00,,Kapake 30mg/500mg tablets (Galen Ltd),100 tablet(s)
1469022,4,2011-12-28,blf2.,,,,
4507779,3,2015-11-24,,03.01.01.03.00,,Ventolin 100micrograms/dose Evohaler (GlaxoSmithKline UK Ltd),400 dose
1548822,2,2006-12-29,,0501030I0AAABAB,,DOXYCYCLINE CAPSULES 100MG,8.000
5381837,1,2016-06-14,bu51.00,,319799004.0,Clopidogrel 75mg tablets,28.000


## Rename columns

In [25]:
#' @title rename_cols
#'
#' @param d, table whose columns are renamed through setnames()
#' @param code_struc, list, named list of lists of the form list(name=, search=, ...). The list
#'   names are the new column names; name is the current column name (search "matches") or the
#'   prefix shared by a group of current column names (search "startswith")
#'
#' @returns d. A "matches" item renames column name to the item name. A "startswith" item renames
#'   every column beginning with name to <item>_1, <item>_2, ... in column order. Stops with an
#'   error if a "startswith" item matches no column.
#'
rename_cols <- function(d, code_struc) {
    for (col in names(code_struc)) {
        spec <- code_struc[[col]]
        if (spec$search=="matches") {
            setnames(d, spec$name, col)
        } else if (spec$search=="startswith") {
            regex   <- paste0("^", spec$name)
            matches <- grep(regex, names(d), value = TRUE)
            # With no matching column, 1:length(matches) would be c(1, 0) and
            # yield two bogus names; report the real problem instead.
            if (length(matches) == 0L) {
                stop(sprintf("No column starting with [%s] found for item [%s]",
                             spec$name, col), call. = FALSE)
            }
            new_names <- paste0(col, "_", seq_along(matches))
            setnames(d, matches, new_names)
        }
    }
    return(d)
}

# Give each table the readable item names from the code structure it was
# extracted with.
data_files[["demog"]] <- rename_cols(d = data_files[["demog"]], code_struc = participant_codes)
data_files[["self"]]  <- rename_cols(d = data_files[["self"]],  code_struc = self_illness_codes)
data_files[["hesin"]] <- rename_cols(d = data_files[["hesin"]], code_struc = hesin_to_extract)
data_files[["diag"]]  <- rename_cols(d = data_files[["diag"]],  code_struc = hesdiag_to_extract)
data_files[["oper"]]  <- rename_cols(d = data_files[["oper"]],  code_struc = hesproc_to_extract)
data_files[["gp"]]    <- rename_cols(d = data_files[["gp"]],    code_struc = gp_clinical_codes)
data_files[["med"]]   <- rename_cols(d = data_files[["med"]],   code_struc = gp_medication_codes)

# Show the first row of each table to confirm the new headers.
lapply(data_files, head, n = 1)

eid,reason_lost_fu,sex,age,ethnicity_1,ethnicity_2,ethnicity_3,ethnicity_4,genetic_sex,genetic_ethnicity,pc1,pc2,pc3,pc4,pc5
<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1062757,,0,62,1001,,,,0,1,-12.6693,4.30928,-2.23438,-0.109226,-7.364

eid,self_rep_ill_1,self_rep_ill_2,self_rep_ill_3,self_rep_ill_4,self_rep_ill_5,self_rep_ill_6,self_rep_ill_7,self_rep_ill_8,self_rep_ill_9,⋯,self_rep_proc_year_119,self_rep_proc_year_120,self_rep_proc_year_121,self_rep_proc_year_122,self_rep_proc_year_123,self_rep_proc_year_124,self_rep_proc_year_125,self_rep_proc_year_126,self_rep_proc_year_127,self_rep_proc_year_128
<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,<int>,⋯,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>
1000074,1351,,,,,,,,,⋯,,,,,,,,,,

eid,ins_index,epistart,admidate
<int>,<int>,<IDate>,<IDate>
3026745,17,2009-04-23,2009-04-23

eid,ins_index,diag_icd9,diag_icd10
<int>,<int>,<chr>,<chr>
2097360,0,,D140

eid,ins_index,oper3,oper4
<int>,<int>,<int>,<chr>
2940276,4,,W822

eid,data_provider,date,read_2,read_3,value1,value2,value3
<int>,<int>,<IDate>,<chr>,<chr>,<chr>,<chr>,<chr>
1545611,3,2001-07-11,,XE2JU,,,

eid,data_provider,date,read_2,bnf_code,dmd_code,drug_name,quantity
<int>,<int>,<IDate>,<chr>,<chr>,<int64>,<chr>,<chr>
2894391,3,2001-11-08,,04.07.02.00.00,,Kapake 30mg/500mg tablets (Galen Ltd),100 tablet(s)


## Data processing

In [31]:
# Load the furosemide code list from the heRmes directory tree (path is
# relative to the working directory).
codes <- fread(
    file.path("heRmes", "inst", "extdata", "hermes_furosemide_codes", "hermes_furosemide_codes.tsv")
)
# Drop one pair of wrapping single quotes from each code, updating the column
# in place.
set(codes, j = "code", value = sub("^'(.+?)'$", "\\1", codes[["code"]]))
head(codes)

concept,code,code_type,description
<chr>,<chr>,<chr>,<chr>
Heart Failur,1076',ukbb_self_reported_illness,heart failure/pulmonary odema
Heart Failure,0205052AEAAAAAA,bnf,Sacubitril/Valsartan_Tab 49mg/51mg
Heart Failure,0205052AEAAABAB,bnf,Sacubitril/Valsartan_Tab 97mg/103mg
Heart Failure,0205052AEAAACAC,bnf,Sacubitril/Valsartan_Tab 24mg/26mg
Heart Failure,0205052AEBBAAAA,bnf,Entresto_Tab 49mg/51mg
Heart Failure,0205052AEBBABAB,bnf,Entresto_Tab 97mg/103mg
