In [1]:
suppressPackageStartupMessages(require(tidyverse))
suppressPackageStartupMessages(require(data.table))


In [75]:
# Root directory of the phenotype data.
phe_path <- '/oak/stanford/groups/mrivas/ukbb24983/phenotypedata'

# Field IDs whose measurements are extracted and summarised below
# (5254/5262 are pooled as 'corneal-compensated' and 5255/5263 as
# 'Goldmann-correlated' later in this notebook).
GBE_IDs <- c(
    5254, 5255,
    5262, 5263
)

# Output: combined phenotype table written by this notebook.
master_phe_file <- file.path(phe_path, 'extras/iop/misc/ukb2005693_ukb37855_IOP.phe')

# Input: tab-separated extract read by this notebook.
extracted_tab_file <- file.path(phe_path, 'extras/iop/misc/ukb2005693_ukb37855_IOP.tsv')


In [80]:
# Directory under phe_path that receives the individual per-phenotype .phe
# files written at the end of this notebook.
phe_dir <- file.path(phe_path, 'extras/iop/phe')

In [16]:
# Column names expected in the extract: for every field ID in GBE_IDs the
# two columns 'f.<ID>.0.0' and 'f.<ID>.1.0', in field-ID order.
selectCols <- unlist(lapply(
    GBE_IDs,
    function(id) paste0('f.', id, c('.0.0', '.1.0'))
))

# Named column-class vector for fread(): measurement columns are read as
# double, the individual ID column as character.
colClases <- c(
    setNames(rep('double', length(selectCols)), selectCols),
    IID = 'character'
)

# Read the extract and reshape it to long format: one row per individual
# and non-missing measurement, with the column name split into its
# Field / Time / Array components.
tab_df <- fread(
    extracted_tab_file,
    colClasses = colClases
) %>%
gather(field, val, -IID) %>%
drop_na(val) %>%
mutate(
    # Strip the literal 'f.' prefix. The dot is escaped so that it matches
    # only a real '.', not an arbitrary character.
    field = str_replace(field, '^f\\.', '')
) %>%
separate(field, c('Field', 'Time', 'Array'))


In [55]:
# Number of non-missing measurements per field, with one column per time
# point (columns are labelled 'Time=<value>').
tab_df %>%
group_by(Field, Time) %>%
tally() %>%
ungroup() %>%
mutate(Time = paste0('Time=', Time)) %>%
spread(Time, n)


Field,Time=0,Time=1
<chr>,<int>,<int>
5254,112268,19512
5255,112268,19512
5262,111936,19457
5263,111936,19457


In [59]:
# Per-individual median over all measurements that belong to the same
# pooled phenotype. Fields 5254 and 5262 are pooled into INI2005254,
# fields 5255 and 5263 into INI2005255; any other field gets the label ''.
# The result has one row per IID and one column per pooled phenotype.
median_phenos <- tab_df %>%
mutate(
    median_pheno = case_when(
        Field %in% c('5254', '5262') ~ 'INI2005254', # 'corneal-compensated'
        Field %in% c('5255', '5263') ~ 'INI2005255', # 'Goldmann-correlated'
        TRUE ~ ''
    )
) %>%
group_by(IID, median_pheno) %>%
summarise(median = median(val)) %>%
spread(median_pheno, median) %>%
ungroup()


In [60]:
# Per-individual median of each single field (taken over every measurement
# of that field), spread to one 'INI<Field>' column per field.
INI_phenos <- tab_df %>%
mutate(Field = paste0('INI', Field)) %>%
group_by(IID, Field) %>%
summarise(median = median(val)) %>%
ungroup() %>%
spread(Field, median)


In [61]:
# Sanity check: show the dimensions of both summary tables, then their
# column names and types (zero-row heads).
print(dim(median_phenos))
print(dim(INI_phenos))
print(head(median_phenos, 0))
print(head(INI_phenos, 0))


[1] 128943      3
[1] 128943      5
# A tibble: 0 x 3
# … with 3 variables: IID <chr>, INI2005254 <dbl>, INI2005255 <dbl>
# A tibble: 0 x 5
# … with 5 variables: IID <chr>, INI5254 <dbl>, INI5255 <dbl>, INI5262 <dbl>,
#   INI5263 <dbl>


In [70]:
# Combine the pooled and the per-field medians into one table keyed by IID.
# FID is a copy of IID, and the columns are put into a fixed order:
# FID, IID, the two pooled phenotypes, then the four per-field phenotypes.
master_IOP_phe <- inner_join(median_phenos, INI_phenos, by = 'IID') %>%
transmute(
    FID = IID,
    IID,
    INI2005254, INI2005255,
    INI5254, INI5255, INI5262, INI5263
)


In [76]:
# Write the combined phenotype table as an unquoted, tab-separated file;
# missing values are written as the string 'NA'.
# `FALSE` is spelled out because the shorthand `F` is an ordinary variable
# that can be reassigned.
master_IOP_phe %>%
fwrite(master_phe_file, sep = '\t', na = 'NA', quote = FALSE)


In [77]:
# Display the path of the master phenotype file that was just written.
master_phe_file

In [82]:
# Write one tab-separated file per phenotype into phe_dir, named
# '<phenotype>.phe' and containing FID, IID and that phenotype's column.
# Rows where the phenotype is missing are dropped.
for (GBE_ID in c('INI2005254', 'INI2005255', 'INI5254', 'INI5255', 'INI5262', 'INI5263')) {
    master_IOP_phe %>%
    # all_of() states explicitly that GBE_ID is a character variable holding
    # a column name, rather than a column that is itself called `GBE_ID`.
    select(FID, IID, all_of(GBE_ID)) %>%
    drop_na() %>%
    fwrite(file.path(phe_dir, paste0(GBE_ID, '.phe')), sep = '\t')
}
