In [1]:
suppressWarnings(suppressPackageStartupMessages({
    library(tidyverse)
    library(data.table)
}))


In [3]:
# ---- Inputs ----
# Consequence annotation table for the array variants (gzipped TSV).
Csq_f <- "/oak/stanford/groups/mrivas/ukbb24983/cal/annotation_20200912/ukb24983_cal_cALL_v2_hg19.vep101.noLoF.Csq.tsv.gz"
# Variant lists (.pvar) for the HLA and CNV datasets.
HLA_f <- "/oak/stanford/groups/mrivas/ukbb24983/hla/pgen/ukb_hla_v3.pvar"
CNV_f <- "/oak/stanford/groups/mrivas/ukbb24983/cnv/pgen/cnv.pvar"

# ---- Output ----
# Destination of the serialized per-variant weight vector (written by saveRDS).
p_factor_v4 <- "/oak/stanford/groups/mrivas/ukbb24983/array-combined/snpnet/penalty.v4.rds"


In [4]:
# HLA variants: read the pvar table, rename the '#CHROM' column to 'CHROM',
# build the ID_ALT key ("<ID>_<ALT>") and give every row a flat weight of 0.75.
HLA_df <- fread(HLA_f)
HLA_df <- rename(HLA_df, CHROM = "#CHROM")
HLA_df <- mutate(HLA_df, ID_ALT = paste(ID, ALT, sep = "_"), w = 0.75)

# CNV variants: same preparation, but every row gets a weight of 1.
CNV_df <- fread(CNV_f)
CNV_df <- rename(CNV_df, CHROM = "#CHROM")
CNV_df <- mutate(CNV_df, ID_ALT = paste(ID, ALT, sep = "_"), w = 1)


In [5]:
# Load the consequence annotation table and rename its '#CHROM' column to
# 'CHROM' so it can be referred to without backticks.
Csq <- rename(fread(Csq_f), CHROM = "#CHROM")


In [6]:
# Lookup table mapping each consequence group label (Csq) to its weight (w).
#   Csq_priority : integer rank 1..6, used only for ordering the groups
#   Csq          : group label, the join key against the annotation table
#   w            : weight for the group; 'ptv' and 'pav' are below 1,
#                  every other group is 1
# stringsAsFactors is spelled out as FALSE (not the reassignable shorthand F)
# so Csq stays a character column on R < 4.0 as well.
weights_df <- data.frame(
    Csq_priority = 1:6,
    Csq = c('ptv', 'pav', 'pcv', 'intron', 'utr', 'others'),
    w = c(0.5, 0.75, 1, 1, 1, 1),
    stringsAsFactors = FALSE
)


In [7]:
# Sanity check (displayed, not stored): number of annotated variants per
# consequence group, shown next to the group's priority and weight and
# ordered by priority.
Csq %>%
    count(Csq) %>%
    left_join(weights_df, by = "Csq") %>%
    arrange(Csq_priority)


Csq,n,Csq_priority,w
<chr>,<int>,<int>,<dbl>
ptv,28321,1,0.5
pav,89161,2,0.75
pcv,11282,3,1.0
intron,358439,4,1.0
utr,7928,5,1.0
others,310295,6,1.0


In [8]:
# Stack the (ID_ALT, w) pairs from all three variant sources into one table:
#   * annotated variants: w is looked up from weights_df via the Csq label,
#     and the ID_ALT key is built as "<ID>_<ALT>"
#   * HLA_df / CNV_df: ID_ALT and w were already attached when those tables
#     were prepared
weights <- bind_rows(
    Csq %>%
        left_join(weights_df, by = "Csq") %>%
        mutate(ID_ALT = paste(ID, ALT, sep = "_")) %>%
        select(ID_ALT, w),

    HLA_df %>% select(ID_ALT, w),

    CNV_df %>% select(ID_ALT, w)
)

# left_join() leaves w = NA for any Csq label that has no row in weights_df.
# Stop here rather than silently carrying missing weights into the saved file.
stopifnot(!anyNA(weights$w))


In [9]:
# Sanity check (displayed, not stored): how many variants carry each weight.
count(weights, w)


w,n
<dbl>,<int>
0.5,28321
0.75,89523
1.0,963124


In [11]:
# Collapse the two-column table into a named numeric vector
# (names = ID_ALT, values = w) and serialize it to the output path.
saveRDS(deframe(weights), file = p_factor_v4)
