In [5]:
# Input/output locations used by the rest of the script.
# meta_tbl_f     : gzipped meta-analysis summary statistics (read via zcat + fread).
# meta_tbl_out_f : destination of the flip-fixed table (tab-separated).
# pvar_f         : gzipped variant table with REF / ALT / FASTA_REF columns.
# The stray trailing space that used to end the first path has been removed:
# it was only tolerated because the path was pasted unquoted into a shell command.
meta_tbl_f <- '@@@@@@/projects/biomarkers/meta/plink_imputed/filtered/GLOBAL_Alanine_aminotransferase.sumstats.tsv.gz'
meta_tbl_out_f <- 'dev.out'
pvar_f <- '@@@@@@/users/ytanigaw/repos/rivas-lab/public-resources/uk_biobank/biomarkers/meta_flipfix/imp_ref_alt_check/ukb_imp_v3.mac1.flipcheck.tsv.gz'



In [1]:
# Attach the packages the script relies on. library() is used instead of
# require() so that a missing package stops the script here with a clear
# error, rather than returning FALSE and failing later on an unknown function.
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(data.table))


In [None]:
# Read the gzipped variant table and re-express each variant relative to the
# FASTA reference allele:
#   * FASTA_ALT is the table's ALT when its REF equals FASTA_REF
#     (case-insensitive), and the table's REF otherwise;
#   * the original REF/ALT columns are dropped and FASTA_REF/FASTA_ALT take
#     their names; '#CHROM' is renamed to 'CHROM'.
# The path is shell-quoted so that spaces or shell metacharacters in it cannot
# break (or alter) the zcat command.
pvar_df <- fread(cmd = paste('zcat', shQuote(pvar_f))) %>%
  mutate(FASTA_ALT = if_else(toupper(REF) == toupper(FASTA_REF), ALT, REF)) %>%
  select(-REF, -ALT) %>%
  rename('REF' = 'FASTA_REF', 'ALT' = 'FASTA_ALT', 'CHROM' = '#CHROM')

# Read the gzipped meta-analysis summary statistics.
# The path is shell-quoted so special characters cannot break the command;
# trimws() first removes any stray surrounding whitespace in the configured
# path, which the previous unquoted command silently tolerated.
meta_sumstats_df <- fread(cmd = paste('zcat', shQuote(trimws(meta_tbl_f))))

# Attach the reference REF/ALT to every summary-statistics row (keyed on
# MarkerName) and classify the allele orientation, case-insensitively:
#   is_not_flipped : Allele1 == ALT and Allele2 == REF
#   is_flipped     : Allele1 == REF and Allele2 == ALT
# Rows matching neither orientation are dropped; this includes rows with no
# entry in pvar_df, whose REF/ALT are NA after the right join.
# Only the is_flipped flag is carried forward.
joined_df <- pvar_df %>%
  rename(MarkerName = ID) %>%
  right_join(meta_sumstats_df, by = 'MarkerName') %>%
  mutate(
    is_flipped =
      (toupper(Allele1) == toupper(REF)) & (toupper(Allele2) == toupper(ALT)),
    is_not_flipped =
      (toupper(Allele1) == toupper(ALT)) & (toupper(Allele2) == toupper(REF))
  ) %>%
  filter(is_flipped | is_not_flipped) %>%
  select(-is_not_flipped)

# Count the retained rows whose alleles are in the flipped orientation and
# report the total.
n_flips <- sum(pull(joined_df, is_flipped))
print(paste0('The number of allele flips: ', n_flips))


# Re-orient flipped rows so that Allele1 is always the ALT allele:
#   * swap Allele1 and Allele2 (Allele1_copy keeps the original Allele1,
#     because mutate() sees the already-overwritten Allele1 on the next line);
#   * negate Effect;
#   * swap '+' and '-' in Direction. chartr() does this in a single pass and
#     leaves every other character untouched, unlike a placeholder-based
#     three-step replacement.
# Helper columns and the reference REF/ALT are then dropped, and
# Allele1/Allele2 are renamed to ALT/REF.
# NOTE(review): only Effect and Direction are adjusted here. If the table also
# carries columns tied to Allele1 (e.g. an allele frequency), they would need
# the matching adjustment -- confirm against the input schema.
flip_fixed <- joined_df %>%
  mutate(
    Allele1_copy = Allele1,
    Allele1 = if_else(is_flipped, Allele2, Allele1),
    Allele2 = if_else(is_flipped, Allele1_copy, Allele2),
    # Unary minus keeps the column's numeric type (no integer -> double promotion).
    Effect = if_else(is_flipped, -Effect, Effect),
    Direction = if_else(is_flipped, chartr('+-', '-+', Direction), Direction)
  ) %>%
  select(-Allele1_copy, -REF, -ALT, -is_flipped) %>%
  rename('ALT' = 'Allele1', 'REF' = 'Allele2')

# Write the flip-fixed table to the output path as a tab-separated file.
fwrite(flip_fixed, meta_tbl_out_f, sep = '\t')

