Workflow used to **apply** selected polygenic scores (PGS) to imputed genotype data using **pgs-calc** (https://github.com/lukfor/pgs-calc)

In [None]:
import os
import glob
from datetime import date

basedir = "/labs/tassimes/rodrigoguarischi/projects/sea/apply_grs/"

# Change working directory: every relative path below is resolved against it
os.chdir(basedir)

# Run all scores present in the pgs_reference_weights folder.
# glob returns matches in arbitrary order, so sort them to keep the order of
# the scores passed to pgs-calc identical from one run to the next.
# ref_weights_paths = glob.glob( "./pgs_reference_weights/20220511_pgss/shoa_scores/*.txt.gz" )
weight_files = sorted(glob.glob("./pgs_reference_weights/*.txt.gz"))

# Fail early with a clear message instead of handing pgs-calc an empty --ref
if not weight_files:
    raise FileNotFoundError(
        "No score files (*.txt.gz) found in ./pgs_reference_weights under " + basedir
    )

# Score names: file name up to the first dot (e.g. "NAME.txt.gz" -> "NAME")
ref_weights = [os.path.basename(pgs_path).split(".")[0] for pgs_path in weight_files]

# pgs-calc receives all score files as one comma-separated string
ref_weights_paths = ",".join(weight_files)

# Create output folder named as raw_scores_<TODAYS_DATE>.
# exist_ok=True lets the cell be re-run on the same day; files already in the
# folder are kept here and may be overwritten by the run below.
today_date = date.today().strftime("%Y%m%d")
output_folder = "raw_scores_" + today_date
os.makedirs(output_folder, exist_ok=True)

# Dictionary with paths (glob patterns) to imputed VCF files for HRC and TOPMed.
# Keys are used as prefixes of the output file names.
imputed_genotypes = {
    "hrc_whites": "../imputed_data/michigan_hrc/whites/*.vcf.gz",
    "hrc_blacks": "../imputed_data/michigan_hrc/blacks/*.vcf.gz",
    "topmed_whites": "../imputed_data/topmed/whites/liftover_hg19/*no_chr_prefix.vcf.gz",
    "topmed_blacks": "../imputed_data/topmed/blacks/liftover_hg19/*no_chr_prefix.vcf.gz",
}
      
# Run pgs-calc for hrc and topmed imputed genotypes for multiple r2 thresholds
for reference_panel in imputed_genotypes:
        
    for min_r2 in [0, 0.3, 0.5, 0.8]:
        
        print("Calculating scores for {0} at min R2 >= {1}".format( ", ".join(ref_weights), min_r2 ) )
        
        output_files_basename = output_folder + "/" + "_".join( [reference_panel, today_date, "multiGRS", ( "r" + str(min_r2).replace(".","") ) ] )
        
        info_report_filename = output_files_basename + ".info.txt"
        html_report_filename = output_files_basename + ".html"
        output_scores_filename = output_files_basename + ".scores.txt"
        
        # Run pgs-calc using GENOTYPE information, instead of DOSAGE (the default)
        # --genotypes=GT \     
        !./pgs-calc/pgs-calc apply \
            --ref {ref_weights_paths} \
            --minR2 {min_r2} \
            --threads 23 \
            --no-ansi \
            --info {info_report_filename} \
            --report-html={html_report_filename} \
            --out {output_scores_filename} \
            { imputed_genotypes[reference_panel] }