Workflow used to **apply** selected polygenic scores (PGS) to imputed genotype data using **pgs-calc** (https://github.com/lukfor/pgs-calc)

In [None]:
import os

# Project directory for this run. The relative paths used in this notebook
# are resolved against it, so switch to it before doing anything else.
basedir = "/labs/tassimes/rodrigoguarischi/projects/sea/apply_grs"
os.chdir(basedir)

# Glob pattern selecting the imputed genotype VCF files to score.
# An alternative input set is kept below (commented out) for quick switching.
imputed_genotypes = "/labs/tassimes/rodrigoguarischi/projects/sea/imputed_data/michigan_hrc/*.vcf.gz"
# imputed_genotypes = "/labs/tassimes/rodrigoguarischi/projects/sea/imputed_data/topmed/liftover_hg19/*no_chr_prefix.vcf.gz"

# Names of the scores to apply, and the folder holding their weight files
# (one "<name>.txt.gz" file per score).
ref_weights = ["wGRS49", "PGS000349", "PGS000018", "PGS000667", "PGS000889"]
ref_weights_folder = "./pgs_reference_weights/"

# Comma-separated list of weight-file paths, one per score name.
ref_weights_paths = ",".join(
    "{0}{1}.txt.gz".format(ref_weights_folder, pgs_name)
    for pgs_name in ref_weights
)

for min_r2 in [0, 0.3, 0.5, 0.8]:
    
    print("Calculating scores for {0} at min R2 >= {1}".format( ", ".join(ref_weights), min_r2 ) )
    
    output_scores_filename = "_".join(ref_weights) + "_r" + str(min_r2).replace(".","") + ".scores.txt"
    html_report_filename = "_".join(ref_weights) + "_r" + str(min_r2).replace(".","") + ".html"
    
    !./pgs-calc/pgs-calc apply \
        --ref {ref_weights_paths} \
        --minR2 {min_r2} \
        --threads 22 \
        --no-ansi \
        # Run pgs-calc using GENOTYPE information, instead of DOSAGE (the default)
        # --genotypes=GT \ 
        --report-html={html_report_filename} \
        --out {output_scores_filename} \
        {imputed_genotypes}