Workflow used to **apply** selected PGS scores to imputed genotype data using **pgs-calc** (https://github.com/lukfor/pgs-calc)

In [1]:
import os
from datetime import date

# Project root; every relative path below is resolved against this directory
basedir = "/labs/tassimes/rodrigoguarischi/projects/sea/apply_grs"

# Change working directory
os.chdir(basedir)

# Scores to run against (each name must match a <name>.txt.gz file in ./pgs_reference_weights)
ref_weights = ["wGRS49", "PGS000349", "PGS000018", "PGS000667", "PGS000889"]
ref_weights_folder = "./pgs_reference_weights/"

# Comma-separated list of the weight files, as passed to pgs-calc's --ref option
ref_weights_paths = ",".join(ref_weights_folder + pgs_name + ".txt.gz" for pgs_name in ref_weights)

# Create output folder named raw_scores_<TODAYS_DATE> (YYYYMMDD).
# exist_ok=True lets this cell be re-run on the same day without raising FileExistsError.
output_folder = "raw_scores_" + date.today().strftime("%Y%m%d")
os.makedirs(output_folder, exist_ok=True)

# Glob patterns (expanded by the shell) for the imputed VCF files of each reference panel
imputed_genotypes = {
    "hrc": "../imputed_data/michigan_hrc/*.vcf.gz",
    "topmed": "../imputed_data/topmed/liftover_hg19/*no_chr_prefix.vcf.gz",
}

# Run pgs-calc for hrc and topmed imputed genotypes for multiple r2 thresholds
for reference_panel in imputed_genotypes:
        
    for min_r2 in [0, 0.3, 0.5, 0.8]:
        
        print("Calculating scores for {0} at min R2 >= {1}".format( ", ".join(ref_weights), min_r2 ) )
        
        output_files_basename = output_folder + "/" + reference_panel + "_"
        
        info_report_filename = output_files_basename + "_".join(ref_weights) + "_r" + str(min_r2).replace(".","") + ".info.txt"
        html_report_filename = output_files_basename + "_".join(ref_weights) + "_r" + str(min_r2).replace(".","") + ".html"
        output_scores_filename = output_files_basename + "_".join(ref_weights) + "_r" + str(min_r2).replace(".","") + ".scores.txt"

        # Run pgs-calc using GENOTYPE information, instead of DOSAGE (the default)
        # --genotypes=GT \     
        !./pgs-calc/pgs-calc apply \
            --ref {ref_weights_paths} \
            --minR2 {min_r2} \
            --threads 22 \
            --no-ansi \
            --info {info_report_filename} \
            --report-html={html_report_filename} \
            --out {output_scores_filename} \
            { imputed_genotypes[reference_panel] }

Calculating scores for wGRS49, PGS000349, PGS000018, PGS000667, PGS000889 at min R2 >= 0

pgs-calc 0.9.16
https://github.com/lukfor/pgs-calc
(c) 2020 - 2022 Lukas Forer


Input:
  ref: ./pgs_reference_weights/wGRS49.txt.gz,./pgs_reference_weights/PGS000349.txt.gz,./pgs_reference_weights/PGS000018.txt.gz,./pgs_reference_weights/PGS000667.txt.gz,./pgs_reference_weights/PGS000889.txt.gz
  out: raw_scores_20220426/hrc_wGRS49_PGS000349_PGS000018_PGS000667_PGS000889_r0.scores.txt
  genotypes: DS
  minR2: 0.0
  vcfs (23):
   - ../imputed_data/michigan_hrc/chr10.dose.vcf.gz
   - ../imputed_data/michigan_hrc/chr11.dose.vcf.gz
   - ../imputed_data/michigan_hrc/chr12.dose.vcf.gz
   - ../imputed_data/michigan_hrc/chr13.dose.vcf.gz
   - ../imputed_data/michigan_hrc/chr14.dose.vcf.gz
   - ../imputed_data/michigan_hrc/chr15.dose.vcf.gz
   - ../imputed_data/michigan_hrc/chr16.dose.vcf.gz
   - ../imputed_data/michigan_hrc/chr17.dose.vcf.gz
   - ../imputed_data/michigan_hrc/chr18.dose.vcf.gz
   - ../imp