# Calculating my polygenic scores

In [None]:
import traceback
from glob import glob
from pathlib import Path
import numpy as np
import pandas as pd
import sqlite3
from typing import List, Optional, Tuple, Dict
import pysam
import rsidx
from tqdm import tqdm
from search_your_dna.pgscatalog import read_or_download_pgs_scoring_file, PGS_METHOD_MAPPING_TO_METHOD_CATEGORIES, \
    calc_polygenic_score, calc_all_polygenic_scores, to_gene_dosage_df, clean_rsids, do_polygenic_score_calculation
from search_your_dna.util import read_raw_zipped_polygenic_score_file, \
    read_raw_zipped_polygenic_score_file_with_chrom_pos, search_for_rsids

# Path to my personal VCF file (the file name indicates GRCh38/hg38 annotation);
# it is passed to the scoring functions in the cells below.
file_my_vcf = "data/GFX0237425.GRCh38.p7.annotated.hg38_multianno.updated.vcf"
# Forwarded as the `max_pgs_alleles` argument of calc_polygenic_score.
# NOTE(review): presumably an upper bound on the number of alleles a PGS scoring
# file may contain before it is skipped — confirm in search_your_dna.pgscatalog.
max_pgs_alleles = 200

## Calculate scores for PGS scoring files that contain rsIDs

In [None]:
# Score a single PGS scoring file. An empty string is passed as the hg19
# rsID/chrom-pos mapping file for this call.
pgs_file = "data/pgs/PGS000021.txt.gz"
res1 = calc_polygenic_score(
    my_vcf_file=file_my_vcf,
    pgs_file=pgs_file,
    hg19_rsid_chrom_pos_mapping_file="",
    max_pgs_alleles=max_pgs_alleles,
)
res1

In [None]:
# Score every PGS00* scoring file found under data/pgs/.
# glob() already returns a list (no list() wrapper needed) but in arbitrary,
# filesystem-dependent order, so sort the paths to make the processing order
# reproducible between runs and machines.
all_pgs_scores, errors = calc_all_polygenic_scores(
    files=sorted(glob("data/pgs/PGS00*.txt.gz")),
    file_my_vcf=file_my_vcf,
)

In [None]:
# Save the scores as a tab-separated file without the index column.
# `index` is documented as a bool, so pass False explicitly instead of relying
# on None happening to be falsy.
all_pgs_scores.to_csv("data/pgs_results.csv", index=False, sep="\t")

In [None]:
# Order the results by score, using pgs_id as the secondary sort key.
all_pgs_scores = all_pgs_scores.sort_values(
    by=["score", "pgs_id"],
)


## Calculate scores for PGS scoring files that contain only chromosome/position values

### Download the rsID-to-hg19 chromosome/position metadata file with ANNOVAR

In [None]:
# Use ANNOVAR's annotate_variation.pl to download (-downdb) the avsnp150
# database for the hg19 build from the ANNOVAR mirror into data/humandb/.
!~/bin/annovar/annotate_variation.pl -buildver hg19 -downdb -webfrom annovar avsnp150 data/humandb/

### Compress the metadata file and create a tabix index for it

In [None]:
# bgzip-compress the downloaded table; -c writes to stdout, so the original
# uncompressed file is left in place next to the new .gz file.
!bgzip -c data/humandb/hg19_avsnp150.txt > data/humandb/hg19_avsnp150.txt.gz
# Build a tabix index on the compressed file: column 1 is the sequence name,
# column 2 the start position and column 3 the end position.
!tabix --begin 2 --end 3 --sequence 1 data/humandb/hg19_avsnp150.txt.gz

### Calculate the polygenic score

In [None]:
# Score a PGS scoring file, this time supplying the bgzip-compressed hg19
# avsnp150 table as the rsID/chrom-pos mapping file.
pgs_file = "data/pgs/PGS000007.txt.gz"
hg19_rsid_chrom_pos_mapping_file = "data/humandb/hg19_avsnp150.txt.gz"

res2 = calc_polygenic_score(
    my_vcf_file=file_my_vcf,
    pgs_file=pgs_file,
    hg19_rsid_chrom_pos_mapping_file=hg19_rsid_chrom_pos_mapping_file,
    max_pgs_alleles=max_pgs_alleles,
)