# Get results based on my SNP values

In [None]:
from functools import reduce
from pathlib import Path

import pandas as pd
import pysam
from IPython.core.display import display

from search_your_dna.pgscatalog import get_all_pgs_api_data, read_or_download_pgs_scoring_file, calc_polygenic_score, \
    PGS_METHOD_MAPPING_TO_METHOD_CATEGORIES, MethodCategories
from search_your_dna.util import get_genotype_for_chrom_pos

In [None]:
# Input locations used by the notebook.
# NOTE(review): cache_file_name is not referenced in the visible cells; presumably a
# cache of parsed VCF records -- confirm before removing.
cache_file_name = "data/vcf_records.parquet.gz"
# SQLite database handed to calc_polygenic_score as `snp_db_file`.
snp_db_file = "/home/s/src/search_your_dna/data/ncbi_snpdb_all_ids.sqlite"
# One VCF per variant class of the same sample, judging by the file names (cnv / indel / snp / sv).
vcf_file_paths = list(map(Path, (
    "/home/s/Dropbox/Siim/health/genetest_2020/GFX0237425.cnv.vcf.gz",
    "/home/s/Dropbox/Siim/health/genetest_2020/GFX0237425.filtered.indel.vcf.gz",
    "/home/s/Dropbox/Siim/health/genetest_2020/GFX0237425.filtered.snp.vcf.gz",
    "/home/s/Dropbox/Siim/health/genetest_2020/GFX0237425.sv.vcf.gz",
)))

## Get my genotype for the disease

In [None]:
# Open both alignment files in binary-read mode ("rb").
# NOTE(review): only `alignment_data` is used in the visible cells; the "old" handle
# is kept because later cells may rely on it -- confirm.
bam_file_old = "/home/s/Dropbox/Siim/health/genetest_2020/GFX0237425.bam"
alignment_data_old = pysam.AlignmentFile(bam_file_old, "rb")

# Presumably the same sample aligned against GRCh38.p7, going by the file name.
bam_file = "/home/s/Dropbox/Siim/health/genetest_2020/GFX0237425.GRCh38.p7.bam"
alignment_data = pysam.AlignmentFile(bam_file, "rb")

In [None]:
# Print a label and show my genotype for each position of interest.
# Each entry is (label, chromosome, position); the result is wrapped in a 1-tuple
# before being displayed, exactly as the individual calls did.
for label, chrom, pos in (
    ("ALPHA-1 ANTITRYPSIN DEFICIENCY", "14", 94847386),
    ("Aspirin", "5", 179220638),
):
    print(label)
    display((get_genotype_for_chrom_pos(alignment_data, chrom, pos),))

### Collect PGS data

#### Get all traits available in the PGS Catalog

In [None]:
# Fetch everything the project helper returns for the "trait/all" resource.
# NOTE(review): presumably one record per trait from the PGS Catalog API -- confirm in get_all_pgs_api_data.
all_traits_result = get_all_pgs_api_data("trait/all")

In [None]:
# Tabulate the fetched trait records and list which columns are available.
all_pgs_traits_df = pd.DataFrame(data=all_traits_result)
print(all_pgs_traits_df.columns)

In [None]:
# Distinct PGS ids referenced by any trait.
# Each cell of "associated_pgs_ids" is expected to hold a collection of ids; flattening
# them in a single pass avoids re-copying an ever-growing list for every row, which is
# what repeated list concatenation does.
pgs_ids = {
    pgs_id
    for associated_ids in all_pgs_traits_df["associated_pgs_ids"]
    for pgs_id in associated_ids
}
len(pgs_ids)

#### Get PGS entities from the PGS Catalog

##### Get or download all PGS scoring files

In [None]:
%%time
pgs_score_dfs = []
for pgs_id in sorted(pgs_ids):
    try:
        response_data, score_df = read_or_download_pgs_scoring_file(pgs_id)
        score_df.attrs["metadata"] = response_data
        pgs_score_dfs.append(score_df)
    except Exception as e:
        print(f"Something went wrong when parsing pgs {pgs_id} file.", e)

##### Calculate some PGS score metadata

In [None]:
# Collect the distinct method names, reported traits and EFO term labels found in the
# metadata of all loaded scoring files, then write each set to its own text file.
methods = set()
traits = set()
efo_terms = set()  # for more about efo see: https://www.ebi.ac.uk/ols/ontologies/efo
for pgs_score_df in pgs_score_dfs:
    metadata = pgs_score_df.attrs["metadata"]
    methods.add(metadata["method_name"])
    traits.add(metadata["trait_reported"])
    for trait in metadata["trait_efo"]:
        # Only ontology entries whose id carries the "EFO_" prefix count as EFO terms.
        if trait["id"].startswith("EFO_"):
            efo_terms.add(trait["label"])

for metadata_file, values in (
    ("data/pgs/metadata_methods.txt", methods),
    ("data/pgs/metadata_traits.txt", traits),
    ("data/pgs/metadata_efo_terms.txt", efo_terms),
):
    # The values are free-text labels; an explicit UTF-8 encoding keeps the output
    # independent of the platform's default locale encoding.
    with open(metadata_file, "w", encoding="utf-8") as f:
        f.write(str(values))

# Last expression of the cell: shown as the cell output.
f"#methods: {len(methods)} and #traits: {len(traits)} and #terms {len(efo_terms)}"

### Calculate PGS for my DNA

#### Run all PGS calculations

In [None]:
%time
my_pgs_dfs = []
for pgs_score_df in pgs_score_dfs:
    analysis_method = PGS_METHOD_MAPPING_TO_METHOD_CATEGORIES.get(pgs_score_df.attrs["metadata"]["method_name"])
    if analysis_method is None or analysis_method == MethodCategories.UNKNOWN:
        continue
    try:
        my_pgs, my_pgs_df = calc_polygenic_score(snp_db_file=snp_db_file, max_pgs_alleles=50, pgs_df=pgs_score_df)
        my_pgs_df.attrs["metadata"] = pgs_score_df.attrs["metadata"]
        my_pgs_df.attrs["score"] = my_pgs
        my_pgs_dfs.append(my_pgs_df)
    except Exception as e:
        print(f"Failed {pgs_score_df.attrs['metadata']['id']} with error:", e)