In [1]:
import typing
import os

import hpotk
from phenopackets import Phenopacket

from genophenocorr.patient import PhenopacketPatientCreator
from genophenocorr.phenotype import PhenotypeCreator
from genophenocorr.protein import UniprotProteinMetadataService, ProteinAnnotationCache, ProtCachingFunctionalAnnotator
from genophenocorr.variant import VarCachingFunctionalAnnotator, VariantAnnotationCache, VepFunctionalAnnotator
from genophenocorr.cohort import PhenopacketCohortCreator, CohortAnalysis


In [2]:
# --- Notebook configuration -------------------------------------------------
# Path to the HPO ontology in obographs JSON format (loaded in a later cell).
fpath_hpo = 'hpo_data/hp.json'
# Directory handed to the protein and variant annotation caches.
cache_dir = 'KBG/annotations'
# Location passed to the cohort creator's `create_cohort`.
fpath_phenopackets = 'KBG/phenopackets'
# Transcript accession used by the analyses further down.
tx_id = 'NM_013275.6'
# Protein accession; not referenced by any cell visible in this notebook.
# NOTE(review): confirm this accession belongs to the same gene as `tx_id`.
protein_id = 'Q15327'

# Create the cache directory together with any missing parents. `exist_ok=True`
# makes the call a no-op when the directory is already there, so the cell is
# safe to re-run and does not fail when the parent folder is absent.
os.makedirs(cache_dir, exist_ok=True)

In [3]:
# Read the ontology from the obographs file configured in `fpath_hpo`.
hpo: hpotk.ontology.Ontology = hpotk.ontology.load.obographs.load_ontology(fpath_hpo)

# Instantiate each validator against the loaded ontology, in a fixed order.
validators = [
    validator_cls(hpo)
    for validator_cls in (
        hpotk.validate.AnnotationPropagationValidator,
        hpotk.validate.ObsoleteTermIdsValidator,
        hpotk.validate.PhenotypicAbnormalityValidator,
    )
]

# The phenotype creator receives the ontology and a runner wrapping the validators.
phenotype_creator = PhenotypeCreator(
    hpo,
    hpotk.validate.ValidationRunner(validators),
)

In [4]:
# Protein side: a cache rooted at `cache_dir` plus the UniProt metadata service,
# combined into a caching protein annotator.
pac = ProteinAnnotationCache(cache_dir)
pm = UniprotProteinMetadataService()
pfa = ProtCachingFunctionalAnnotator(pac, pm)

# Variant side: the VEP annotator is given the protein annotator, and is then
# wrapped together with a cache rooted at the same `cache_dir`.
vep = VepFunctionalAnnotator(pfa)
vac = VariantAnnotationCache(cache_dir)
vfa = VarCachingFunctionalAnnotator(vac, vep)

# Patient creator built from the phenotype creator and the variant annotator.
pc = PhenopacketPatientCreator(phenotype_creator, vfa)

In [5]:
# Cohort creator wrapping the patient creator `pc` assembled in the previous cell.
cc = PhenopacketCohortCreator(pc)

In [None]:
# Build the cohort from the location in `fpath_phenopackets`.
# The captured output shows a message for every patient that arrives without
# any HPO term or without any variant.
patientCohort = cc.create_cohort(fpath_phenopackets)

Expected at least one HPO term per patient, but received none for patient VanDongen2019_P2
Expected at least one HPO term per patient, but received none for patient VanDongen2019_P12
Expected at least one HPO term per patient, but received none for patient Reuter2020
Expected at least one HPO term per patient, but received none for patient Novara, 2017_P10
Expected at least one variant per patient, but received none for patient Parenti2016_P1
Expected at least one HPO term per patient, but received none for patient VanDongen2019_P13
Expected at least one HPO term per patient, but received none for patient VanDongen2019_P8
Expected at least one HPO term per patient, but received none for patient VanDongen2019_P4
Expected at least one HPO term per patient, but received none for patient VanDongen2019_P5
Expected at least one HPO term per patient, but received none for patient KBG31B
Expected at least one HPO term per patient, but received none for patient VanDongen2019_P9
Expected at leas

In [None]:
# Display what the cohort reports from `list_all_phenotypes()`.
patientCohort.list_all_phenotypes()

In [None]:
# Display what the cohort reports from `list_all_variants()`.
patientCohort.list_all_variants()

In [None]:
# Display what the cohort reports from `list_all_patients()`.
patientCohort.list_all_patients()

In [None]:
# Display the cohort data for the transcript configured in `tx_id`
# (set in the configuration cell) instead of repeating the accession literal.
patientCohort.list_data_by_tx(tx_id)

In [None]:
# Same call as the previous cell, but without a transcript argument.
# NOTE(review): presumably this reports data for every transcript — confirm against the library.
patientCohort.list_data_by_tx()

In [None]:
# Display the cohort's `all_proteins` attribute.
patientCohort.all_proteins

In [None]:
# Analysis of the cohort on the transcript configured in `tx_id`, created with
# include_unmeasured=False. Using `tx_id` keeps this cell in sync with the
# configuration cell rather than duplicating the accession literal.
analysis = CohortAnalysis(patientCohort, tx_id, include_unmeasured=False)
# Display the HPO terms the analysis holds for testing.
# NOTE(review): `_testing_hpo_terms` is underscore-prefixed (private); prefer a
# public accessor if the library offers one.
analysis._testing_hpo_terms

In [None]:
from genophenocorr.constants import variant_effects

In [None]:
# Run the comparison by variant type, passing the FRAMESHIFT_VARIANT constant
# from `variant_effects`, and display the result.
analysis.compare_by_variant_type(variant_effects.FRAMESHIFT_VARIANT)

In [None]:
# Run the comparison for one specific variant, identified by its string key.
# NOTE(review): the key looks like chromosome_position_ref/alt — confirm the expected format.
analysis.compare_by_variant('16_89284634_GTGTTT/G')

In [None]:
# Second analysis of the same cohort and transcript, additionally created with
# include_large_SV=False. `tx_id` comes from the configuration cell so the
# accession is defined in exactly one place.
# NOTE(review): presumably this leaves large structural variants out — confirm.
analysis2 = CohortAnalysis(patientCohort, tx_id, include_unmeasured=False, include_large_SV=False)

# Run the comparison by exon, passing exon number 9, and display the result.
analysis2.compare_by_exon(9)

In [None]:
from genophenocorr.protein import FeatureType


In [None]:
# Run the comparison by protein feature type, passing FeatureType.REGION,
# and display the result.
analysis.compare_by_protein_feature_type(FeatureType.REGION)