In [1]:
import typing
import os

import hpotk
from phenopackets import Phenopacket

from genophenocorr.patient import PhenopacketPatientCreator
from genophenocorr.phenotype import PhenotypeCreator
from genophenocorr.protein import UniprotProteinMetadataService, ProteinAnnotationCache, ProtCachingFunctionalAnnotator
from genophenocorr.variant import VarCachingFunctionalAnnotator, VariantAnnotationCache, VepFunctionalAnnotator
from genophenocorr.cohort import PhenopacketCohortCreator


In [2]:
fpath_hpo = 'hpo_data/hp.json'
cache_dir = 'testFiles/annotationsSub'
fpath_phenopackets = 'testFiles'
tx_id = 'NM_013275.6'
protein_id = 'Q15327'
if not os.path.isdir(cache_dir):
    os.mkdir(cache_dir)

In [3]:
hpo: hpotk.ontology.MinimalOntology = hpotk.ontology.load.obographs.load_minimal_ontology(fpath_hpo)
validators = [
    hpotk.validate.AnnotationPropagationValidator(hpo),
    hpotk.validate.ObsoleteTermIdsValidator(hpo),
    hpotk.validate.PhenotypicAbnormalityValidator(hpo)
]
phenotype_creator = PhenotypeCreator(hpo, hpotk.validate.ValidationRunner(validators))

In [4]:
# Functional annotator
vac = VariantAnnotationCache(cache_dir)
vep = VepFunctionalAnnotator()
vfa = VarCachingFunctionalAnnotator(vac, vep)

# Protein metadata
pm = UniprotProteinMetadataService()
pac = ProteinAnnotationCache(cache_dir)
pfa = ProtCachingFunctionalAnnotator(pac, pm)

# Assemble the patient creator
pc = PhenopacketPatientCreator(phenotype_creator, vfa, pfa)

In [5]:
cc = PhenopacketCohortCreator(pc)

In [6]:
patientCohort = cc.create_cohort(fpath_phenopackets)



In [7]:
testPat = patientCohort.all_patients.pop()

In [13]:
for var in testPat.variants:
    for trans in var.tx_annotations:
        print(trans.protein_affected)

NP_001243111.1
NP_001243112.1
NP_037407.4


In [14]:
patientCohort.list_all_phenotypes()

HPO ID        Total Patients
----------  ----------------
HP:0000534                35
HP:0000343                35
HP:0001249                34
HP:0011446                34
HP:0006482                32
HP:0001155                28
HP:0010938                27
HP:0012758                26
HP:0004322                24
HP:0000325                 7
HP:0000356                 6
HP:0000365                 5
HP:0000729                 1


In [15]:
patientCohort.list_all_proteins()

Protein ID        Total Patients
--------------  ----------------
NP_001243111.1                40
NP_037407.4                   40
NP_001243112.1                40
NP_037407.4                   40
NP_872337.2                    4
NP_001188336.1                 4
NP_003110.1                    4
NP_955399.1                    4
NP_003110.1                    4
NP_003110.1                    4
NP_005178.4                    2
NP_777577.2                    2
NP_112190.2                    2
NP_000503.1                    2
NP_001136336.2                 2
NP_000503.1                    2
NP_057293.1                    2
NP_000503.1                    2
NP_001305461.1                 2
NP_001230208.1                 2
NP_787127.1                    2
NP_001305459.1                 2
NP_057293.1                    2
NP_001310472.1                 2
NP_057293.1                    2
NP_001025189.1                 2
NP_004924.1                    2
NP_001305453.1                 2
NP_0013054

In [16]:
patientCohort.list_all_variants()

Variant                    Total Patients
-----------------------  ----------------
16_89284524_duplication                 3
16_89284565_G/C                         2
16_89284363_CTTTG/C                     2
16_89284129_CTTTTT/C                    2
16_89279326_G/A                         1
16_89284140_TTTTC/T                     1
16_89283314_CCTTT/C                     1
16_89283233_-/T                         1
16_89281336_CG/C                        1
16_89282166_TTC/T                       1
16_89279126_G/C                         1
16_89284129_CT/C                        1
16_89279542_G/A                         1
16_89282044_G/A                         1
16_89282433_TTC/T                       1
16_88788350_deletion                    1
16_87618421_deletion                    1
16_89285153_TTTTG/T                     1
16_89476288_duplication                 1
16_89283362_-/T                         1
16_89280830_-/A                         1
16_89285255_CGA/C                 

In [17]:
patientCohort.list_all_patients()

['Gnazzo, 2020_P5',
 'Gnazzo, 2020_P8',
 'Gnazzo, 2020_P27',
 'Gnazzo, 2020_P10',
 'Cucco, 2020 (Patient B)',
 'Crippa2015_P1',
 'Gnazzo, 2020_P15',
 'Gnazzo, 2020_P22',
 'Gnazzo, 2020_P18',
 'Gnazzo, 2020_P28',
 'Gnazzo, 2020_P7',
 'Gnazzo, 2020_P12',
 'Gnazzo, 2020_P25',
 'Crippa2015_P3',
 'Gnazzo, 2020_P20',
 'Gnazzo, 2020_P17',
 'Behnert, 2018',
 'Gnazzo, 2020_P31',
 'Gnazzo, 2020_P2',
 'Bucerzan2020',
 'Gnazzo, 2020_P14',
 'Gnazzo, 2020_P23',
 'Gnazzo, 2020_P1',
 'Gnazzo, 2020_P19',
 'Gnazzo, 2020_P4',
 'Gnazzo, 2020_P9',
 'Gnazzo, 2020_P26',
 'Gnazzo, 2020_P11',
 'Gnazzo, 2020_P21',
 'Gnazzo, 2020_P16',
 'Crippa2015_P2',
 'Gnazzo, 2020_P3',
 'Gnazzo, 2020_P30',
 'Gnazzo, 2020_P29',
 'Bianchi, 2018',
 'DeBernardi2018',
 'Gnazzo, 2020_P6',
 'Alves, 2019',
 'Gnazzo, 2020_P13']