In [1]:
import os
import sys

import hpotk
from IPython.display import HTML, display

# Make the in-repo sources importable; this must run before the genophenocorr imports below.
sys.path.insert(0, os.path.abspath('../src/'))

from genophenocorr.patient import PhenopacketPatientCreator
from genophenocorr.phenotype import PhenotypeCreator
from genophenocorr.protein import UniprotProteinMetadataService, ProteinAnnotationCache, ProtCachingFunctionalAnnotator
from genophenocorr.variant import VarCachingFunctionalAnnotator, VariantAnnotationCache, VepFunctionalAnnotator
from genophenocorr.cohort import PhenopacketCohortCreator, CohortAnalysis, CohortViewer

In [2]:
# Input locations.
# The defaults are the original author's local checkouts. To run the notebook on another
# machine, set the PHENOPACKETS_DIR and/or HPO_JSON_PATH environment variables before
# starting the kernel instead of editing this cell.
fpath_phenopackets = os.environ.get(
    'PHENOPACKETS_DIR',
    '/home/peter/GIT/phenopacket-store/phenopackets/OFD1/',
)
fpath_hpo = os.environ.get(
    'HPO_JSON_PATH',
    '/home/peter/GIT/human-phenotype-ontology/src/ontology/hp.json',
)

In [3]:
# Directory handed to the protein and variant annotation caches created later in the notebook.
cache_dir = 'CACHE'
# `exist_ok=True` makes the cell idempotent on re-run and avoids the check-then-create race
# of a separate `isdir` test; `makedirs` also copes with a nested path if `cache_dir` is
# ever changed to one. It still raises if the path exists but is not a directory.
os.makedirs(cache_dir, exist_ok=True)

In [4]:
# Load the HPO from the obographs JSON file configured in `fpath_hpo`.
hpo: hpotk.ontology.Ontology = hpotk.ontology.load.obographs.load_ontology(fpath_hpo)

# Every validator is constructed from the loaded ontology; the order matches the
# order in which they are handed to the validation runner.
validators = [
    validator_cls(hpo)
    for validator_cls in (
        hpotk.validate.AnnotationPropagationValidator,
        hpotk.validate.ObsoleteTermIdsValidator,
        hpotk.validate.PhenotypicAbnormalityValidator,
    )
]

# The phenotype creator receives the ontology plus a runner wrapping the validators above.
phenotype_creator = PhenotypeCreator(hpo, hpotk.validate.ValidationRunner(validators))


In [5]:
# Protein metadata: a Uniprot-backed metadata service (`pm`) and a cache rooted at
# `cache_dir` (`pac`) are combined into one caching protein annotator (`pfa`).
# NOTE(review): presumably `pfa` consults the cache before calling the service — confirm.
pm = UniprotProteinMetadataService()
pac = ProteinAnnotationCache(cache_dir)
pfa = ProtCachingFunctionalAnnotator(pac, pm)

# Variant functional annotation: the VEP annotator (`vep`) is given the protein annotator,
# and is itself wrapped together with a cache in the same `cache_dir` (`vac`) to form `vfa`.
vac = VariantAnnotationCache(cache_dir)
vep = VepFunctionalAnnotator(pfa)
vfa = VarCachingFunctionalAnnotator(vac, vep)


# Assemble the patient creator from the phenotype creator and the caching variant
# annotator; `pc` is what the cohort creator is built from in the next cell.
pc = PhenopacketPatientCreator(phenotype_creator, vfa)
    

In [6]:
# Build the cohort from the phenopackets location configured in `fpath_phenopackets`,
# using the patient creator `pc` assembled above.
# NOTE(review): this step may call remote annotation services on a cold cache — confirm.
cc = PhenopacketCohortCreator(pc)
patientCohort = cc.create_cohort(fpath_phenopackets)

In [7]:
# Show the cohort's phenotypes; the rendered output is a list of (HPO term id, count)
# pairs, highest count first.
patientCohort.list_all_phenotypes()

[('HP:0001263', 13),
 ('HP:0001249', 11),
 ('HP:0000256', 10),
 ('HP:0002795', 9),
 ('HP:0002205', 9),
 ('HP:0000202', 7),
 ('HP:0001161', 6),
 ('HP:0001829', 6),
 ('HP:0001156', 4),
 ('HP:0011355', 4),
 ('HP:0000369', 3),
 ('HP:0002419', 3),
 ('HP:0000510', 3),
 ('HP:0410263', 3),
 ('HP:0001596', 3),
 ('HP:0001159', 3),
 ('HP:0000113', 3),
 ('HP:0000164', 3),
 ('HP:0001513', 2),
 ('HP:0033454', 2),
 ('HP:0001344', 2),
 ('HP:0010442', 2),
 ('HP:0012471', 2),
 ('HP:0001320', 2),
 ('HP:0030084', 2),
 ('HP:0000218', 1),
 ('HP:0011304', 1),
 ('HP:0001561', 1),
 ('HP:0002591', 1),
 ('HP:0001252', 1),
 ('HP:0001310', 1),
 ('HP:0001520', 1),
 ('HP:0000252', 1),
 ('HP:0004325', 1),
 ('HP:0004322', 1),
 ('HP:0001270', 1),
 ('HP:0001007', 1),
 ('HP:0000431', 1),
 ('HP:0001945', 1),
 ('HP:0025161', 1),
 ('HP:0000365', 1),
 ('HP:0000324', 1),
 ('HP:0001407', 1),
 ('HP:0011359', 1),
 ('HP:0000494', 1),
 ('HP:0000268', 1),
 ('HP:0002208', 1),
 ('HP:0001885', 1),
 ('HP:0000023', 1)]

In [8]:
# Viewer over the cohort; it is given the ontology as well as the cohort.
# NOTE(review): presumably the ontology is used to turn term ids into labels — confirm.
cohortViewer = CohortViewer(cohort=patientCohort, hpo=hpo)

In [10]:
# Render the HPO term-count table as HTML (`HTML` and `display` are imported in the
# notebook's import cell rather than here, so all dependencies are visible up front).
# NOTE(review): `min_count=2` presumably hides terms seen in fewer than two patients — confirm.
display(HTML(cohortViewer.hpo_term_counts_table(min_count=2)))

0,1
HPO Term,Count
Global developmental delay,13
Intellectual disability,11
Macrocephaly,10
Abnormal respiratory system physiology,9
Recurrent respiratory infections,9
Orofacial cleft,7
Hand polydactyly,6
Foot polydactyly,6
Brachydactyly,4
