# Do enrichment analyses using the Human Phenotype Ontology (HPO)
The HPO files used here are located in goatools/notebooks/data/hpo

In [1]:
# Study genes
fin_study = 'data/hpo/genes.list'
# Population (background) genes
fin_pop = 'data/hpo/gobackground.list'
# DAG containing the HPO terms
fin_obo = 'data/hpo/hp.obo'
# Annotation of genes to HPO terms
fin_anno = 'data/hpo/hpo.annotation.tab'

## 1) Read list of study genes and population genes

In [2]:
from goatools.utils import read_geneset

# Read the study gene list first, then the population (background) gene list
study_ids, population_ids = read_geneset(fin_study), read_geneset(fin_pop)

    2,252 READ: data/hpo/genes.list
   14,446 READ: data/hpo/gobackground.list


## 2) Load the human phenotype ontology DAG

In [3]:
from goatools.obo_parser import GODag

# Build the ontology DAG from the HPO OBO file
godag = GODag(fin_obo)

data/hpo/hp.obo: fmt(1.2) rel(hp/2021-02-28) 19,498 Terms


## 3) Load the annotations of genes to sets of HPO terms

In [None]:
from goatools.anno.idtogos_reader import IdToGosReader

# Read the gene-to-HPO-term annotation file; the loaded DAG is supplied via godag=
annoobj = IdToGosReader(fin_anno, godag=godag)

# Associations held by the reader
# (presumably gene ID -> set of HPO term IDs; confirm against the goatools docs)
id2gos = annoobj.get_id2gos()

HMS:0:00:00.555350 187,934 annotations READ: data/hpo/hpo.annotation.tab 
4531 IDs in loaded association branch, human_phenotype


## 4) Initialize the enrichment analysis object for HPO terms

In [None]:
from goatools.go_enrichment import GOEnrichmentStudy

# Associations obtained from the annotation reader
associations = annoobj.get_id2gos()
# Multiple-test correction methods passed to the study
correction_methods = ['bonferroni', 'fdr_bh']

# Enrichment study object built from the population genes, associations and DAG
goeaobj = GOEnrichmentStudy(
    population_ids, associations, godag,
    methods=correction_methods,
    pvalcalc='fisher_scipy_stats',
)

4531 IDs in loaded association branch, human_phenotype

Load  Ontology Enrichment Analysis ...
Propagating term counts up: is_a
 26%  3,823 of 14,446 population items found in association


## 5) Run an enrichment analysis on HPO terms

In [None]:
# Run the enrichment study on the study genes; the returned records are printed below
results = goeaobj.run_study_nts(study_ids)


Runing  Ontology Analysis: current study set of 2252 IDs.
 29%    650 of  2,252 study items found in association
100%  2,252 of  2,252 study items found in population(14446)
Calculating 9,331 uncorrected p-values using fisher_scipy_stats


## 6) Print the results

In [None]:
# Table header; the column titles correspond to the fields filled into `pat` below
print('namespace       term_id  e/p pval_uncorr Benjamini/Hochberg Bonferroni  study_ratio population_ratio')
print('--------------- -------- --- ----------- ------------------ ----------  ----------- ----------------')
# Row pattern: p-values in scientific notation, ratios right-aligned as "count/total"
pat = '{NS} {GO} {e}    {PVAL:8.2e}           {BH:8.2e}   {BONF:8.2e} {RS:>12} {RP:>12}'
# Order rows by uncorrected p-value (smallest first), breaking ties by term ID
for ntd in sorted(results, key=lambda nt: (nt.p_uncorrected, nt.GO)):
    # Print only terms whose Benjamini/Hochberg-corrected p-value is below 0.05
    if ntd.p_fdr_bh < 0.05:
        print(pat.format(
            NS=ntd.NS,
            GO=ntd.GO,
            e=ntd.enrichment,
            RS='{}/{}'.format(*ntd.ratio_in_study),
            RP='{}/{}'.format(*ntd.ratio_in_pop),
            PVAL=ntd.p_uncorrected,
            BONF=ntd.p_bonferroni,
            BH=ntd.p_fdr_bh))
# Legend for the e/p column
print('e: enriched')
print('p: purified')

Copyright (C) 2021-present, DV Klopfenstein and Haibao Tang. All rights reserved.