
# Exploration of the modulators and downstream effectors of HDAC6

In [16]:
import time
import sys
import getpass
from collections import defaultdict

import bel_repository
import bio2bel_hgnc
import bio2bel_famplex
import pandas as pd
import pybel
import pybel_jupyter
import pybel_tools
import hbp_knowledge
from pybel.dsl import Protein
from pybel.struct import get_subgraph_by_neighborhood, get_subgraph_by_second_neighbors
from pybel.struct.mutation import (
    remove_biological_processes, remove_pathologies, remove_isolated_nodes, remove_associations,
)
from pybel.manager.citation_utils import get_pubmed_citation_response
from pybel_tools.mutation import enrich_complexes, enrich_variants, expand_internal

In [2]:
print(f'''BEL Repository: v{bel_repository.get_version()}
PyBEL: v{pybel.get_version()}
PyBEL-Jupyer: v{pybel_jupyter.get_version()}
PyBEL-Tools: v{pybel_tools.get_version()}
pharamcome/knowledge: v{hbp_knowledge.VERSION}
''')

BEL Repository: v0.0.8-dev
PyBEL: v0.13.3-dev
PyBEL-Jupyer: v0.2.2-dev
PyBEL-Tools: v0.7.4-dev
pharamcome/knowledge: v0.0.5



In [3]:
print(sys.version)

3.7.3 (default, Mar 27 2019, 09:23:39) 
[Clang 10.0.0 (clang-1000.11.45.5)]


In [4]:
print(time.asctime())

Thu Jul 25 13:35:19 2019


In [5]:
print(getpass.getuser())

cthoyt


In [6]:
graph = hbp_knowledge.repository.get_graph()
remove_associations(graph)
remove_pathologies(graph)
remove_isolated_nodes(graph)
graph.summarize()

Human Brain Pharmacome Knowledge v0.0.5
Number of Nodes: 5258
Number of Edges: 15094
Number of Citations: 340
Number of Authors: 1910
Network Density: 5.46E-04
Number of Components: 37


In [18]:
# TODO add expansion
famplex_manager = bio2bel_famplex.Manager()
famplex_manager

<bio2bel_famplex.manager.Manager at 0x124143b00>

In [7]:
hgnc_manager = bio2bel_hgnc.Manager()
hgnc_manager.normalize_genes(graph)
graph.summarize()

Human Brain Pharmacome Knowledge v0.0.5
Number of Nodes: 5258
Number of Edges: 15094
Number of Citations: 340
Number of Authors: 1910
Network Density: 5.46E-04
Number of Components: 37


In [19]:
# Find all HDAC* in the BEL graph

hdacs = []
for node in graph:
    name = node.get('name')
    if name is not None and name.startswith('HDAC'):
        hdacs.append(node)
        
hdacs

[<BEL p(FPLX:HDAC)>, <BEL p(hgnc:HDAC6)>, <BEL p(hgnc:HDAC3)>]

In [20]:
#subgraph = get_subgraph_by_second_neighbors(graph, [hdac6])
subgraph = get_subgraph_by_neighborhood(graph, hdacs)
enrich_complexes(subgraph)
enrich_variants(subgraph)
#expand_internal(graph, subgraph)
subgraph.summarize()

None vNone
Number of Nodes: 53
Number of Edges: 56
Number of Citations: 19
Number of Authors: 123
Network Density: 2.03E-02
Number of Components: 3


In [21]:
pybel_jupyter.to_jupyter(subgraph)

<IPython.core.display.Javascript object>

In [22]:
pmid_to_targets = defaultdict(set)

for u, v, data in subgraph.edges(data=True):
    if 'citation' in data:
        for x in u, v:
            if 'name' in x and x['namespace'] == 'hgnc':
                pmid_to_targets[data['citation']['reference']].add(x)

pmid_to_targets = dict(pmid_to_targets)


with open('hdac6-pmids.txt', 'w') as file:
    for pmid in sorted(pmid_to_targets):
        print(f'pmid:{pmid}', file=file)

print(f'there are {len(pmid_to_targets)} PubMed references')

there are 15 PubMed references


In [23]:
pmid_to_targets

{'21427723': {<BEL p(hgnc:MAPT, pmod(Ac))>},
 '18930136': {<BEL p(hgnc:HDAC6)>},
 '28803412': {<BEL p(hgnc:HDAC6)>},
 '23487739': {<BEL p(hgnc:HDAC6)>},
 '27181519': {<BEL p(hgnc:HDAC6)>},
 '26631930': {<BEL p(hgnc:HDAC6)>, <BEL p(hgnc:MAPT, pmod(Ac, Lys))>},
 '28760828': {<BEL p(hgnc:HDAC6)>,
  <BEL p(hgnc:MAPT)>,
  <BEL p(hgnc:MAPT, pmod(Ac, Lys, 321))>,
  <BEL p(hgnc:MAPT, pmod(Ph, Ser, 324))>},
 '29311797': {<BEL p(hgnc:AIP)>, <BEL p(hgnc:HDAC6)>},
 '25546293': {<BEL p(hgnc:CTNNB1, pmod(Ac, Lys, 49))>,
  <BEL p(hgnc:CTNNB1, pmod(Ph, Ser, 33))>,
  <BEL p(hgnc:CTNNB1, pmod(Ph, Ser, 37))>,
  <BEL p(hgnc:CTNNB1, pmod(Ph, Ser, 45))>,
  <BEL p(hgnc:CTNNB1, pmod(Ph, Thr, 41))>,
  <BEL p(hgnc:HDAC6)>},
 '25031639': {<BEL p(hgnc:HDAC6)>,
  <BEL p(hgnc:MAPT, pmod(Ph, Ser, 262))>,
  <BEL p(hgnc:MAPT, pmod(Ph, Ser, 356))>},
 '24844691': {<BEL p(hgnc:HDAC6)>},
 '18558669': {<BEL p(hgnc:HDAC6)>},
 '29844403': {<BEL p(hgnc:HDAC6)>},
 '26751493': {<BEL p(hgnc:HDAC6)>, <BEL p(hgnc:MAPT)>},
 '287719

In [24]:
pubmed_response = get_pubmed_citation_response(pmid_to_targets)

pmid_pmc_map = {}

for pmid in pubmed_response['result']['uids']:
    for article_id in pubmed_response['result'][pmid]['articleids']:
        if 'pmc' == article_id['idtype']:
            pmid_pmc_map[pmid] = article_id['value']

In [25]:
df = pd.DataFrame(
    data=list({
        (
            target.name,
            target.identifier,
            pmid,
            pmid_pmc_map.get(pmid),
        )
    for pmid, targets in pmid_to_targets.items()
    for target in targets
    }), 
    columns=['hgnc.symbol', 'hgnc', 'pmid', 'pmc']
).sort_values(['hgnc.symbol', 'pmid'])

# Output
df.to_csv('hdac6-article-targets.tsv', sep='\t', index=False)

# Show in notebook
df

Unnamed: 0,hgnc.symbol,hgnc,pmid,pmc
8,AIP,358,29311797,PMC5744016
12,CTNNB1,2514,25546293,PMC4372110
5,HDAC3,4854,28771976,PMC5595690
2,HDAC6,14064,18558669,
16,HDAC6,14064,18930136,PMC2621359
14,HDAC6,14064,23487739,PMC3606981
4,HDAC6,14064,24844691,
15,HDAC6,14064,25031639,PMC4075151
13,HDAC6,14064,25546293,PMC4372110
19,HDAC6,14064,26631930,
