In [None]:
import json
from pymedgraph.manager import MedGraphManager

In [None]:
# Assemble the request: which disease to search for and how many articles to fetch,
# serialised to a JSON string.
key_term = 'phenylketonuria'
request_payload = {'disease': key_term, 'n_articles': 100}
request_json = json.dumps(request_payload)
request_json

In [None]:
# Create the manager from the local config file ...
config_path = '../pymedgraph/localconfig.json'
manager = MedGraphManager(config_path=config_path)
# ... and run it on the JSON request built above.
result = manager.construct_med_graph(request_json)
# Number of entries returned (later cells iterate `result` with `.items()`).
len(result)

In [None]:
# Flatten the per-paper results into four corpus-wide lists. Duplicates are kept,
# so the lists can be frequency-counted later.
mesh_terms, key_words = [], []
diseas_list, chemical_list = [], []
for paper in result.values():
    mesh_terms.extend(paper['mesh_terms'])
    key_words.extend(paper['key_words'])
    # Each entity is indexed as [0] = text, [1] = label; split the texts by label.
    for entity in paper['entities']:
        if entity[1] == 'DISEASE':
            diseas_list.append(entity[0])
        if entity[1] == 'CHEMICAL':
            chemical_list.append(entity[0])
len(mesh_terms), len(key_words), len(diseas_list), len(chemical_list)

In [None]:
# pandas and matplotlib might need to be installed first
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Bar chart of the 50 most frequent MeSH terms over all papers
mesh_counts = pd.Series(mesh_terms).value_counts()
fig = plt.figure(figsize=(14, 8))
top_mesh = mesh_counts[:50]  # value_counts() sorts descending, so this is the top 50
top_mesh.plot(kind='bar')
plt.title('MeSH')
plt.show()

In [None]:
# Bar chart of the 50 most frequent author keywords over all papers
keyword_counts = pd.Series(key_words).value_counts()
fig = plt.figure(figsize=(14, 8))
top_keywords = keyword_counts[:50]  # value_counts() sorts descending, so this is the top 50
top_keywords.plot(kind='bar')
plt.title('KeyWords')
plt.show()

In [None]:
# Bar chart of the 50 most frequently mentioned DISEASE entities
disease_counts = pd.Series(diseas_list).value_counts()
fig = plt.figure(figsize=(14, 8))
top_disease_counts = disease_counts[:50]  # value_counts() sorts descending, so this is the top 50
top_disease_counts.plot(kind='bar')
plt.title('disease')
plt.show()

In [None]:
# Bar chart of the 50 most frequently mentioned CHEMICAL entities
chemical_counts = pd.Series(chemical_list).value_counts()
fig = plt.figure(figsize=(14, 8))
top_chemical_counts = chemical_counts[:50]  # value_counts() sorts descending, so this is the top 50
top_chemical_counts.plot(kind='bar')
plt.title('chemical')
plt.show()

In [None]:
import networkx as nx

In [None]:
# Rank entity mentions by frequency and keep the 20 most common of each kind
disease_mention_counts = pd.Series(diseas_list).value_counts()
chemical_mention_counts = pd.Series(chemical_list).value_counts()
disease_top20 = list(disease_mention_counts[:20].index)
chemical_top20 = list(chemical_mention_counts[:20].index)
def build_graph_df(r_dict, top_disease, top_chemical):
    """Build an edge list linking each paper to the top entities it mentions.

    Parameters
    ----------
    r_dict : dict
        Maps a paper key to a dict whose ``'entities'`` value is an iterable of
        ``(entity, label)`` pairs.
    top_disease : collection
        Entities to keep when the label is ``'DISEASE'``.
    top_chemical : collection
        Entities to keep when the label is ``'CHEMICAL'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``source`` (paper key) and ``target`` (entity), one row per
        retained mention; repeated mentions yield repeated rows.
    """

    def _is_top_entity(entity, label):
        # An entity is kept only if it is in the top list matching its own label.
        return (label == 'CHEMICAL' and entity in top_chemical) or (
            label == 'DISEASE' and entity in top_disease
        )

    edges = [
        (paper_key, entity)
        for paper_key, paper in r_dict.items()
        for entity, label in paper['entities']
        if _is_top_entity(entity, label)
    ]
    return pd.DataFrame({
        'source': [paper_key for paper_key, _ in edges],
        'target': [entity for _, entity in edges],
    })

In [None]:
# Edge list (paper -> top-20 disease/chemical entity) used to build the graph
df = build_graph_df(
    r_dict=result,
    top_disease=disease_top20,
    top_chemical=chemical_top20,
)

In [None]:
# Turn the edge list into a graph: every distinct value in the
# 'source' and 'target' columns becomes a node.
G = nx.from_pandas_edgelist(df, source='source', target='target')

In [None]:
# build color list
# Membership is tested against the top-20 lists (as sets, for O(1) lookups): those are
# the only entities that were turned into graph nodes, so the full `chemical_list`
# (every mention, with duplicates) is not the right reference here.
top_disease_set = set(disease_top20)
top_chemical_set = set(chemical_top20)
colors = list()
for v in G:
    if v in top_disease_set:
        # disease node (wins if a term is in both top lists)
        colors.append('#2871cc')
    elif v in top_chemical_set:
        # chemical node
        colors.append('yellow')
    else:
        # everything else, i.e. the 'source' (paper) nodes
        colors.append('lightgray')
# build labels dict: each top entity is labelled with its own text
labels_ = {e: e for e in disease_top20 + chemical_top20}
len(labels_)

In [None]:
# draw graph
# Fixed seed so the force-directed layout, and hence the figure, is the same on every run.
LAYOUT_SEED = 42
pos = nx.spring_layout(G, seed=LAYOUT_SEED)
# Betweenness centrality drives the node size.
betCent = nx.betweenness_centrality(G, normalized=True, endpoints=True)
node_color = colors
# Look each score up in G's iteration order so sizes line up with `colors`
# (also built by iterating G) instead of relying on the ordering of the returned dict.
node_size = [betCent[node] * 10000 for node in G]
plt.figure(figsize=(20, 20))
nx.draw_networkx(
    G,
    pos=pos,
    with_labels=True,
    labels=labels_,  # text labels only for the nodes listed in labels_
    node_color=node_color,
    node_size=node_size,
    edge_color='gray',
)
plt.axis('off')
# End with show() like the other plotting cells, so no tuple repr is echoed.
plt.show()