## Purpose
* The purpose of this sheet is to simulate a browser session in "demo" (not "live") mode, i.e. reading the data from a pre-computed JSON file rather than from a database query.

In [6]:
import sys
sys.path.append('../backend/lib')
sys.path.append('../backend')
from GraphSession import GraphSession
from ResultGraph import ResultGraph
from networkx import generate_gml
import networkx as nx

# Create a session object with an empty argument. It is not referenced again
# in this cell.
# NOTE(review): the "RequestFullTextSearch: Creating request" log line in the
# saved output presumably originates from this constructor -- confirm.
graphsession = GraphSession('')

# Demo mode: populate the result graph from a pre-computed JSON file instead
# of running a db query.
resultGraph = ResultGraph()
resultGraph.G = resultGraph.read_json_file('output/demo_network_epigenetics.json')

# Node count of the graph as loaded. The parenthesised single-argument print
# behaves the same on Python 2 and Python 3 (the bare print statement is a
# SyntaxError on Python 3).
print(resultGraph.G.number_of_nodes())

# Apply the connectivity filter (threshold 3) and report the node count again;
# the saved output shows the count dropping after this call.
resultGraph.extract_by_connectivity(connectivity=3)
print(resultGraph.G.number_of_nodes())

# Last expression of the cell: the graph in 'cytoscape' format, shown as the
# cell's output.
resultGraph.get_graph(graph_format='cytoscape')

03/30/2018 05:18:00 PM [INFO]: RequestFullTextSearch: Creating request
03/30/2018 05:18:00 PM [INFO]: Max degree: 27


886
158


'{"nodes": [{"data": {"authors": "Conway O\'Brien, Prideaux, Chevassut", "cite_color": "black", "group": "Cited", "name": "24778653", "key": "24778653", "title": "The epigenetic landscape of acute myeloid leukemia.", "journal": "Advances in hematology", "pubDate": " 2014", "id": "24778653", "node_col": "#ff2a2a"}}, {"data": {"authors": "Lorsbach, Moore, Mathew", "cite_color": "black", "group": "Cited", "name": "12646957", "key": "12646957", "title": "TET1, a member of a novel protein family, is fused to MLL in acute myeloid leukemia containing the t(10;11)(q22;q23).", "journal": "Leukemia", "pubDate": "Mar 2003", "id": "12646957", "node_col": "#ffc3c3"}}, {"data": {"authors": "Daigle, Olhava, Therkelsen", "cite_color": "black", "group": "Cited", "name": "23801631", "key": "23801631", "title": "Potent inhibition of DOT1L as treatment of MLL-fusion leukemia.", "journal": "Blood", "pubDate": "Aug 2013", "id": "23801631", "node_col": "#ffc3c3"}}, {"data": {"authors": "Lu, Thompson", "cite_

## Debug top journals display

In [1]:
import sys
sys.path.append('../backend/lib')
sys.path.append('../backend')
from GraphSession import GraphSession
from ResultGraph import ResultGraph
from networkx import generate_gml
import networkx as nx
from networkx.readwrite import json_graph

# Create a session object with an empty argument. It is not referenced again
# in this cell.
graphsession = GraphSession('')

# Demo mode: populate the result graph from the pre-computed authors demo file.
resultGraph = ResultGraph()
resultGraph.G = resultGraph.read_json_file('output/demo_network_authors.json')

# Node count of the graph as loaded. The parenthesised single-argument print
# behaves the same on Python 2 and Python 3 (the bare print statement is a
# SyntaxError on Python 3).
print(resultGraph.G.number_of_nodes())

# Apply the connectivity filter (threshold 3) and report the node count again.
resultGraph.extract_by_connectivity(connectivity=3)
print(resultGraph.G.number_of_nodes())

#resultGraph.get_graph(graph_format='cytoscape')

# Convert the graph to node-link form and compute the summary statistics from it.
# NOTE(review): the saved output contains a third number that this cell does
# not print itself; presumably it is emitted inside get_search_stats -- confirm.
n_json = json_graph.node_link_data(resultGraph.G)
search_stats = resultGraph.get_search_stats(n_json)

# Last expression of the cell: display the statistics dict.
search_stats

05/13/2018 03:10:35 PM [INFO]: RequestFullTextSearch: Creating request


379
66
30


{'max_degree_cited': 22,
 'num_citations': 36,
 'num_links': 195,
 'num_results': 30,
 'top_journals': 'Nature (17), Nat Commun (6), Nucleic Acids Res. (5), Cell (4), Bioinformatics (4)'}

In [16]:
from collections import Counter

# Count how often each journal occurs and render the five most frequent ones
# as a single "Name (count), ..." string.
# NOTE(review): the search_stats dict displayed earlier in this notebook has no
# 'journal_lst' key, so this cell appears to rely on an earlier kernel state --
# confirm before re-running from a fresh kernel.
journal_counts = Counter(search_stats['journal_lst'])
journal_labels = [
    '{0} ({1})'.format(journal_name, n_hits)
    for journal_name, n_hits in journal_counts.most_common(5)
]
top_journals = ', '.join(journal_labels)
top_journals

'Nature (17), Nat Commun (6), Nucleic Acids Res. (5), Cell (4), Bioinformatics (4)'

In [26]:
from collections import Counter
import itertools

# Flatten every node's 'authors_all' entries into one list, skipping empty
# names, then render the five most frequent authors as "Name (count), ...".
author_lst = [
    author_name
    for node in n_json['nodes']
    for author_name in node['authors_all']
    if author_name != ''
]
author_counts = Counter(author_lst)
author_labels = [
    '{0} ({1})'.format(author_name, n_hits)
    for author_name, n_hits in author_counts.most_common(5)
]
top_authors = ', '.join(author_labels)
top_authors

'Campbell (53), Stratton (30), Futreal (28), Teague (23), Jones (20)'

## Debug pub year histogram

In [1]:
import sys
sys.path.append('../backend/lib')
sys.path.append('../backend')
from GraphSession import GraphSession
from ResultGraph import ResultGraph
from networkx import generate_gml
import networkx as nx
from networkx.readwrite import json_graph

# Create a session object with an empty argument. It is not referenced again
# in this cell.
graphsession = GraphSession('')

# Demo mode: populate the result graph from the pre-computed pub-year demo file.
resultGraph = ResultGraph()
resultGraph.G = resultGraph.read_json_file('output/demo_network_pubyear.json')

# Node count of the graph as loaded. The parenthesised single-argument print
# behaves the same on Python 2 and Python 3 (the bare print statement is a
# SyntaxError on Python 3).
print(resultGraph.G.number_of_nodes())

# Apply the connectivity filter (threshold 3) and report the node count again.
resultGraph.extract_by_connectivity(connectivity=3)
print(resultGraph.G.number_of_nodes())

#resultGraph.get_graph(graph_format='cytoscape')

# Convert the graph to node-link form and compute the summary statistics from it.
# NOTE(review): the saved output contains a third number that this cell does
# not print itself; presumably it is emitted inside get_search_stats -- confirm.
n_json = json_graph.node_link_data(resultGraph.G)
search_stats = resultGraph.get_search_stats(n_json)

# Last expression of the cell: display the statistics dict.
search_stats

05/16/2018 09:52:31 PM [INFO]: RequestFullTextSearch: Creating request


370
64
29


{'max_degree_cited': 21,
 'num_citations': 35,
 'num_links': 178,
 'num_results': 29,
 'top_authors': 'Campbell (51), Stratton (29), Futreal (27), Teague (22), Jones (20)',
 'top_journals': 'Nature (17), Nucleic Acids Res. (5), Nat Commun (5), Cell (4), Bioinformatics (4)'}

In [5]:
# Gather the 'year' attribute of every node, converted to int, in node order.
years_lst = []
for node in n_json['nodes']:
    years_lst.append(int(node['year']))
years_lst

[2014,
 2014,
 2014,
 2006,
 2014,
 2009,
 2014,
 2012,
 2008,
 2015,
 2009,
 2013,
 2004,
 2013,
 2013,
 2014,
 2011,
 2015,
 2016,
 2008,
 2011,
 2010,
 2015,
 2016,
 2012,
 2012,
 2004,
 2016,
 2011,
 2010,
 2016,
 2011,
 2010,
 2010,
 2009,
 2016,
 2010,
 2010,
 2013,
 2010,
 2014,
 2011,
 2009,
 2015,
 2011,
 2012,
 2012,
 2015,
 2009,
 2011,
 2009,
 2012,
 2015,
 2010,
 2007,
 2012,
 2011,
 2009,
 2012,
 2006,
 2016,
 2016,
 2015,
 2016]

In [3]:
# Display the current years_lst.
# NOTE(review): this cell's execution count (In [3]) is lower than that of the
# int-casting cell above it (In [5]), and its saved output shows unicode year
# strings -- the output appears to predate the int() conversion. Confirm and
# re-run or remove.
years_lst

[u'2014',
 u'2014',
 u'2014',
 u'2006',
 u'2014',
 u'2009',
 u'2014',
 u'2012',
 u'2008',
 u'2015',
 u'2009',
 u'2013',
 u'2004',
 u'2013',
 u'2013',
 u'2014',
 u'2011',
 u'2015',
 u'2016',
 u'2008',
 u'2011',
 u'2010',
 u'2015',
 u'2016',
 u'2012',
 u'2012',
 u'2004',
 u'2016',
 u'2011',
 u'2010',
 u'2016',
 u'2011',
 u'2010',
 u'2010',
 u'2009',
 u'2016',
 u'2010',
 u'2010',
 u'2013',
 u'2010',
 u'2014',
 u'2011',
 u'2009',
 u'2015',
 u'2011',
 u'2012',
 u'2012',
 u'2015',
 u'2009',
 u'2011',
 u'2009',
 u'2012',
 u'2015',
 u'2010',
 u'2007',
 u'2012',
 u'2011',
 u'2009',
 u'2012',
 u'2006',
 u'2016',
 u'2016',
 u'2015',
 u'2016']