# Generate demo data
This notebook tests and implements the generation of demo data. The data is used to serve the results of a real-world search query to the frontend in development environments that have no connection to the Mongo server.

## Generation of demo data

In [4]:
import sys
sys.path.append('../backend/lib')
sys.path.append('../backend')
from GraphSession import GraphSession
from ResultGraph import ResultGraph
from networkx import generate_gml
import networkx as nx
from networkx.readwrite import json_graph
import json

# Search term used to build the demo graph.
# Alternatives tried earlier: "campbell cancer leukemia", "nlp genomics".
search_input = "tourigny"
graphsession = GraphSession(search_input)
# Fetch citation data for the search term. retmax=200 shows up in the eutils
# esearch URL in the log output below, which also reports Mongo connections.
citations = graphsession.get_citations_from_fulltext_mongo(search_input, retmax=200)
resultGraph = ResultGraph()
resultGraph.populate_from_cite_dict(citations)
        
# NOTE(review): two successive extraction passes with different connectivity
# values; the filtering semantics live in ResultGraph -- confirm both are intended.
resultGraph.extract_by_connectivity(connectivity=1)
resultGraph.extract_by_connectivity(connectivity=0)
        
# Query metadata for resultGraph.nodeIds and attach it to the graph.
metadataList = graphsession.get_metadataList_from_mongo(resultGraph.nodeIds)
resultGraph.add_metadata_to_graph(metadataList)
# Last expression of the cell: displays the graph as a networkx node-link dict.
json_graph.node_link_data(resultGraph.G)

07/19/2018 09:58:46 PM [INFO]: RequestFullTextSearch: Creating request
07/19/2018 09:58:46 PM [INFO]: ConnectEutils: Connecting to eutils API:
 https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pmc&tool=graphsearch&sort=relevance&term=tourigny&retmax=200
07/19/2018 09:58:47 PM [INFO]: MongoSession: Mongo db connection successful
07/19/2018 09:58:47 PM [INFO]: MongoSession: Mongo db connection successful


{'directed': False,
 'graph': {},
 'links': [{'source': '20552060', 'target': '9856605'},
  {'source': '20552060', 'target': '18256123'},
  {'source': '20552060', 'target': '15679994'},
  {'source': '20552060', 'target': '17292982'},
  {'source': '20552060', 'target': '19028709'},
  {'source': '20552060', 'target': '19804548'},
  {'source': '20552060', 'target': '1245032'},
  {'source': '20552060', 'target': '23323117'},
  {'source': '20552060', 'target': '181165'},
  {'source': '20552060', 'target': '15583083'},
  {'source': '20552060', 'target': '19879594'},
  {'source': '20552060', 'target': '7484735'},
  {'source': '20552060', 'target': '18774110'},
  {'source': '23115575', 'target': '27144222'},
  {'source': '23115575', 'target': '26977318'},
  {'source': '18317561', 'target': '10197028'},
  {'source': '18317561', 'target': '16125809'},
  {'source': '18317561', 'target': '8669343'},
  {'source': '18317561', 'target': '15660297'},
  {'source': '18317561', 'target': '9347783'},
  {'

In [2]:
from networkx.readwrite import json_graph
import json

# Convert the graph to networkx node-link form so it can be stored as plain JSON.
data = json_graph.node_link_data(resultGraph.G)
# Write through a context manager so the handle is flushed and closed as soon as
# the dump finishes; a bare open(...) passed to json.dump is never explicitly
# closed and may leave a truncated file for whoever reads it next.
with open('output/demo_network_pubyear.json', 'w') as demo_file:
    json.dump(data, demo_file, indent=2)

### Try reimporting

In [3]:
from networkx.readwrite import json_graph
import json

def read_json_file(filename):
    """Load a node-link JSON file and rebuild the graph it describes.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The graph object produced by ``json_graph.node_link_graph`` from the
        parsed JSON content.
    """
    with open(filename) as handle:
        node_link_payload = json.load(handle)
    graph = json_graph.node_link_graph(node_link_payload)
    return graph

# Re-load a previously saved demo graph to check that the JSON round-trips.
# NOTE(review): this reads 'output/demo_network.json', whereas the dump cell in
# this notebook writes 'output/demo_network_pubyear.json' -- confirm which file is intended.
g = read_json_file('output/demo_network.json')

In [4]:
# Display the re-imported object to confirm a graph instance was created.
g

<networkx.classes.graph.Graph at 0x150fe62550>

## Generate empty graph

In [1]:
import sys
sys.path.append('../backend/lib')
sys.path.append('../backend')
from GraphSession import GraphSession
from ResultGraph import ResultGraph
from networkx import generate_gml
import networkx as nx
from networkx.readwrite import json_graph
import json

# Gibberish search term for the empty-graph case; the trailing comment keeps
# the search terms used in other experiments.
search_input = "asdfasdfasdfasfd"#"campbell cancer leukemia" #"nlp genomics"
graphsession = GraphSession(search_input)
# Last expression of the cell: the recorded output below shows a dict with empty
# 'nodes'/'edges' lists under 'graph' and zero counts under 'stats'.
graphsession.return_empty_graph()

06/09/2018 03:51:47 PM [INFO]: RequestFullTextSearch: Creating request


{'graph': {'edges': [], 'nodes': []},
 'stats': {'max_degree_cited': 1.0,
  'num_citations': 0,
  'num_links': 0,
  'num_results': 0,
  'pub_years': {'num_bin': 0, 'values': []},
  'top_authors': '',
  'top_authors_list': [],
  'top_journals': '',
  'top_journals_list': []}}

## Simulate browser session

In [1]:
import sys
sys.path.append('../backend/lib')
sys.path.append('../backend')
from GraphSession import GraphSession
from ResultGraph import ResultGraph
from networkx import generate_gml
import networkx as nx
from networkx.readwrite import json_graph
import json

# Search term for the simulated session; the trailing comment keeps an alternative term.
search_input = "campbell cancer leukemia" #"nlp genomics"
graphsession = GraphSession(search_input)
# Request the graph in 'cyto' format using 'live' mode. The recorded log below
# reports "Using live mode for data retrieval" plus an eutils esearch request,
# and the displayed result is a JSON string with "graph" -> "nodes" entries.
graphsession.get_cy_json(graph_format='cyto', mode='live')

08/03/2018 11:27:20 PM [INFO]: Created new log.
08/03/2018 11:27:20 PM [INFO]: RequestFullTextSearch: Creating request
08/03/2018 11:27:20 PM [INFO]: GraphSession: Search input received: campbell cancer leukemia
08/03/2018 11:27:20 PM [INFO]: Using live mode for data retrieval.
08/03/2018 11:27:20 PM [INFO]: ConnectEutils: Connecting to eutils API:
 https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pmc&tool=graphsearch&sort=relevance&term=campbell+cancer+leukemia&retmax=20
08/03/2018 11:27:32 PM [INFO]: Max degree: 2
08/03/2018 11:27:32 PM [INFO]: {"graph": {"nodes": [{"data": {"node_col": "#ff7373", "journal": "Cell", "key": "1652368", "authors": "Kakizuka, Miller, Umesono, Warrell", "cite_color": "black", "id": "1652368", "group": "Cited", "name": "1652368", "pubDate": "Aug 1991", "title": "Chromosomal translocation t(15;17) in human acute promyelocytic leukemia fuses RAR alpha with a novel putative transcription factor, PML.", "label": "Cell", "journal_iso": "Cell"}}, {"

08/03/2018 11:27:32 PM [INFO]: Max degree: 2


9
9


'{"graph": {"nodes": [{"data": {"node_col": "#ff7373", "journal": "Cell", "key": "1652368", "authors": "Kakizuka, Miller, Umesono, Warrell", "cite_color": "black", "id": "1652368", "group": "Cited", "name": "1652368", "pubDate": "Aug 1991", "title": "Chromosomal translocation t(15;17) in human acute promyelocytic leukemia fuses RAR alpha with a novel putative transcription factor, PML.", "label": "Cell", "journal_iso": "Cell"}}, {"data": {"node_col": "#ff7373", "journal": "Nature", "key": "22307276", "authors": "Degner, Pai, Pique-Regi, Veyrieras", "cite_color": "black", "id": "22307276", "group": "Cited", "name": "22307276", "pubDate": "Feb 2012", "title": "DNase\\u2009I sensitivity QTLs are a major determinant of human expression variation.", "label": "Nature", "journal_iso": "Nature"}}, {"data": {"node_col": "#ff7373", "journal": "Nucleic acids research", "key": "18178591", "authors": "Lin, Du, Huber, Kibbe", "cite_color": "black", "id": "18178591", "group": "Cited", "name": "181785