In [1]:
import json
import time
from functools import reduce

import pandas as pd
import numpy as np

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from tqdm.autonotebook import tqdm

from py2neo import Graph, Node, Relationship

In [2]:
import os

# Connection settings for the Neo4j instance. Each value can be overridden with
# an environment variable so credentials do not have to live in the notebook;
# the literals are the notebook's original values, kept only as fallbacks.
# NOTE(review): the fallback password is still committed in plain text -- rotate
# it and drop the default once NEO4J_PASSWORD is set wherever this runs.
public_address = os.environ.get('NEO4J_HOST', '54.174.175.98')
graph = Graph('bolt://{}:7687'.format(public_address),
              auth=(os.environ.get('NEO4J_USER', 'neo4j'),
                    os.environ.get('NEO4J_PASSWORD', 'myneo')))

def run_query(query, graph, print_query=False, run_query=True,
              print_only=False, to_df=False, verbose=True, parameters=None):
    """Optionally print and/or execute a Cypher query, reporting elapsed time.

    Args:
        query: Cypher text passed to ``graph.run``.
        graph: Object exposing ``run(query)``. When ``to_df`` is True, the
            value returned by ``run`` must offer ``to_data_frame()``.
        print_query: Print the query text before doing anything else.
        run_query: Execute the query. Inside this function the name refers to
            this flag, not to the function itself.
        print_only: Shorthand that forces ``print_query=True`` and
            ``run_query=False``.
        to_df: Return the result converted with ``to_data_frame()``.
        verbose: Print the elapsed time once the query has been executed.
        parameters: Optional mapping of Cypher parameters. When given (and
            non-empty) it is forwarded as the second positional argument of
            ``graph.run`` so values need not be formatted into the query text.

    Returns:
        The data frame when the query ran with ``to_df=True``; otherwise the
        integer ``1``, a placeholder kept for compatibility with existing
        callers.
    """
    if print_only:
        print_query, run_query = True, False
    if print_query:
        print(query)

    df = 1
    if not run_query:
        # Nothing was executed, so there is no timing to report.
        return df

    start_time = time.time()
    # Only pass parameters when supplied, so the plain call is unchanged.
    cursor = graph.run(query, parameters) if parameters else graph.run(query)
    if to_df:
        df = cursor.to_data_frame()
    minutes_elapsed = (time.time() - start_time) / 60
    if verbose:
        print("Query completed in {:.2f} minutes.".format(minutes_elapsed))
    return df

In [202]:
author_list = ['jcraigventer',
               'edwardsboyden',
               'davidbotstein',
               'georgemchurch',
               'shuguangzhang',
               'josephmjacobson']

# Fetch the titles authored by each name in author_list, one query per author.
# NOTE: the name is formatted straight into the Cypher text; that is acceptable
# only because author_list is a hard-coded literal, not external input.
author_frames = []
for author in author_list:
    query = """
    match (a:Author)-[:AUTHORED]->(q:Quanta)
    where a.cleanName='{}'
    return a.cleanName as author, q.title as title    
    """.format(author)
    authordf = run_query(query, graph, to_df=True)
    author_frames.append(authordf)

# Concatenate once instead of growing the frame inside the loop. The list is
# reversed so the most recently fetched author comes first, which keeps the row
# order this cell has always produced.
titledf = pd.concat(author_frames[::-1])

Query completed in 0.00 minutes.
Query completed in 0.00 minutes.
Query completed in 0.00 minutes.
Query completed in 0.00 minutes.
Query completed in 0.00 minutes.
Query completed in 0.00 minutes.


In [203]:
import markovify

# Build one title generator per author, keyed by the author's cleanName.
models = {}
for author in author_list:
    print("Training model for {}...".format(author))

    # Join this author's titles into one newline-separated corpus, since
    # NewlineText is fed one title per line.
    is_author = titledf['author'] == author
    authorcorpus = titledf.loc[is_author, 'title'].str.cat(sep="\n")

    authormodel = markovify.NewlineText(authorcorpus, state_size=2)
    models[author] = authormodel

Training model for jcraigventer...
Training model for edwardsboyden...
Training model for davidbotstein...
Training model for georgemchurch...
Training model for shuguangzhang...
Training model for josephmjacobson...


In [204]:
# Show up to five generated titles per author.
for author in author_list:
    print("\n=== {} ===".format(author))
    for i in range(5):
        sentence = models[author].make_sentence(tries=100)
        # make_sentence gives None when no acceptable sentence was produced
        # within `tries` attempts; skip it rather than printing "None".
        if sentence is None:
            continue
        print("{}\n".format(sentence))


=== jcraigventer ===
Profound Perturbation of the mouse mitochondrial genome

A shotgun optical map of the Human Genome for Understanding Human Biology and Medicine

Ancient noncoding elements conserved in the metabolic rates of living cells

3,400 new expressed sequence tags identify diversity of the microbial kinome.

The Sequence of Plasmodium falciparum


=== edwardsboyden ===
Abstract 4229: Physical expansion of tissue microarrays for high-resolution imaging of RNA with expansion microscopy of zebrafish for neuroscience and developmental biology studies

Simultaneous whole-animal 3D imaging of RNA with expansion microscopy

Millisecond-timescale, genetically targeted optical control of excitable cells.

Channelrhodopsin-2 and optical control of gamma and theta activity by distinct interneuron networks in the ventral tegmental area induces reanimation from general anesthesia

Abstract 4229: Physical expansion of tissue microarrays for high-resolution imaging of neuronal activity u

In [174]:
# Pull every Quanta title in the graph. This is the slow step: the recorded run
# reports 1.30 minutes for the query.
query = """
match (q:Quanta)
return q.title as title    
"""
alltitledf = run_query(query, graph, to_df=True)

print("== Training model for all titles ==")
# One title per line, the same corpus layout used for the per-author models.
alltitlecorpus = alltitledf['title'].str.cat(sep="\n")
alltitle_options = dict(state_size=2,
                        well_formed=True,
                        retain_original=False)
alltitlemodel = markovify.NewlineText(alltitlecorpus, **alltitle_options)

# Register the corpus-wide model next to the per-author ones.
models['allauthors'] = alltitlemodel

print("== Done. ==\n")
for _ in range(10):
    sample_title = alltitlemodel.make_sentence(tries=100)
    print("{}\n".format(sample_title))

Query completed in 1.30 minutes.
== Training model for all titles ==
== Done. ==

A New Inexpensive and Easily Prepared Post-Transition Metal Catalysts

Lehrbuch der Tropenkrankheiten

Medical Care: A Survey of the Glass Research at Last

Gut microbiome and enriches for a defective beta-D-galactosidase. II. Immunological relationship between the Basic Rocks

Birth of a Tetranuclear Manganese Aggregate Exhibiting Short OċO Interactions

Precision engineering for a leader.

Regiospecific anodic cyanation of aromatics from D-glucose: rate-limiting enzymes in oxidative stress injury

Methyl-coenzyme M Reductase

Managing the Detail Man-Reply

Light-directed, programmable microarray synthesis



In [219]:
# Relative weight given to each author's model when blending.
# NOTE(review): a weight of 0 presumably leaves that author's transitions with
# no influence on the blend -- confirm against markovify.combine.
author_weightings = {
    'jcraigventer':    0,
    'edwardsboyden':   0,
    'davidbotstein':   0,
    'georgemchurch':   5,
    'shuguangzhang':   0,
    'josephmjacobson': 100,
}
# To blend in the corpus-wide model, add an 'allauthors' entry above; this
# requires models['allauthors'] to have been created first.

# Split the mapping into parallel sequences so models and weights stay aligned.
blend_authors, blend_weights = zip(*author_weightings.items())
combinedmodel = markovify.combine(
    [models[name] for name in blend_authors],
    list(blend_weights))

In [220]:
# Draw up to 200 titles from the blended model.
for _ in range(200):
    sentence = combinedmodel.make_sentence(tries=100)
    # make_sentence gives None when no acceptable sentence was produced within
    # `tries` attempts; skip it rather than printing "None".
    if sentence is None:
        continue
    print("{}\n".format(sentence))

A computational analysis of LexA binding reveals the permissive nature of the Antibiotic Resistance Reservoir in the laboratory.

High-throughput creation and functional profiling of DNA hybridization through inductive coupling to an attached metal nanocrystal antenna

Opinion: Advocating for science progress as a Cause of Dilated Cardiomyopathy

A computational analysis of the Escherichia coli

Engineering an allosteric transcription factor binding sites

CRISPR–Cas encoding of a Novel Set of Genes Regulated by a unique machine.

A whole genome approach to in vivo methylase protection in E. coli

Probing the limits of genetic network architecture

BIOSAFETY. Safeguarding gene drive experiments in the PGT121 Family of Broadly Neutralizing HIV Antibodies

The effects of somatic hypermutation on neutralization and binding in the PGT121 Family of Broadly Neutralizing HIV Antibodies

Gem of an immunoglobulin enhancer with cellular factors in vivo DNA-protein interactions in E. coli

Charac