In [1]:
import json
import os
import time
import warnings
from functools import reduce

import numpy as np
import pandas as pd

# Keep the FutureWarning filter ahead of the imports below, as in the original ordering.
warnings.simplefilter(action='ignore', category=FutureWarning)

from py2neo import Graph, Node, Relationship
from tqdm.autonotebook import tqdm

In [2]:
# Connection settings for the Neo4j instance. Each value can be overridden with an
# environment variable so credentials do not have to live in the notebook.
# NOTE(review): the fallbacks are the values that were previously hard-coded here;
# rotate the password and supply it via NEO4J_PASSWORD, then drop the fallback.
public_address = os.environ.get('NEO4J_HOST', '54.174.175.98')
neo4j_user = os.environ.get('NEO4J_USER', 'neo4j')
neo4j_password = os.environ.get('NEO4J_PASSWORD', 'myneo')

# Bolt endpoint on port 7687 of the configured host.
graph = Graph('bolt://{}:7687'.format(public_address),
              auth=(neo4j_user, neo4j_password))

def run_query(query, graph, print_query=False, run_query=True,
              print_only=False, to_df=False, verbose=True):
    """Optionally print and/or execute a Cypher query, reporting how long it took.

    Parameters
    ----------
    query : str
        Query text passed unchanged to ``graph.run``.
    graph : object
        Connection object exposing ``run(query)``; when ``to_df`` is true the
        value returned by ``run`` must expose ``to_data_frame()``.
    print_query : bool
        Print the query text before doing anything else.
    run_query : bool
        Execute the query. (This parameter shadows the function name inside
        the body; it is kept for backward compatibility.)
    print_only : bool
        Shortcut for ``print_query=True, run_query=False``.
    to_df : bool
        Return the result of ``graph.run(query).to_data_frame()``.
    verbose : bool
        Print the elapsed time after the query has been executed.

    Returns
    -------
    The data frame produced by the query when ``to_df`` is true and the query
    was executed; otherwise the integer ``1`` (placeholder kept so existing
    callers see the same return value as before).
    """
    if print_only:
        print_query, run_query = True, False

    if print_query:
        print(query)

    if not run_query:
        # Nothing was executed, so do not report a misleading completion time.
        return 1

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments while a long query is running.
    started = time.perf_counter()
    cursor = graph.run(query)
    result = cursor.to_data_frame() if to_df else 1
    minutes_elapsed = (time.perf_counter() - started) / 60

    if verbose:
        print("Query completed in {:.2f} minutes.".format(minutes_elapsed))
    return result

In [155]:
# Authors (by their `cleanName` property) whose titles are used as training corpora.
author_list = ['jcraigventer',
               'edwardsboyden',
               'davidbotstein',
               'georgemchurch']

# Fetch each author's titles. The per-author frames are collected in a list and
# concatenated once after the loop, rather than re-copying a growing DataFrame
# (seeded from an empty frame) on every iteration.
# NOTE: the author name is interpolated straight into the Cypher text; that is
# only acceptable because the names above are fixed literals.
author_frames = []
for author in author_list:
    query = """
    match (a:Author)-[:AUTHORED]->(q:Quanta)
    where a.cleanName='{}'
    return a.cleanName as author, q.title as title    
    """.format(author)
    authordf = run_query(query, graph, to_df=True)
    author_frames.append(authordf)

# One row per (author, title), in author_list order, with a fresh unique index.
titledf = pd.concat(author_frames, ignore_index=True)

Query completed in 0.00 minutes.
Query completed in 0.00 minutes.
Query completed in 0.00 minutes.
Query completed in 0.00 minutes.


In [156]:
import markovify

# Train one Markov model per author (state_size=2) on that author's titles.
models = {}
for author in author_list:
    print("Training model for {}...".format(author))

    # Select this author's titles and join them one per line, the input
    # layout markovify.NewlineText is given here.
    written_by_author = titledf['author'] == author
    authorcorpus = titledf.loc[written_by_author, 'title'].str.cat(sep="\n")

    models[author] = authormodel = markovify.NewlineText(authorcorpus, state_size=2)

Training model for jcraigventer...
Training model for edwardsboyden...
Training model for davidbotstein...
Training model for georgemchurch...


In [157]:
# Print five generated titles for every author model.
for author in author_list:
    print("\n=== {} ===".format(author))
    author_model = models[author]
    for sample_no in range(5):
        # Each call is allowed up to 100 attempts to produce a sentence.
        generated_title = author_model.make_sentence(tries=100)
        print("{}".format(generated_title))


=== jcraigventer ===
Genomic Islands in the human genome
Comparative genomics of the Sargasso Sea
Fast and accurate HLA typing from short-read next-generation sequence data release be forced on the Drosophila phylogeny
Structural and functional diversity of transcripts in human microbiome research
Precision medicine screening using whole-genome sequencing and assembly of the microbial kinome.

=== edwardsboyden ===
Independent control of gamma and theta activity by distinct interneuron networks in the ventral tegmental area induces reanimation from general anesthesia
Millisecond-timescale, genetically targeted optical control of gamma and theta activity by distinct interneuron networks in the Nonhuman Primate Brain
Independent control of excitable cells.
Channelrhodopsin-2 and optical control of neural activity
Channelrhodopsin-2 and optical control of gamma and theta activity by distinct interneuron networks in the ventral tegmental area induces reanimation from general anesthesia

=

In [111]:
# Fetch the title of every Quanta node, regardless of author.
query = """
match (q:Quanta)
return q.title as title    
"""
alltitledf = run_query(query, graph, to_df=True)

print("== Training model for all titles ==")

# One title per line, matching how the per-author corpora are assembled.
all_titles = alltitledf['title']
alltitlecorpus = all_titles.str.cat(sep="\n")

alltitlemodel = markovify.NewlineText(
    alltitlecorpus,
    state_size=2,
    well_formed=True,
    retain_original=False,
)

# Register the model next to the per-author ones.
models['allauthors'] = alltitlemodel

print("== Done. ==\n")

# Print ten generated titles, each preceded by a blank line.
for _ in range(10):
    generated_title = alltitlemodel.make_sentence(tries=100)
    print("\n{}".format(generated_title))

Query completed in 1.14 minutes.
== Training model for all titles... 
Done. ==

Ethnic journal
Book Review: The Organic Chemistry
Dr. Victor Grafe, Professor an der Akademie der Wissenschaften. Begonnen von R. Pophal: „Zur Ehrenrettung der Reflexnatur der Sehnenphänomene“
A Non-colonizing Aphid Vector of Potato Slopes for Reactions with Ketimines
Abstract 3824: Antitumor activity of lymphoid cells in the Oil-Water Interface Templating of Superconducting Qubits
Japan disappoints seekers of ?foresight?
Not “Socialized Medicine” — An Epidemic of louse-borne relapsing fever in Indonesia during 1997
SNPs Meet CNVs in 16,000 cases of refractory anemia in the Gall Bladder as a Determinant of the Massachusetts General Hospital. Case 33-2005. A 43-year-old man with chest pain.
Pathological missense mutations
A New Separation Methods with On‐Chip X‐Ray Diffraction Studies of Human Fab Fragment Selected from a Long Noncoding RNAs Promote Transcriptional Heterogeneity and Clonal Hierarchy of Epith

In [159]:
# Weight given to each author's model when the models are blended.
# ('allauthors' is left out of the blend; add it here with a weight to include it.)
author_weightings = {
    'jcraigventer': 0,
    'edwardsboyden': 0,
    'davidbotstein': 50,
    'georgemchurch': 50,
}

# Walk the mapping once so each model stays paired with its own weight.
blend_models = []
blend_weights = []
for author_name, author_weight in author_weightings.items():
    blend_models.append(models[author_name])
    blend_weights.append(author_weight)

combinedmodel = markovify.combine(blend_models, blend_weights)

In [160]:
# Print ten titles generated by the blended model, each followed by a blank line.
for _ in range(10):
    generated_title = combinedmodel.make_sentence(tries=100)
    print("{}\n".format(generated_title))

Identification of clinically distinct types of di use large b-cell lymphoma identi ed by gene expression patterns in scleroderma skin

Probing the limits of genetic network architecture

B lineage--specific interactions of an Evolved Bacterial Genome

Genome-wide analysis of DNA pools from high-fidelity microchips.

Databases for gene expression programs in human cells

Genomic analysis of gene expression in adenocarcinoma of the yeast cell-cycle transcription factors SBFand MBF

A whole genome approach to visualizing sequence motifs.

Large-scale de novo DNA synthesis: technologies and applications of in vitro mutagenesis

Multiplex amplification of a digital movie into the genomes of a marine virus and host reveal features of co-evolution.

A National Network of Neurotechnology Centers for the separation of chromosomes on the secretion machinery

