In [1]:
from Bio import Entrez
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure, show
import numpy as np
import os
import pandas as pd
import pickle
import time
import sys

In [2]:
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()

In [3]:
# Load the article table; the first CSV column is used as the row index.
# NOTE(review): presumably PubMed search results, judging by the file name - confirm.
df = pd.read_csv(
    'rsa_pubmed.csv',
    index_col=0,
)

In [4]:
# Compute the publication year from the first four characters of PubDate.
# Vectorized string slicing replaces the row-wise apply: it avoids calling a
# Python lambda per row and also behaves sensibly on an empty frame.
df['year'] = df['PubDate'].str[:4].astype(int)

In [5]:
# Tally the number of distinct article Ids for each publication year and
# wrap the result in a Bokeh data source (x = year, y = count).
year_counts = df.groupby('year')['Id'].nunique()
data = dict(x=year_counts.index, y=year_counts.values)
year_src = ColumnDataSource(data=data)

In [6]:
# Hover tooltip that reports the year and article count of the hovered point.
hover = HoverTool(tooltips=[
    ("Year", "@x"),
    ("Count", "@y"),
])

# Figure dimensions shared with the per-journal plot further down.
fig_args = {'width': 900, 'height': 200}
fig = figure(
    **fig_args,
    title='Articles Per Year',
    tools=[hover],
    x_axis_label='Year',
    y_axis_label='Articles',
)
fig.line('x', 'y', source=year_src)
# scatter() draws screen-sized circle markers by default. It is used instead of
# circle(), which in recent Bokeh releases expects an explicit data-space radius.
fig.scatter('x', 'y', source=year_src)
show(fig)

In [7]:
# Number of distinct article Ids published in each journal.
journal_counts = df.groupby('FullJournalName')['Id'].nunique()

In [8]:
# The 20 journals with the most articles, in descending order of count.
# nlargest() replaces indexing the Series with an array of integer positions:
# on a label-indexed Series that relies on a positional fallback which pandas
# has deprecated. Ties at the cut-off keep the first journal encountered.
top20 = journal_counts.nlargest(20)

In [9]:
# One point per journal: x is the journal's rank (0 = most articles), y its
# article count. The rank axis is sized from top20 itself so the columns stay
# the same length even when fewer than 20 journals are present.
data = {
    'x': np.arange(len(top20)),
    'y': top20.values,
    'name': top20.index.tolist()
}
jrnl_src = ColumnDataSource(data)

# Hover tooltip that reports the journal name and its article count.
hover = HoverTool(tooltips=[
    ("Journal", "@name"),
    ("Count", "@y"),
])

fig = figure(
    **fig_args,
    title='Articles Per Journal (Top 20)',
    tools=[hover],
    x_axis_label='Rank',
    y_axis_label='Articles',
)
# scatter() draws screen-sized circle markers by default. It is used instead of
# circle(), which in recent Bokeh releases expects an explicit data-space radius.
fig.scatter('x', 'y', source=jrnl_src)
show(fig)