In [1]:
from Bio import Entrez
from bokeh.io import output_notebook
from bokeh.layouts import row
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.plotting import figure, show
from bokeh.resources import INLINE
import numpy as np
import os
import pandas as pd
import pickle
import time
import sys

In [2]:
# Send Bokeh output to the notebook. INLINE resources embed BokehJS in the
# output instead of loading it from a CDN.
output_notebook(resources=INLINE)

In [3]:
# Load the citation table; the first CSV column becomes the row index.
df = pd.read_csv('rsa_pubmed_citations.csv', index_col=0)
def fun(v):
    """Count the integer IDs in a stringified list such as "['123', '456']".

    Parameters
    ----------
    v : str, None or NaN
        Text form of a list of integer IDs. Brackets and single quotes are
        stripped and the remainder is split on commas. ``None`` and ``NaN``
        are treated as "no IDs".

    Returns
    -------
    int
        Number of non-blank comma-separated entries.

    Raises
    ------
    ValueError
        If a non-blank entry is not a valid integer literal.
    """
    # A missing cell arrives as None or as a float NaN (NaN != NaN); neither
    # has .replace(), so handle them before any string processing.
    if v is None or (isinstance(v, float) and v != v):
        return 0

    tokens = v.replace('[', '').replace(']', '').replace("'", '').split(',')

    count = 0
    for token in tokens:
        # Strip first so whitespace-only entries (e.g. after a trailing
        # comma) are skipped rather than passed to int().
        token = token.strip()
        if token:
            int(token)  # validation only: reject entries that are not integers
            count += 1
    return count

# Add one count column per stringified ID-list column.
for source_col, count_col in (('cites', 'n_cites'), ('citedby', 'n_citedby')):
    df[count_col] = df[source_col].apply(fun)

In [4]:
# Range check on the derived columns: min and max of n_cites, then of n_citedby.
df['n_cites'].min(), df['n_cites'].max(), df['n_citedby'].min(), df['n_citedby'].max()

(0, 233, 0, 339)

In [5]:
# Side-by-side histograms of the two count columns on a log-scaled x axis.
fig_args = dict(width=400, height=400)
figs = []

# Log-spaced bin edges from 10**-1 to 10**3. They do not depend on the column,
# so they are computed once instead of on every loop iteration.
bin_edges = 10**np.linspace(-1, 3, 20)

for plot in ('n_cites', 'n_citedby'):

    # np.histogram ignores values outside the outermost edges, so rows whose
    # count is 0 (below the 0.1 lower edge) are not represented in the bars.
    # A log axis could not place them anyway.
    vals, bins = np.histogram(df[plot], bin_edges)

    # TODO(review): the original carried a note about collecting the indices
    # of the articles that fall in each bin; that is not implemented.

    # One quad per bin: horizontal extent is the bin, height is the count.
    data = {
        'left': bins[:-1],
        'right': bins[1:],
        'top': vals,
        'bottom': np.zeros(len(vals))
    }
    src = ColumnDataSource(data)

    # Tooltip shows the bar's count and its bin edges.
    hover = HoverTool()
    hover.point_policy = "follow_mouse"
    hover.tooltips = [
        ("Count", "@top"),
        ("Min", "@left"),
        ("Max", "@right"),
    ]

    fig = figure(
        **fig_args,
        tools=[hover],
        title=plot,
        x_axis_type="log",
        # Axis labels so each panel can be read on its own.
        x_axis_label=f"{plot} (log scale)",
        y_axis_label="Number of articles",
    )
    fig.quad(
        left='left',
        right='right',
        top='top',
        bottom='bottom',
        source=src,
        line_color='black',
        fill_color='maroon',
        line_width=2
    )

    figs.append(fig)

lout = row(*figs)
show(lout)

In [9]:
# Scatter of n_cites against n_citedby, one point per row of df.
fig = figure(
    width=400,
    height=200,
    x_axis_label='n_cites',
    y_axis_label='n_citedby',
)
# Only the two plotted columns go to Bokeh, so the 'cites'/'citedby' string
# columns are not serialized into the notebook output.
fig.circle('n_cites', 'n_citedby', source=ColumnDataSource(df[['n_cites', 'n_citedby']]))
show(fig)