# Careerplot

This script takes a dataset describing a scientific career, extracted from Web of Science (WoS), and creates a career "lollipop" plot like the one in: http://science.sciencemag.org/content/354/6312/aaf5239

The plot is built with Bokeh and is interactive (hover tooltips, pan, and zoom).

In [1]:
from bokeh.layouts import row
from bokeh.plotting import figure, show, output_file, ColumnDataSource
from bokeh.models import HoverTool
from collections import OrderedDict
import csv
from datetime import datetime as dt
import time

In [5]:
#https://stackoverflow.com/questions/6451655/python-how-to-convert-datetime-dates-to-decimal-years
def toYearFraction(date):
    """Convert a calendar date to a decimal year.

    The fractional part is the share of the year that has elapsed at
    ``date``, so 1 January maps to exactly ``year + 0.0`` and leap years
    are accounted for through the actual length of the year.

    Args:
        date: A ``datetime.date`` or ``datetime.datetime``. Only the
            calendar fields down to whole seconds are used; sub-second
            precision and any tzinfo are ignored.

    Returns:
        float: ``date.year`` plus the elapsed fraction of that year,
        in the half-open interval ``[year, year + 1)``.
    """
    # Rebuild a naive datetime from the broken-down time so that plain
    # ``date`` objects are accepted as well as ``datetime`` objects.
    moment = dt(*date.timetuple()[:6])

    year = moment.year
    startOfThisYear = dt(year=year, month=1, day=1)
    startOfNextYear = dt(year=year + 1, month=1, day=1)

    # Pure datetime arithmetic instead of time.mktime(): mktime depends on
    # the local timezone/DST rules and can fail for dates before 1970 on
    # some platforms.
    yearElapsed = (moment - startOfThisYear).total_seconds()
    yearDuration = (startOfNextYear - startOfThisYear).total_seconds()
    fraction = yearElapsed / yearDuration

    return year + fraction

In [6]:
## Load data

# https://stackoverflow.com/questions/19486369/extract-csv-file-specific-columns-to-list-in-python/19487003
# Read the CSV into a column-oriented dict: {header: [value, value, ...]}.
# NOTE(review): csv.DictReader yields every value as a string, so columns
# such as 'c10' and 'ctot' stay strings here — confirm that is intended.
with open('../data/career_hawking_scipub.csv', 'r') as infile:
    # read the file as a dictionary for each row ({header : value})
    reader = csv.DictReader(infile)
    data = {}
    # The loop variable is called `record` rather than `row` so that it does
    # not overwrite the `row` layout helper imported from bokeh.layouts.
    for record in reader:
        for header, value in record.items():
            # Create the column list on first sight, then append to it.
            data.setdefault(header, []).append(value)

# Derived columns used for plotting:
#   date_yfraction - publication date as a decimal year (x position)
#   date_year      - integer part of that decimal year (shown in the tooltip)
#   title          - whitespace-trimmed, title-cased publication title
data['date_yfraction'] = [
    toYearFraction(dt.strptime(stamp, '%Y-%m-%d'))
    for stamp in data['date']
]
data['date_year'] = [int(fraction) for fraction in data['date_yfraction']]
data['title'] = [raw_title.strip().title() for raw_title in data['title']]

# Hand Bokeh only the columns the glyphs and the hover tooltip refer to.
source = ColumnDataSource(data={
    column: data[column]
    for column in (
        'pubtype',
        'date',
        'date_yfraction',
        'date_year',
        'c10',
        'ctot',
        'title',
    )
})

In [7]:
# https://bokeh.pydata.org/en/latest/docs/user_guide/tools.html
# Custom HTML tooltip: the publication title on the first line, citation
# counts and publication year below it. The @name placeholders are filled
# from the matching columns of the ColumnDataSource.
# `<br>` is used for the line break; the original `</br>` is not valid HTML.
hover = HoverTool( tooltips="""
    <div>
        <span style="font-size: 16px;">@title</span>
    </div>
    <div>
        <span style="font-size: 12px;">Citations after 10 years: @c10, Total citations: @ctot <br>Publication year: @date_year</span>
    </div>
    """
)

In [8]:
# x-axis bounds: one year before the earliest and one year after the latest
# publication, truncated to whole years.
mindate = int(min(data['date_yfraction']) - 1)
maxdate = int(max(data['date_yfraction']) + 1)

careerplot = figure(
    title="Career of Stephen Hawking",
    tools=[hover, "wheel_zoom", "pan", "box_zoom", "reset"],
    toolbar_location="right",
    x_range=[mindate, maxdate],
    y_range=[0, 700],
    active_drag="pan",
    active_scroll="wheel_zoom",
    plot_width=1200,
    plot_height=500,
)

# Lollipop "stick": a vertical segment from the baseline (y=0) up to the
# 10-year citation count at each publication date.
careerplot.segment(
    'date_yfraction', 0, 'date_yfraction', 'c10',
    line_width=1.5,
    line_color="#AAAAAA",
    line_alpha=0.8,
    source=source,
)

# Lollipop "head": a circle at the 10-year citation count.
careerplot.circle(
    'date_yfraction', 'c10',
    size=13,
    fill_color="#CC0033",
    fill_alpha=0.8,
    line_color="#AAAAAA",
    line_width=1.5,
    source=source,
)

careerplot.yaxis.axis_label = "Impact"
careerplot.yaxis.axis_label_text_font_style = "normal"

# Write the plot to a standalone HTML file and open it in a browser.
output_file("careerplot.html", title="Career plot")

show(careerplot)  # open a browser