# Careerplot

This script takes a data set of a scientific career extracted from Web of Science (WoS) and creates a career lollipop plot, as in: http://science.sciencemag.org/content/354/6312/aaf5239

The plot uses bokeh and is interactive.

In [34]:
from bokeh.layouts import row
from bokeh.plotting import figure, show, output_file, ColumnDataSource
from bokeh.models import HoverTool
from collections import OrderedDict
import csv
from datetime import datetime as dt
import time

In [92]:
plotwidth = 820
plotheight = 380

In [35]:
#https://stackoverflow.com/questions/6451655/python-how-to-convert-datetime-dates-to-decimal-years
def toYearFraction(date):
    def sinceEpoch(date): # returns seconds since epoch
        return time.mktime(date.timetuple())
    s = sinceEpoch

    year = date.year
    startOfThisYear = dt(year=year, month=1, day=1)
    startOfNextYear = dt(year=year+1, month=1, day=1)

    yearElapsed = s(date) - s(startOfThisYear)
    yearDuration = s(startOfNextYear) - s(startOfThisYear)
    fraction = yearElapsed/yearDuration

    return date.year + fraction

In [63]:
## Load data

# https://stackoverflow.com/questions/19486369/extract-csv-file-specific-columns-to-list-in-python/19487003
with open('../data/career_hawking_scipub.csv', 'r') as infile:
  # read the file as a dictionary for each row ({header : value})
  reader = csv.DictReader(infile)
  data = {}
  for row in reader:
    for header, value in row.items():
      try:
        data[header].append(value)
      except KeyError:
        data[header] = [value]

data['date_yfraction'] = [toYearFraction(dt.strptime(data['date'][x], '%Y-%m-%d')) for x in range(len(data['date']))]
data['date_year'] = [int(x) for x in data['date_yfraction']]
data['title'] = [x.strip().title() for x in data['title']]
data['journal'] = [x.strip().title() for x in data['journal']]

source_scipub = ColumnDataSource(data=dict(
    journal = data['journal'],
    date = data['date'],
    date_yfraction = data['date_yfraction'],
    date_year = data['date_year'],
    c10 = data['c10'],
    ctot = data['ctot'],
    title = data['title']
))




with open('../data/career_hawking_book.csv', 'r') as infile:
  # read the file as a dictionary for each row ({header : value})
  reader = csv.DictReader(infile)
  data_book = {}
  for row in reader:
    for header, value in row.items():
      try:
        data_book[header].append(value)
      except KeyError:
        data_book[header] = [value]

data_book['ratecountdiv100'] = [float(x)/100 for x in data_book['ratecount']]
data_book['date_year'] = [int(x) for x in data_book['date_year']]
        
source_book = ColumnDataSource(data=dict(
    date_year = data_book['date_year'],
    avgrate = data_book['avgrate'],
    ratecount = data_book['ratecount'],
    ratecountdiv100 = data_book['ratecountdiv100'],
    numberreviews = data_book['numberreviews'],
    title = data_book['title'],
    url = data_book['url']
))

In [94]:
mindate = int(min(data['date_year']+data_book['date_year']) - 2)
maxdate = int(max(data['date_year']+data_book['date_year']) + 2)

careerplot = figure(title="Career of Stephen Hawking", tools=["wheel_zoom","pan","box_zoom","reset"], toolbar_location="above",
            x_range=[mindate, maxdate], y_range=[-20,700], active_drag="pan", active_scroll="wheel_zoom",plot_width=plotwidth, plot_height=plotheight)


# Scientific publications
careerplot.segment('date_yfraction', 0, 'date_yfraction', 'c10', line_width=1.5, line_color="#CC9999", line_alpha = 0.8, source=source_scipub)
renderer_scipub = careerplot.circle('date_yfraction', 'c10' , size=13, fill_color="#CC0033", fill_alpha= 0.8, line_color="#CC9999", line_width=1.5, source=source_scipub)

# https://stackoverflow.com/questions/27545842/hovertool-for-multiple-data-series-in-bokeh-scatter-plot
# https://bokeh.pydata.org/en/latest/docs/user_guide/tools.html
careerplot.add_tools(HoverTool(renderers=[renderer_scipub], tooltips="""
    <div style="padding-bottom:8px;">
        <span style="font-size: 20px;">@title</br></span>
        <span style="font-size: 14px;">@journal (@date_year)</span>
    </div>
    <div>
        <span style="font-size: 12px;">Citations after 10 years: <b>@c10</b>, Total citations: <b>@ctot</b></span>
    </div>
    """
))



# Books
careerplot.segment('date_year', 0, 'date_year', 'ratecountdiv100', line_width=2.5, line_color="#6666AA", line_alpha = 0.8, source=source_book)
renderer_book = careerplot.circle('date_year', 'ratecountdiv100' , size=16, fill_color="#3333CC", fill_alpha= 0.8, line_color="#6666AA", line_width=2.5, source=source_book)

careerplot.add_tools(HoverTool(renderers=[renderer_book], tooltips="""
    <div style="padding-bottom:8px;">
        <span style="font-size: 20px;">@title (@date_year)</br></span>
    </div>
    <div>
        <span style="font-size: 12px;"><img src="assets/logo_goodreads.png" width="16" height="16" style="vertical-align: middle;"> Rating: <b>@avgrate</b> &nbsp;&middot;&nbsp; <b>@ratecount</b> Ratings &nbsp;&middot;&nbsp; <b>@numberreviews</b> Reviews </span>
    </div>
    """
))


# Hawking portraits
ratio = (plotwidth/plotheight)*720/(maxdate-mindate)
careerplot.image_url(url=['assets/cropped1.png'], x=1965, y=500, w=160/ratio,h=160)
careerplot.image_url(url=['assets/cropped2.png'], x=1992, y=500, w=160/ratio,h=160)
careerplot.image_url(url=['assets/cropped3.png'], x=2012, y=500, w=160/ratio,h=160)

careerplot.yaxis.axis_label = "Impact"
careerplot.yaxis.axis_label_text_font_style = "normal"

output_file("../careerplot.html", title="Career plot")

show(careerplot)  # open a browser
export_png(careerplot, filename="plot.png")

NameError: name 'export_png' is not defined