# Nobelplot

This notebook creates the interactive versions of Fig. 2a and Fig. 2b of the paper "Interdisciplinarity: A Nobel Opportunity" by Michael Szell, Yifang Ma, and Roberta Sinatra. Data is from Web of Science.

Created on:  2018-07-21  
Last update: 2018-07-21  
Contact: michael.szell@gmail.com (Michael Szell)

## Preliminaries

In [1]:
from bokeh.layouts import row
from bokeh.plotting import figure, show, output_file, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.models.glyphs import Text
from collections import OrderedDict
import csv
import math

In [2]:
# Edge length of the plots: passed as both plot_width and plot_height of the
# figures and as the scale factor `pw` of triple2ternary.
plotwidth = 600
# Margin used when setting the figures' x/y ranges and placing the corner labels.
boundary = 50

def triple2ternary(triple, pw=800):
    """Convert a triple of non-negative weights to 2D ternary-plot coordinates.

    The triple is first normalized so that its components sum to 1. The
    result is then mapped onto an equilateral triangle of side length `pw`
    whose top edge lies at y = pw and whose third corner points downwards
    (the y coordinate is flipped via ``1 - y``):

    * (1, 0, 0) -> (0, pw)                  top-left corner
    * (0, 1, 0) -> (pw, pw)                 top-right corner
    * (0, 0, 1) -> (pw/2, pw*(1-sqrt(3)/2)) bottom corner

    Parameters
    ----------
    triple : sequence of three numbers (or numeric strings)
        Weights of the three components.
    pw : number, optional
        Side length of the triangle (scale factor), default 800.

    Returns
    -------
    tuple of float
        The ``(x, y)`` position.

    Raises
    ------
    ZeroDivisionError
        If the components sum to zero, so the triple cannot be normalized.
    """
    # Normalize; the total is loop-invariant, so compute it once.
    total = sum(triple)
    shares = [float(component) / total for component in triple]

    angle = math.radians(60)
    height = shares[2] * math.sin(angle)
    x = 1 - shares[0] - height / math.tan(angle)
    # Flip vertically so that the third component points downwards.
    y = 1 - height
    return pw * x, pw * y

def citation2size(cit, pw=None):
    """Map a citation count to an integer marker size.

    The size grows with the square root of the citation count:
    ``round(sqrt(600 * cit / 8320)) + 4``, scaled by ``pw / 440`` and
    truncated to an int.

    Parameters
    ----------
    cit : number or numeric string
        Citation count; converted with ``float``.
    pw : number, optional
        Plot width used for scaling. Defaults to the notebook-level
        ``plotwidth``, which reproduces the previous behavior.

    Returns
    -------
    int
        Marker size.
    """
    if pw is None:
        # Fall back to the notebook-wide plot width.
        pw = plotwidth
    return int(pw/440 * (round(math.sqrt(600.*float(cit)/8320))+4))

## Interdisciplinary papers

In [3]:
# https://stackoverflow.com/questions/19486369/extract-csv-file-specific-columns-to-list-in-python/19487003
def read_csv_columns(path, **open_kwargs):
    """Read a CSV file column-wise.

    Returns a dict mapping every header to the list of that column's values
    (one entry per data row, in file order). Non-breaking spaces (U+00A0) in
    the values are replaced by plain spaces. Extra keyword arguments are
    forwarded to ``open``.
    """
    columns = {}
    with open(path, 'r', **open_kwargs) as infile:
        # Iterate with `record`, not `row`, so that the `row` imported from
        # bokeh.layouts is not overwritten at notebook level.
        for record in csv.DictReader(infile):
            for header, value in record.items():
                columns.setdefault(header, []).append(value.replace(u'\xa0', u' '))
    return columns


# NOTE: errors='ignore' silently drops bytes that are not valid UTF-8 in this file.
datatop220 = read_csv_columns('../data/top220interdisciplinary.csv', encoding='utf-8-sig', errors='ignore')
top220ranks = list(datatop220.get("Citation rank within Top 10000", []))

datatop10000 = read_csv_columns('../data/top10000.csv', encoding='utf-8-sig')

# Copy the citation counts of the 220 papers over from the top-10000 table.
# The rank is used directly as a 0-based row index into that table.
headers = ["Life Sciences", "Physics", "Chemistry", "Sums of cits"]
for h in headers:
    datatop220.setdefault(h, []).extend(
        int(round(float(datatop10000[h][int(rank)]))) for rank in top220ranks
    )

# Ternary position of every paper, computed once and split into x and y.
top220_xy = [
    triple2ternary([int(ls), int(ph), int(ch)], plotwidth)
    for ls, ph, ch in zip(datatop220['Life Sciences'], datatop220['Physics'], datatop220['Chemistry'])
]
datatop220['x'] = [xy[0] for xy in top220_xy]
datatop220['y'] = [xy[1] for xy in top220_xy]

# Fill color per subfield; every other (or empty) subfield is drawn grey.
subfield_colors = {
    "Artificial intelligence": '#3c2152',
    "Network science": '#9b226d',
    "Geology": '#eb96a9',
    "Signal processing": '#fbddd5',
    "Quantum dots": '#fffdf9',
}
datatop220['color'] = [subfield_colors.get(s, '#808180') for s in datatop220['Subfield']]
# Papers with a subfield label are drawn nearly opaque with a dark outline,
# papers without one are faded.
datatop220['alpha'] = [0.92 if s else 0.25 for s in datatop220['Subfield']]
datatop220['line_color'] = ["#1a1919" if s else "#b4b5b4" for s in datatop220['Subfield']]
datatop220['size'] = [citation2size(c) for c in datatop220['Sums of cits']]
# Titles longer than 65 characters are cut and marked with '..'.
datatop220['titletruncated'] = [(t[:65] + '..').capitalize() if len(t) > 65 else t.capitalize() for t in datatop220['Title']]
# Citations that come from none of the three fields.
datatop220['c_other'] = [
    int(c) - int(c1) - int(c2) - int(c3)
    for c1, c2, c3, c in zip(datatop220['Life Sciences'], datatop220['Physics'], datatop220['Chemistry'], datatop220['Sums of cits'])
]
# 1-based rank derived from the 0-based rank column.
datatop220['c_rank'] = [int(r) + 1 for r in datatop220['Citation rank within Top 10000']]
datatop220['Journal'] = [j.capitalize() for j in datatop220['Journal']]
datatop220['Authors'] = [(a[:65] + '..').replace(";", ",") if len(a) > 65 else a.replace(";", ",") for a in datatop220['Authors']]

sourcetop220 = ColumnDataSource(data=dict(
    x = datatop220['x'],
    y = datatop220['y'],
    color = datatop220['color'],
    line_color = datatop220['line_color'],
    alpha = datatop220['alpha'],
    size = datatop220['size'],
    subfield = datatop220['Subfield'],
    publicationyear = datatop220['Publication year'],
    c_lifesciences = datatop220['Life Sciences'],
    c_physics = datatop220['Physics'],
    c_chemistry = datatop220['Chemistry'],
    c_total = datatop220['Sums of cits'],
    c_other = datatop220['c_other'],
    c_rank = datatop220['c_rank'],
    authors = datatop220['Authors'],
    title = datatop220['titletruncated'],
    journal = datatop220['Journal']
))

In [4]:
# https://bokeh.pydata.org/en/latest/docs/user_guide/tools.html
# HTML template of the tooltip; the @name placeholders refer to the column
# names of the data source behind the hovered glyph.
top220_tooltip_html = """
    <div style="text-shadow: #000 0px 0px 0.5px, #000 0px 0px 0.5px; font-size: 14px;margin-bottom: 10px;font-weight: bold;color: @color">
        @subfield
    </div>
    <div style="margin-bottom: 10px;font-size: 12px">
        <span style="">@authors</br>@title</br>@journal, @publicationyear</span>
    </div>
    <div style="font-size: 12px;font-weight: bold">
        Citations: @c_total = <span style="color: #205483">@c_lifesciences </span> + <span style="color: #b26d24">@c_physics </span> + <span style="color: #96932c">@c_chemistry </span> + <span style="color: #999999">@c_other</span></br>
    </div>
    """

# Attach the tooltip only to renderers named "papers".
hover = HoverTool(names=["papers"], tooltips=top220_tooltip_html)

In [5]:
top220plot = figure(
    title="The impact space of top-impact interdisciplinary papers",
    tools=[hover, "wheel_zoom", "pan", "box_zoom", "reset"],
    toolbar_location="right",
    x_range=[0-boundary, plotwidth+boundary],
    y_range=[0-0.2*boundary, plotwidth+1.8*boundary],
    active_drag="pan",
    active_scroll="wheel_zoom",
    plot_width=plotwidth,
    plot_height=plotwidth,
)

# Filled smaller triangle (80% of the full side length) sharing the top edge.
top220plot.patch(
    [plotwidth*0.1, plotwidth*0.9, plotwidth*0.5, plotwidth*0.1],
    [plotwidth, plotwidth, plotwidth-0.8*plotwidth*math.sqrt(3)/2, plotwidth],
    alpha=1, line_width=0, color="#ede4ea",
)
# Outline of the full ternary triangle.
top220plot.line(
    x=[0, plotwidth, 0.5*plotwidth, 0],
    y=[plotwidth, plotwidth, plotwidth-plotwidth*math.sqrt(3)/2, plotwidth],
    line_color="#1a1919", line_width=2, line_alpha=1,
)
# One circle per paper; the name "papers" is what the hover tool attaches to.
top220plot.circle(
    'x', 'y', size="size", fill_color="color", name="papers",
    fill_alpha="alpha", line_alpha="alpha", line_color="line_color",
    line_width=1.5, source=sourcetop220,
)

# Corner labels of the triangle.
source2 = ColumnDataSource(dict(
    x=[boundary/2+70, plotwidth-boundary/2-45, plotwidth/2],
    y=[plotwidth+boundary/3, plotwidth+boundary/3, plotwidth-1.09*plotwidth*math.sqrt(3)/2],
    text=["Life Sciences", "Physics", "Chemistry"],
    cols=["#205483", "#b26d24", "#96932c"],
    text_align=["left", "right", "center"],
))
glyph = Text(x="x", y="y", text="text", text_color="cols", text_align="center", text_font_size="18pt" )
top220plot.add_glyph(source2, glyph)

top220plot.axis.visible = False
top220plot.xgrid.visible = False
top220plot.ygrid.visible = False
#top220plot.outline_line_color = None

# Write to a file of its own: this cell previously wrote to "../nobelplot.html",
# the same file the Nobel Prize plot is saved to, so this plot was overwritten
# when the notebook was run top to bottom.
output_file("../top220plot.html", title="Top 220 plot")
show(top220plot)  # open a browser

## Nobel Prize papers

In [6]:
# https://stackoverflow.com/questions/19486369/extract-csv-file-specific-columns-to-list-in-python/19487003
# Read the Nobel paper table column-wise: {header: [value of every row]}.
datain = {}
with open('../data/nobelpapers.csv', 'r', encoding='utf-8-sig') as infile:
    # Iterate with `record`, not `row`, so that the `row` imported from
    # bokeh.layouts is not overwritten at notebook level.
    for record in csv.DictReader(infile):
        for header, value in record.items():
            # Replace non-breaking spaces (U+00A0) with plain spaces.
            datain.setdefault(header, []).append(value.replace(u'\xa0', u' '))

# Ternary position of every paper, computed once and split into x and y.
nobel_xy = [
    triple2ternary([int(ls), int(ph), int(ch)], plotwidth)
    for ls, ph, ch in zip(datain['Life Sciences'], datain['Physics'], datain['Chemistry'])
]
datain['x'] = [xy[0] for xy in nobel_xy]
datain['y'] = [xy[1] for xy in nobel_xy]

# Marker fill color and a darker text color per prize discipline; every
# discipline other than the two listed gets the fallback (orange) color.
discipline_colors = {"Physiology/Medicine": '#286aa6', "Chemistry": '#f6ea3e'}
discipline_colors_dark = {"Physiology/Medicine": '#205483', "Chemistry": '#96932c'}
datain['color'] = [discipline_colors.get(d, '#ea8c2e') for d in datain['Discipline']]
datain['colordark'] = [discipline_colors_dark.get(d, '#b26d24') for d in datain['Discipline']]

datain['size'] = [citation2size(c) for c in datain['Total citations after 10 years']]
# Join the laureates that are actually present. Skipping blank entries avoids
# artifacts such as "A, , C" when a middle laureate column is empty.
datain['laureates'] = [
    ", ".join(name for name in (l1, l2, l3) if name.strip()).strip(", ")
    for l1, l2, l3 in zip(datain['Laureate1'], datain['Laureate2'], datain['Laureate3'])
]
# Titles longer than 65 characters are cut and marked with '..'.
datain['titletruncated'] = [(t[:65] + '..') if len(t) > 65 else t for t in datain['Title']]
# Citations that come from none of the three fields.
datain['c_other'] = [
    int(c) - int(c1) - int(c2) - int(c3)
    for c1, c2, c3, c in zip(datain['Life Sciences'], datain['Physics'], datain['Chemistry'], datain['Total citations after 10 years'])
]

source = ColumnDataSource(data=dict(
    x = datain['x'],
    y = datain['y'],
    color = datain['color'],
    colordark = datain['colordark'],
    size = datain['size'],
    discipline = datain['Discipline'],
    publicationyear = datain['Publication Year'],
    c_lifesciences = datain['Life Sciences'],
    c_physics = datain['Physics'],
    c_chemistry = datain['Chemistry'],
    c_total = datain['Total citations after 10 years'],
    c_other = datain['c_other'],
    authors = datain['Authors'],
    title = datain['titletruncated'],
    journal = datain['Journal'],
    vol = datain['Vol/Issue/Page'],
    nobelyear = datain['Nobel Year'],
    laureates = datain['laureates']
))

In [7]:
# https://bokeh.pydata.org/en/latest/docs/user_guide/tools.html
# HTML template of the tooltip; the @name placeholders refer to the column
# names of the data source behind the hovered glyph.
nobel_tooltip_html = """
    <div style="font-size: 14px;margin-bottom: 10px;font-weight: bold">
        Nobel Prize @nobelyear in <span style="color: @colordark">@discipline</span> to @laureates
    </div>
    <div style="margin-bottom: 10px;font-size: 12px">
        <span style="">@authors</br>@title</br>@journal @vol, @publicationyear</span>
    </div>
    <div style="font-size: 12px;font-weight: bold">
        Citations: @c_total = <span style="color: #205483">@c_lifesciences </span> + <span style="color: #b26d24">@c_physics </span> + <span style="color: #96932c">@c_chemistry </span> + <span style="color: #999999">@c_other</span>
    </div>
    """

# Attach the tooltip only to renderers named "papers".
hover = HoverTool(names=["papers"], tooltips=nobel_tooltip_html)

In [8]:
nobelplot = figure(
    title="The impact space of Nobel Prize winning papers",
    tools=[hover, "wheel_zoom", "pan", "box_zoom", "reset"],
    toolbar_location="right",
    x_range=[0-boundary, plotwidth+boundary],
    y_range=[0-0.2*boundary, plotwidth+1.8*boundary],
    active_drag="pan",
    active_scroll="wheel_zoom",
    plot_width=plotwidth,
    plot_height=plotwidth,
)

# Filled smaller triangle (80% of the full side length) sharing the top edge.
nobelplot.patch(
    [plotwidth*0.1, plotwidth*0.9, plotwidth*0.5, plotwidth*0.1],
    [plotwidth, plotwidth, plotwidth-0.8*plotwidth*math.sqrt(3)/2, plotwidth],
    alpha=1, line_width=0, color="#ede4ea",
)
# Outline of the full ternary triangle.
nobelplot.line(
    x=[0, plotwidth, 0.5*plotwidth, 0],
    y=[plotwidth, plotwidth, plotwidth-plotwidth*math.sqrt(3)/2, plotwidth],
    line_color="#1a1919", line_width=2, line_alpha=1,
)
# One circle per paper; the name "papers" is what the hover tool attaches to.
nobelplot.circle(
    'x', 'y', size="size", fill_color="color", name="papers",
    fill_alpha= 0.92, line_color="#1a1919", line_width=1.5, source=source,
)

# Corner labels of the triangle.
source2 = ColumnDataSource(dict(
    x=[boundary/2+70, plotwidth-boundary/2-45, plotwidth/2],
    y=[plotwidth+boundary/3, plotwidth+boundary/3, plotwidth-1.09*plotwidth*math.sqrt(3)/2],
    text=["Life Sciences", "Physics", "Chemistry"],
    cols=["#205483", "#b26d24", "#96932c"],
    text_align=["left", "right", "center"],
))
glyph = Text(x="x", y="y", text="text", text_color="cols", text_align="center", text_font_size="18pt" )
nobelplot.add_glyph(source2, glyph)

# Hide axes and grid lines; only the triangle and the markers are shown.
for hidden in (nobelplot.axis, nobelplot.xgrid, nobelplot.ygrid):
    hidden.visible = False
#nobelplot.outline_line_color = None

# Save the figure as a standalone HTML file and open it.
output_file("../nobelplot.html", title="Nobel plot")
show(nobelplot)  # open a browser