In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Notebook-wide display options: never truncate wide frames, and use seaborn's
# dark grid theme for matplotlib figures.
pd.options.display.max_columns = 999
sns.set_style('darkgrid')

# Columns kept for the analysis; every other CSV column is discarded.
KEEP_COLUMNS = [
    'timestamp',
    'borough',
    'zip_code',
    'latitude',
    'longitude',
    'on_street_name',
    'number_of_persons_injured',
    'number_of_persons_killed',
    'contributing_factor_vehicle_1',
]

# Load the raw records and merge the first two CSV columns into a single
# `timestamp` column placed at the front of the frame. pd.to_datetime is used
# because combining columns through read_csv(parse_dates={...}) is deprecated.
# NOTE(review): assumes both columns are read as strings -- confirm.
# TODO: look up time zones (UTC) -- no timezone is attached to the parsed values.
df = pd.read_csv('traffic.zip')
date_col, time_col = df.columns[:2]
df.insert(0, 'timestamp', pd.to_datetime(df.pop(date_col) + ' ' + df.pop(time_col)))

# Normalise headers to lower snake_case so columns can be addressed uniformly.
df.columns = df.columns.str.lower().str.replace(' ', '_')

# Keep only the analysis columns, drop rows that lack a borough, zip code or
# latitude, and renumber the surviving rows from 0. The chain returns a new
# frame, so the column assignments below do not write into a view of the raw data.
df = (
    df[KEEP_COLUMNS]
    .dropna(subset=['borough', 'zip_code', 'latitude'])
    .reset_index(drop=True)
)

# Lower-case the free-text location columns.
for col in ['borough', 'on_street_name']:
    df[col] = df[col].str.lower()

# Store zip codes as digit strings (e.g. 10466.0 -> '10466'); rows without a
# zip code were removed above, so every remaining value is converted.
df['zip_code'] = df['zip_code'].map(lambda z: str(int(z)))


In [2]:
# Preview the first rows of `df` to sanity-check the loading and cleaning above.
df.head()

Unnamed: 0,timestamp,borough,zip_code,latitude,longitude,on_street_name,number_of_persons_injured,number_of_persons_killed,contributing_factor_vehicle_1
0,2020-08-29 15:40:00,bronx,10466,40.8921,-73.83376,pratt avenue,0,0,Passing Too Closely
1,2020-08-29 21:00:00,brooklyn,11221,40.6905,-73.919914,bushwick avenue,2,0,Reaction to Uninvolved Vehicle
2,2020-08-29 00:00:00,bronx,10459,40.82472,-73.89296,,0,0,Unsafe Speed
3,2020-08-29 17:10:00,brooklyn,11203,40.64989,-73.93389,,0,0,Failure to Yield Right-of-Way
4,2020-08-29 19:30:00,bronx,10459,40.825226,-73.88778,longfellow avenue,0,0,Unspecified


In [5]:
import pandas as pd

from bokeh.models import ColumnDataSource, LabelSet
from bokeh.plotting import figure, show
from bokeh.sampledata.periodic_table import elements
from bokeh.io import output_notebook

# Restrict the periodic-table sample data to elements with atomic number <= 82
# that have a recorded melting point. The copy is taken *after* filtering so the
# column assignments below act on an independent frame (no SettingWithCopyWarning).
has_melting_point = ~pd.isnull(elements["melting point"])
elements = elements[(elements["atomic number"] <= 82) & has_melting_point].copy()

# Atomic masses are stored as text and may be wrapped in square brackets;
# strip the brackets and convert to float.
mass = [float(x.strip("[]")) for x in elements["atomic mass"]]
elements["atomic mass"] = mass

# Color ramp used to encode melting point (first entry = lowest, last = highest).
palette = ["#053061", "#2166ac", "#4393c3", "#92c5de", "#d1e5f0",
           "#f7f7f7", "#fddbc7", "#f4a582", "#d6604d", "#b2182b", "#67001f"]

# Linearly rescale each melting point onto a palette index. The top index is
# derived from the palette itself so the two cannot drift apart if colors are
# added or removed.
melting_points = elements["melting point"]
low = melting_points.min()
high = melting_points.max()
top_index = len(palette) - 1
melting_point_inds = [int(top_index*(x-low)/(high-low)) for x in melting_points]
elements['melting_colors'] = [palette[i] for i in melting_point_inds]

# Scatter plot of density against atomic mass, one labelled marker per element,
# filled with the per-element color stored in the `melting_colors` column.
TITLE = "Density vs Atomic Weight of Elements (colored by melting point)"
TOOLS = "hover,pan,wheel_zoom,box_zoom,reset,save"

p = figure(tools=TOOLS, toolbar_location="above", width=1200, title=TITLE)
p.toolbar.logo = "grey"
p.background_fill_color = "#efefef"
p.xaxis.axis_label = "atomic weight (amu)"
p.yaxis.axis_label = "density (g/cm^3)"
p.grid.grid_line_color = "white"

# Hover tooltip rows as (label, value) pairs; "@column" pulls the value from
# the data source, with braces required for column names containing spaces.
# Labels carry no trailing colon so all rows are punctuated the same way.
p.hover.tooltips = [
    ("name", "@name"),
    ("symbol", "@symbol"),
    ("density", "@density"),
    ("atomic weight", "@{atomic mass}"),
    ("melting point", "@{melting point}")
]

# A single shared data source feeds both the markers and their text labels.
source = ColumnDataSource(elements)

p.scatter("atomic mass", "density", size=12, source=source,
          color='melting_colors', line_color="black", alpha=0.9)

# Element symbols, centered horizontally and shifted 8 screen units up from each marker.
labels = LabelSet(x="atomic mass", y="density", text="symbol", y_offset=8,
                  text_font_size="11px", text_color="#555555",
                  source=source, text_align='center')
p.add_layout(labels)

# Route bokeh output to the notebook, then render the figure inline.
output_notebook()

show(p)