In [1]:
import pandas as pd
from bokeh.io import hplot, output_notebook, vplot
from bokeh.layouts import row
from bokeh.models import BoxZoomTool, ResetTool, HoverTool, WheelZoomTool, PanTool
from bokeh.plotting import figure, output_notebook, show, ColumnDataSource

Link to the GitHub repository for the dataset:

https://github.com/fivethirtyeight/data/tree/master/bad-drivers

In [2]:
# Load the dataset into a DataFrame. The path is relative to the notebook's
# working directory; presumably this is a local copy of the bad-drivers CSV
# from the repository linked above -- TODO confirm.
df = pd.read_csv("dataset.csv")

In [3]:
# Inspect the original column headers exactly as they were read from the CSV.
df.columns

Index([u'State',
       u'Number of drivers involved in fatal collisions per billion miles',
       u'Percentage Of Drivers Involved In Fatal Collisions Who Were Speeding',
       u'Percentage Of Drivers Involved In Fatal Collisions Who Were Alcohol-Impaired',
       u'Percentage Of Drivers Involved In Fatal Collisions Who Were Not Distracted',
       u'Percentage Of Drivers Involved In Fatal Collisions Who Had Not Been Involved In Any Previous Accidents',
       u'Car Insurance Premiums ($)',
       u'Losses incurred by insurance companies for collisions per insured driver ($)'],
      dtype='object')

In [4]:
# Short, identifier-friendly replacements for the verbose CSV headers.
# Entries are listed in the same order as the original columns because they
# are applied positionally.
col = [
    'State',
    'DriversperBillionMiles',                  # drivers in fatal collisions per billion miles
    'Perc_Speeding',                           # % who were speeding
    'Perc_AlcoholImpaired',                    # % who were alcohol-impaired
    'Perc_NotDistracted',                      # % who were not distracted
    'Perc_NoPrevAccidents',                    # % with no previous accidents
    'CarInsurancePremium',                     # car insurance premiums ($)
    'InsuranceCompanyLossesPerInsuredDriver',  # insurer collision losses per insured driver ($)
]

In [5]:
# Swap the verbose headers for the short names in `col`. The assignment is
# positional, so `col` must list names in the same order as the CSV columns.
df.columns = col

In [6]:
# Check the dtype pandas inferred for each (renamed) column.
df.dtypes

State                                      object
DriversperBillionMiles                    float64
Perc_Speeding                               int64
Perc_AlcoholImpaired                        int64
Perc_NotDistracted                          int64
Perc_NoPrevAccidents                        int64
CarInsurancePremium                       float64
InsuranceCompanyLossesPerInsuredDriver    float64
dtype: object

In [7]:
# Preview the first rows to confirm the new column names line up with the data.
df.head()

Unnamed: 0,State,DriversperBillionMiles,Perc_Speeding,Perc_AlcoholImpaired,Perc_NotDistracted,Perc_NoPrevAccidents,CarInsurancePremium,InsuranceCompanyLossesPerInsuredDriver
0,Alabama,18.8,39,30,96,80,784.55,145.08
1,Alaska,18.1,41,25,90,94,1053.48,133.93
2,Arizona,18.6,35,28,84,96,899.47,110.35
3,Arkansas,22.4,18,26,94,95,827.34,142.39
4,California,12.0,35,28,91,89,878.41,165.63


In [8]:
# Wrap the DataFrame in a Bokeh ColumnDataSource so that glyphs and hover
# tooltips can refer to columns by name (e.g. x="DriversperBillionMiles").
df_columnar = ColumnDataSource(df)

In [9]:
# Configure Bokeh to render plots inline in the notebook for later show() calls.
output_notebook()

<h1>Scatter plot using Bokeh Plotting Interface</h1>

In [10]:
# Scatter plot of the fatal-collision driver rate against the car insurance
# premium, one point per row of the shared data source.
scatter_x = "DriversperBillionMiles"
scatter_y = "CarInsurancePremium"

# Hovering over a point shows the state it belongs to.
hover = HoverTool(tooltips=[('State', '@State')])

p = figure(
    plot_width=500,
    plot_height=400,
    tools='pan,reset,save,wheel_zoom',
)
p.add_tools(hover)

# Coordinates are looked up by column name in `df_columnar`.
p.circle(x=scatter_x, y=scatter_y, source=df_columnar)

# Centered title, and axis labels that mirror the plotted column names.
p.title.text = "Scatterplot"
p.title.align = "center"
p.xaxis.axis_label = scatter_x
p.yaxis.axis_label = scatter_y

show(p)

<h1>Bar Chart using Bokeh Charts Interface</h1>

In [11]:
from bokeh.charts import Bar

In [12]:
# Bar chart of the car insurance premium per state, built with the
# high-level bokeh.charts interface.
p1 = Bar(
    data=df,
    label="State",
    values="CarInsurancePremium",
    legend=None,
    tools="hover,pan, reset",
)

# Look up the chart's HoverTool and make it report each bar's state and height.
hover = p1.select({"type": HoverTool})
hover.tooltips = [('State', '@x'), ('CarInsurancePremium', '@height')]

# Use the same 500x400 size as the scatter plot.
p1.plot_width, p1.plot_height = 500, 400

show(p1)

In [13]:
# Lay the scatter plot and the bar chart out side by side in a single row.
# `row` from bokeh.layouts is used in place of the deprecated `bokeh.io.hplot`,
# so building the layout no longer triggers a deprecation warning.
pp1 = row(p, p1)

  if __name__ == '__main__':


In [14]:
# Render the combined layout (scatter plot next to bar chart) held in `pp1`.
show(pp1)

<h1>Linked plots using Bokeh Plotting Interface</h1>

In [15]:
from bokeh.io import gridplot

In [16]:
# Selection and zoom tools given to both panels.
TOOLS = "box_select,lasso_select,box_zoom, reset"


def _premium_scatter(x_column):
    """Return a 400x400 scatter of `x_column` against CarInsurancePremium.

    The points are read from the shared `df_columnar` source and the figure
    is created with the tools listed in `TOOLS`. The x-axis label is the
    column name itself.
    """
    panel = figure(plot_width=400, plot_height=400, tools=TOOLS)
    panel.circle(source=df_columnar, x=x_column, y="CarInsurancePremium")
    panel.xaxis.axis_label = x_column
    panel.yaxis.axis_label = "CarInsurancePremium"
    return panel


# Both panels draw from the same ColumnDataSource; per Bokeh's linked-selection
# behaviour, points selected in one panel are highlighted in the other.
s1 = _premium_scatter("DriversperBillionMiles")
s2 = _premium_scatter("Perc_Speeding")

# Arrange the two panels in a single-row grid and display it.
s = gridplot([[s1, s2]])

show(s)
