# Data: 2015 Traffic Fatalities from National Highway Traffic Safety Administration (NHTSA)

The coding of fields and variables is documented in the FARS 2015 Analytical User's Guide: https://www.nber.org/fars/ftp.nhtsa.dot.gov/fars/FARS-DOC/Analytical%20User%20Guide/USERGUIDE-2015.pdf

In [1]:
#Importation of libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import plotly as py
import cufflinks as cf
cf.go_offline()

## Database: accident

In [2]:
#Importation of database
# Reads the accident table from the local CSV file and previews its first rows.

data = pd.read_csv('data/accident.csv')
data.head()

Unnamed: 0,STATE,ST_CASE,VE_TOTAL,VE_FORMS,PVH_INVL,PEDS,PERNOTMVIT,PERMVIT,PERSONS,COUNTY,...,NOT_MIN,ARR_HOUR,ARR_MIN,HOSP_HR,HOSP_MN,CF1,CF2,CF3,FATALS,DRUNK_DR
0,1,10001,1,1,0,0,0,1,1,127,...,99,2,58,88,88,0,0,0,1,1
1,1,10002,1,1,0,0,0,1,1,83,...,99,22,20,88,88,0,0,0,1,0
2,1,10003,1,1,0,0,0,2,2,11,...,99,1,45,99,99,0,0,0,1,1
3,1,10004,1,1,0,0,0,1,1,45,...,99,1,15,88,88,0,0,0,1,1
4,1,10005,2,2,0,0,0,2,2,45,...,99,7,16,88,88,0,0,0,1,0


In [3]:
# List every column label of the accident table
data.columns

Index(['STATE', 'ST_CASE', 'VE_TOTAL', 'VE_FORMS', 'PVH_INVL', 'PEDS',
       'PERNOTMVIT', 'PERMVIT', 'PERSONS', 'COUNTY', 'CITY', 'DAY', 'MONTH',
       'YEAR', 'DAY_WEEK', 'HOUR', 'MINUTE', 'NHS', 'RUR_URB', 'FUNC_SYS',
       'RD_OWNER', 'ROUTE', 'TWAY_ID', 'TWAY_ID2', 'MILEPT', 'LATITUDE',
       'LONGITUD', 'SP_JUR', 'HARM_EV', 'MAN_COLL', 'RELJCT1', 'RELJCT2',
       'TYP_INT', 'WRK_ZONE', 'REL_ROAD', 'LGT_COND', 'WEATHER1', 'WEATHER2',
       'WEATHER', 'SCH_BUS', 'RAIL', 'NOT_HOUR', 'NOT_MIN', 'ARR_HOUR',
       'ARR_MIN', 'HOSP_HR', 'HOSP_MN', 'CF1', 'CF2', 'CF3', 'FATALS',
       'DRUNK_DR'],
      dtype='object')

In [4]:
# (rows, columns) of the accident table
data.shape

(32166, 52)

In [5]:
# Keep only the accident fields this study uses and expose them under lowercase names

study_fields = ['ST_CASE', 'DAY_WEEK', 'DAY', 'MONTH', 'YEAR', 'STATE', 'FATALS', 'WEATHER']
df = data.loc[:, study_fields].rename(columns=str.lower)
df.head()

Unnamed: 0,st_case,day_week,day,month,year,state,fatals,weather
0,10001,5,1,1,2015,1,1,1
1,10002,5,1,1,2015,1,1,10
2,10003,5,1,1,2015,1,1,1
3,10004,1,4,1,2015,1,1,10
4,10005,4,7,1,2015,1,1,1


In [6]:
# Translate the numeric state code into the state's name.
# Codes that are not keys of `states` are left untouched by replace().

states = {
    1: 'Alabama', 2: 'Alaska', 4: 'Arizona', 5: 'Arkansas', 6: 'California',
    8: 'Colorado', 9: 'Connecticut', 10: 'Delaware', 11: 'District of Columbia',
    12: 'Florida', 13: 'Georgia', 15: 'Hawaii', 16: 'Idaho', 17: 'Illinois',
    18: 'Indiana', 19: 'Iowa', 20: 'Kansas', 21: 'Kentucky', 22: 'Louisiana',
    23: 'Maine', 24: 'Maryland', 25: 'Massachusetts', 26: 'Michigan',
    27: 'Minnesota', 28: 'Mississippi', 29: 'Missouri', 30: 'Montana',
    31: 'Nebraska', 32: 'Nevada', 33: 'New Hampshire', 34: 'New Jersey',
    35: 'New Mexico', 36: 'New York', 37: 'North Carolina', 38: 'North Dakota',
    39: 'Ohio', 40: 'Oklahoma', 41: 'Oregon', 42: 'Pennsylvania',
    43: 'Puerto Rico', 44: 'Rhode Island', 45: 'South Carolina',
    46: 'South Dakota', 47: 'Tennessee', 48: 'Texas', 49: 'Utah',
    50: 'Vermont', 51: 'Virginia', 52: 'Virgin Islands', 53: 'Washington',
    54: 'West Virginia', 55: 'Wisconsin', 56: 'Wyoming',
}

state_names = df['state'].replace(states)
df = df.assign(state=state_names)
df.head()

Unnamed: 0,st_case,day_week,day,month,year,state,fatals,weather
0,10001,5,1,1,2015,Alabama,1,1
1,10002,5,1,1,2015,Alabama,1,10
2,10003,5,1,1,2015,Alabama,1,1
3,10004,1,4,1,2015,Alabama,1,10
4,10005,4,7,1,2015,Alabama,1,1


In [7]:
# Translate the numeric weekday code (1..7) into the weekday's name, 1 being Sunday

days = dict(enumerate(
    ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'],
    start=1,
))

weekday_names = df['day_week'].replace(days)
df = df.assign(day_week=weekday_names)
df.head()

Unnamed: 0,st_case,day_week,day,month,year,state,fatals,weather
0,10001,Thursday,1,1,2015,Alabama,1,1
1,10002,Thursday,1,1,2015,Alabama,1,10
2,10003,Thursday,1,1,2015,Alabama,1,1
3,10004,Sunday,4,1,2015,Alabama,1,10
4,10005,Wednesday,7,1,2015,Alabama,1,1


In [8]:
# Translate the numeric weather code into a readable weather condition.
# Codes that are not keys of `weather` are left untouched by replace().

weather = {
    0: 'No Additional Atmospheric Conditions',
    1: 'Clear',
    2: 'Rain',
    3: 'Sleet, Hail',
    4: 'Snow',
    5: 'Fog, Smog, Smoke',
    6: 'Severe Crosswinds',
    7: 'Blowing Sand, Soil, Dirt',
    8: 'Other',
    10: 'Cloudy',
    11: 'Blowing Snow',
    12: 'Freezing Rain or Drizzle',
    98: 'Not Reported',
    99: 'Unknown',
}

weather_names = df['weather'].replace(weather)
df = df.assign(weather=weather_names)
df.head()

Unnamed: 0,st_case,day_week,day,month,year,state,fatals,weather
0,10001,Thursday,1,1,2015,Alabama,1,Clear
1,10002,Thursday,1,1,2015,Alabama,1,Cloudy
2,10003,Thursday,1,1,2015,Alabama,1,Clear
3,10004,Sunday,4,1,2015,Alabama,1,Cloudy
4,10005,Wednesday,7,1,2015,Alabama,1,Clear


In [9]:
# Assemble a datetime column 'date' from the separate year / month / day fields

date_parts = df[['year', 'month', 'day']]
df = df.assign(date=pd.to_datetime(date_parts))
df.head()

Unnamed: 0,st_case,day_week,day,month,year,state,fatals,weather,date
0,10001,Thursday,1,1,2015,Alabama,1,Clear,2015-01-01
1,10002,Thursday,1,1,2015,Alabama,1,Cloudy,2015-01-01
2,10003,Thursday,1,1,2015,Alabama,1,Clear,2015-01-01
3,10004,Sunday,4,1,2015,Alabama,1,Cloudy,2015-01-04
4,10005,Wednesday,7,1,2015,Alabama,1,Clear,2015-01-07


## Database: person

In [10]:
#Importation of database
# Reads the person table from the local CSV file and previews its first rows.

data_person = pd.read_csv('data/person.csv')
data_person.head()

Unnamed: 0,STATE,ST_CASE,VE_FORMS,VEH_NO,PER_NO,STR_VEH,COUNTY,DAY,MONTH,HOUR,...,DEATH_TM,LAG_HRS,LAG_MINS,P_SF1,P_SF2,P_SF3,WORK_INJ,HISPANIC,RACE,LOCATION
0,1,10001,1,1,1,0,127,1,1,2,...,240,0,0,0,0,0,0,7,1,0
1,1,10002,1,1,1,0,83,1,1,22,...,2213,0,0,0,0,0,0,7,1,0
2,1,10003,1,1,1,0,11,1,1,1,...,125,0,0,0,0,0,0,7,2,0
3,1,10003,1,1,2,0,11,1,1,1,...,8888,999,99,0,0,0,8,0,0,0
4,1,10004,1,1,1,0,45,4,1,0,...,57,0,0,0,0,0,0,7,1,0


In [11]:
# (rows, columns) of the person table
data_person.shape

(80587, 68)

In [12]:
# List every column label of the person table
data_person.columns

Index(['STATE', 'ST_CASE', 'VE_FORMS', 'VEH_NO', 'PER_NO', 'STR_VEH', 'COUNTY',
       'DAY', 'MONTH', 'HOUR', 'MINUTE', 'RUR_URB', 'FUNC_SYS', 'HARM_EV',
       'MAN_COLL', 'SCH_BUS', 'MAKE', 'MAK_MOD', 'BODY_TYP', 'MOD_YEAR',
       'TOW_VEH', 'SPEC_USE', 'EMER_USE', 'ROLLOVER', 'IMPACT1', 'FIRE_EXP',
       'AGE', 'SEX', 'PER_TYP', 'INJ_SEV', 'SEAT_POS', 'REST_USE', 'REST_MIS',
       'AIR_BAG', 'EJECTION', 'EJ_PATH', 'EXTRICAT', 'DRINKING', 'ALC_DET',
       'ALC_STATUS', 'ATST_TYP', 'ALC_RES', 'DRUGS', 'DRUG_DET', 'DSTATUS',
       'DRUGTST1', 'DRUGTST2', 'DRUGTST3', 'DRUGRES1', 'DRUGRES2', 'DRUGRES3',
       'HOSPITAL', 'DOA', 'DEATH_DA', 'DEATH_MO', 'DEATH_YR', 'DEATH_HR',
       'DEATH_MN', 'DEATH_TM', 'LAG_HRS', 'LAG_MINS', 'P_SF1', 'P_SF2',
       'P_SF3', 'WORK_INJ', 'HISPANIC', 'RACE', 'LOCATION'],
      dtype='object')

In [13]:
#Reduction of columns to get useful ones for the study and formatting to lowercase
#
# .copy() makes df_person an independent frame rather than a slice of data_person,
# so the columns that later cells add to / overwrite on df_person do not raise
# pandas' SettingWithCopyWarning.

df_person = data_person[['ST_CASE','RUR_URB','MOD_YEAR', 'AGE', 'SEX','ALC_RES', 'DRUGRES1']].copy()
df_person.columns = df_person.columns.str.lower()
df_person.head()

Unnamed: 0,st_case,rur_urb,mod_year,age,sex,alc_res,drugres1
0,10001,1,2003.0,68,1,189,1
1,10002,1,2006.0,49,1,0,358
2,10003,1,2008.0,31,1,188,1
3,10003,1,2008.0,20,2,996,0
4,10004,1,2005.0,40,1,260,1


In [14]:
# New column for drug use yes/no
#
# 'drugres1' codes are classified by range instead of by enumerating individual
# values, so a code that was never seen before is still classified:
#   0, 1, 95            -> 'No'
#   997, 999            -> NaN
#   100..996 and 998    -> 'Yes'
#   anything else       -> NaN
# NOTE(review): the meaning of each code (e.g. whether 0 stands for "not tested"
# rather than "negative") should be confirmed against the FARS user guide.
#
# The result is built as a Series aligned on df_person's index, so it always has
# exactly one value per row.

no_drugs = [0, 1, 95]
nan_drugs = [997, 999]
drugs = [code for code in range(100, 999) if code not in nan_drugs]

drug_codes = df_person['drugres1']
drug_use = pd.Series(np.nan, index=df_person.index, dtype=object)
drug_use[drug_codes.isin(no_drugs)] = 'No'
drug_use[drug_codes.isin(drugs)] = 'Yes'

df_person['drug_use'] = drug_use
df_person.head()

Unnamed: 0,st_case,rur_urb,mod_year,age,sex,alc_res,drugres1,drug_use
0,10001,1,2003.0,68,1,189,1,No
1,10002,1,2006.0,49,1,0,358,Yes
2,10003,1,2008.0,31,1,188,1,No
3,10003,1,2008.0,20,2,996,0,No
4,10004,1,2005.0,40,1,260,1,No


In [15]:
# New column for alcohol use yes/no
#
# 'alc_res' values are classified by range instead of by enumerating individual
# readings, so a reading that was never seen before is still classified:
#   0          -> 'No'
#   1..940     -> 'Yes'
#   995..999   -> NaN
#   anything else -> NaN
# NOTE(review): 1..940 is assumed to be the full range of measured positive
# results and 995..999 the "no usable result" codes; confirm against the FARS
# user guide.
#
# The result is built as a Series aligned on df_person's index, so it always has
# exactly one value per row.

no_alc = [0]
alc = list(range(1, 941))
nan_alc = [995, 996, 997, 998, 999]

alc_codes = df_person['alc_res']
alc_use = pd.Series(np.nan, index=df_person.index, dtype=object)
alc_use[alc_codes.isin(no_alc)] = 'No'
alc_use[alc_codes.isin(alc)] = 'Yes'

df_person['alc_use'] = alc_use
df_person.head()

Unnamed: 0,st_case,rur_urb,mod_year,age,sex,alc_res,drugres1,drug_use,alc_use
0,10001,1,2003.0,68,1,189,1,No,Yes
1,10002,1,2006.0,49,1,0,358,Yes,No
2,10003,1,2008.0,31,1,188,1,No,Yes
3,10003,1,2008.0,20,2,996,0,No,
4,10004,1,2005.0,40,1,260,1,No,Yes


In [16]:
# Changing column 'rur_urb' for text 'rural'/'urban'
#
# Code 1 -> 'Rural', code 2 -> 'Urban', codes 6/8/9 -> NaN. Series.map() also
# yields NaN for any code that is not listed, so the result always has exactly
# one value per row.
# NOTE: the cell overwrites 'rur_urb' in place; re-running it without rebuilding
# df_person maps the already-translated labels to NaN.

rural = [1]
urban = [2]
nan_road = [6, 8, 9]

road_labels = {
    **dict.fromkeys(rural, 'Rural'),
    **dict.fromkeys(urban, 'Urban'),
    **dict.fromkeys(nan_road, np.nan),
}

road = df_person['rur_urb'].map(road_labels)

df_person['rur_urb'] = road
df_person.head()

Unnamed: 0,st_case,rur_urb,mod_year,age,sex,alc_res,drugres1,drug_use,alc_use
0,10001,Rural,2003.0,68,1,189,1,No,Yes
1,10002,Rural,2006.0,49,1,0,358,Yes,No
2,10003,Rural,2008.0,31,1,188,1,No,Yes
3,10003,Rural,2008.0,20,2,996,0,No,
4,10004,Rural,2005.0,40,1,260,1,No,Yes


 ## Figures

In [39]:
# Bar chart: number of accident records in each state

df_states = df.groupby('state', as_index=False)['st_case'].count()

states_bar_opts = dict(
    kind='bar', x='state', y='st_case', color='blue',
    yTitle='Accidents', title='Accidents per state',
)
df_states.iplot(**states_bar_opts)

In [18]:
# To relate accidents to population, load a population spreadsheet and trim it
# so that its 'state' column can be matched against the accident dataframe.
# Only the columns 'Unnamed: 0' and 'Unnamed: 11' are kept, and only the rows at
# positions 3..53 (the first three rows and everything after the 51 kept rows
# are discarded).

df_population = pd.read_excel('PEP_2015_PEPANNRES.xls')
population_columns = df_population[['Unnamed: 0', 'Unnamed: 11']]
df_population2015 = population_columns.iloc[3:54].copy()
df_population2015.columns = ['state', 'population']
df_population2015.head()

Unnamed: 0,state,population
3,Alabama,4858979
4,Alaska,738432
5,Arizona,6828065
6,Arkansas,2978204
7,California,39144818


In [19]:
# Join the per-state accident counts with the per-state population on 'state'

states_pop = df_states.merge(df_population2015, on='state')
states_pop.head()

Unnamed: 0,state,st_case,population
0,Alabama,783,4858979
1,Alaska,60,738432
2,Arizona,810,6828065
3,Arkansas,472,2978204
4,California,2925,39144818


In [20]:
# Scatter plot: accident count against state population, one marker per state

scatter_opts = dict(
    kind='scatter', mode='markers',
    x='population', y='st_case', text='state',
    xTitle='State population', yTitle='Accidents',
    title='Accidents per state population', color='blue',
)
states_pop.iplot(**scatter_opts)

In [21]:
#Bar plot accidents in rural or urban roads
#
# The same st_case can appear on several rows of df_person, so the number of
# accidents is the number of DISTINCT case ids per road type ('nunique'), not the
# number of rows ('count').

data_road = df_person.groupby('rur_urb', as_index=False).agg({'st_case':'nunique'})

data_road.iplot(kind='bar', x='rur_urb', y='st_case',
           yTitle='Number of accidents', title='Accidents in rural / urban roads', color='blue')

In [22]:
# Line chart: number of accident records on each calendar date

accidents_date = df.groupby('date', as_index=False).agg({'st_case': 'count'})

daily_line_opts = dict(
    x='date', y='st_case', color='blue',
    yTitle='Accidents', title='Number of accidents per day',
)
accidents_date.iplot(**daily_line_opts)

In [23]:
# Horizontal histogram of the accident records by weekday name

weekday_series = df['day_week']
weekday_series.iplot(
    kind='hist', orientation='h', color='blue',
    xTitle='Number of accidents', title='Accidents per week day',
)

In [24]:
# Histogram of the accident records by day of the month

month_day_series = df['day']
month_day_series.iplot(
    kind='hist', color='blue',
    xTitle='Day', yTitle='Number of accidents',
    title='Accidents per month day',
)

In [25]:
# Step 1: total 'fatals' for every accident case id

fatals_accident = df.groupby('st_case', as_index=False)['fatals'].sum()

# Step 2: how many cases share each victim total (indexed by the victim total)

fatals_accident2 = fatals_accident.groupby('fatals')[['st_case']].count()

# Step 3: bar chart of that distribution

victims_bar_opts = dict(
    kind='bar', color='blue',
    xTitle='Victims', yTitle='Number of accidents',
    title='Victims per accident',
)
fatals_accident2.iplot(**victims_bar_opts)

# Step 4: overall number of victims across all cases

total_victims = fatals_accident['fatals'].sum()
print('Total victims 2015: ', total_victims)


Total victims 2015:  35092


In [26]:
#Plotting number of victims per state
#
# The x axis carries the state names and each bar is the SUM of 'fatals' over the
# state's accidents, so the axis label and title say "State" / "Victims per state".

victims_state = df.groupby('state', as_index=False).agg({'fatals':'sum'})

victims_state.iplot(kind='bar', x='state', y='fatals', xTitle='State', yTitle='Number of victims', 
           title='Victims per state', color='blue')

In [27]:
# Changing sex column codes 1 and 2 for 'Male' and 'Female'
#
# Codes 8 and 9 become NaN. Series.map() also yields NaN for any code that is not
# listed, so the result always has exactly one value per row.
# NOTE: the cell overwrites 'sex' in place; re-running it without rebuilding
# df_person maps the already-translated labels to NaN.

male = [1]
female = [2]
nan_sex = [8, 9]

sex_labels = {
    **dict.fromkeys(male, 'Male'),
    **dict.fromkeys(female, 'Female'),
    **dict.fromkeys(nan_sex, np.nan),
}

sex = df_person['sex'].map(sex_labels)

df_person['sex'] = sex
df_person.head()

Unnamed: 0,st_case,rur_urb,mod_year,age,sex,alc_res,drugres1,drug_use,alc_use
0,10001,Rural,2003.0,68,Male,189,1,No,Yes
1,10002,Rural,2006.0,49,Male,0,358,Yes,No
2,10003,Rural,2008.0,31,Male,188,1,No,Yes
3,10003,Rural,2008.0,20,Female,996,0,No,
4,10004,Rural,2005.0,40,Male,260,1,No,Yes


In [28]:
# Number of person rows for each value of 'sex' (NaN rows are left out by groupby)

df_sex = df_person.groupby('sex', as_index=False)['st_case'].count()
df_sex

Unnamed: 0,sex,st_case
0,Female,26631
1,Male,52598


In [29]:
# Bar chart of df_sex. Each bar counts rows of the person table, i.e. persons,
# so the y axis is labelled as persons rather than accidents.
df_sex.iplot(kind='bar', x='sex', y='st_case', yTitle='Number of persons', 
           title='Male and female involved in accidents', color='blue')

In [44]:
#Plotting age
#
# Counts the person rows for every age, then keeps real ages only.
# NOTE(review): the cutoff assumes ages are coded 0..120 with larger values
# (998 / 999) standing for "not reported" / "unknown" in the 2015 coding; a
# cutoff of 97 would silently discard persons aged 98 and over. Confirm against
# the FARS user guide.

MAX_VALID_AGE = 120

df_age = df_person.groupby('age', as_index=False).agg({'st_case':'count'})
df_age = df_age.loc[df_age['age'] <= MAX_VALID_AGE]
df_age.head(10)

Unnamed: 0,age,st_case
0,0,337
1,1,441
2,2,410
3,3,418
4,4,374
5,5,389
6,6,395
7,7,361
8,8,384
9,9,426


In [31]:
# Bar chart of df_age. Each bar counts rows of the person table, i.e. persons,
# so the y axis is labelled as persons rather than accidents.
df_age.iplot(kind='bar', x='age', y='st_case', xTitle='Age', yTitle='Number of persons', 
           title='Age of people involved in accidents', color='blue')

# Factors involved in accidents

## Weather

In [32]:
#New column 'conditions' grouping good and bad weather conditions
#
# Every weather label is looked up in one mapping; labels in nan_weather and any
# label that is not listed at all become NaN, so the result always has exactly
# one value per row.

bad_weather = ['Rain', 'Fog, Smog, Smoke', 'Sleet, Hail',
       'Snow', 'Freezing Rain or Drizzle',
       'Severe Crosswinds', 'Blowing Sand, Soil, Dirt',
       'Blowing Snow']

good_weather = ['Clear', 'Cloudy']

nan_weather = ['Other', 'Not Reported', 'Unknown']

condition_by_weather = {
    **dict.fromkeys(bad_weather, 'bad'),
    **dict.fromkeys(good_weather, 'good'),
    **dict.fromkeys(nan_weather, np.nan),
}

conditions = df['weather'].map(condition_by_weather)

df['conditions'] = conditions
df.head()

Unnamed: 0,st_case,day_week,day,month,year,state,fatals,weather,date,conditions
0,10001,Thursday,1,1,2015,Alabama,1,Clear,2015-01-01,good
1,10002,Thursday,1,1,2015,Alabama,1,Cloudy,2015-01-01,good
2,10003,Thursday,1,1,2015,Alabama,1,Clear,2015-01-01,good
3,10004,Sunday,4,1,2015,Alabama,1,Cloudy,2015-01-04,good
4,10005,Wednesday,7,1,2015,Alabama,1,Clear,2015-01-07,good


In [33]:
# Number of accident records under each weather grouping (NaN rows are left out by groupby)

df_weather = df.groupby('conditions', as_index=False)['st_case'].count()
df_weather

Unnamed: 0,conditions,st_case
0,bad,3367
1,good,28385


In [34]:
# Bar chart of the good / bad weather accident counts
weather_bar_opts = dict(
    kind='bar', x='conditions', y='st_case', color='blue',
    yTitle='Number of accidents', title='Weather conditions in accidents',
)
df_weather.iplot(**weather_bar_opts)

## Vehicle age (model year)

In [35]:
#Plotting cars antiquity
#
# Counts the person rows for every vehicle model year and keeps model years
# before 2016.
# NOTE(review): each bar counts rows of the person table, so a vehicle with
# several occupants is counted several times; also confirm whether model year
# 2016 vehicles should really be excluded from 2015 data.

data_year = df_person.groupby('mod_year', as_index=False).agg({'st_case':'count'})

# .copy() turns the filtered selection into an independent frame, so adding the
# 'car_age' column below does not raise pandas' SettingWithCopyWarning.
data_year = data_year[data_year.mod_year < 2016].copy()

data_year['car_age'] = 2015 - data_year['mod_year']

data_year.iplot(kind='bar', x='mod_year', y='st_case', xTitle='Model Year', color='blue',
           yTitle='Number of accidents', title='Antiquity of cars involved in accidents')

## Alcohol and drugs

In [36]:
# Number of person rows for each value of 'alc_use' (NaN rows are left out by groupby)

df_alc = df_person.groupby('alc_use', as_index=False)['st_case'].count()
df_alc

Unnamed: 0,alc_use,st_case
0,No,19313
1,Yes,10044


In [37]:
# Bar chart of df_alc. Each bar counts rows of the person table, i.e. persons,
# so the y axis is labelled as persons rather than accidents.
df_alc.iplot(kind='bar', x='alc_use', y='st_case', yTitle='Number of persons', 
           title='Alcohol consumption in accidents', color='blue')

In [38]:
# Histogram of the alcohol readings that are strictly between 0 and 995,
# with a vertical marker drawn at 80

alc_reading = df_person['alc_res']
is_positive_reading = alc_reading.gt(0) & alc_reading.lt(995)
alc_values = df_person.loc[is_positive_reading]

alc_hist_opts = dict(
    kind='hist', bins=50, vline=80, color='blue',
    xTitle='Alcohol level', yTitle='Number of persons',
    title='Alcohol level when positive test',
)
alc_values['alc_res'].iplot(**alc_hist_opts)

In [40]:
# Number of person rows for each value of 'drug_use' (NaN rows are left out by groupby)

df_drugs = df_person.groupby('drug_use', as_index=False)['st_case'].count()
df_drugs

Unnamed: 0,drug_use,st_case
0,No,69758
1,Yes,9086


In [42]:
# Bar chart of df_drugs. Each bar counts rows of the person table, i.e. persons,
# so the y axis is labelled as persons rather than accidents.
df_drugs.iplot(kind='bar', x='drug_use', y='st_case', yTitle='Number of persons', 
           title='Drug use in accidents', color='blue')