In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# Any results you write to the current directory are saved as output.

This will be a **visual** analysis of accidents in the U.S. I hope to extract some useful insights from the data we'll plot. It will be my second practice with the Plotly library, which I personally like more because it looks better and has user-friendly graphs.

If you like these visualisations, please give me an upvote or leave a comment. I highly appreciate it, and it will keep me motivated to keep learning more :)

# Columns on Dataset

From the dataset page, we know that the dataset contains lots of columns. I think some of them are not very useful, so we'll first check the columns and import only the meaningful ones. For every removed column there is a short explanation of why it was excluded.

Well, that's a lot of features!

Let's first remove the following:

- ID: Not necessary, we can count the amount of accidents without it.
- Source: It only shows the source from the data
- TMC: It only describes a code that is related to extra details of the event, not useful for this analysis.
- End_Time: It's not relevant when the accident ended, as it's supposed to end in the same day.
- Start_Lat: We already have data from cities. If we need a more detailed analysis of a specific location, we can use this data.
- Start_Lng: We already have data from cities
- End_Lat: Column is empty
- End_Lng: Column is empty
- Distance: Does not affect our visualisations goal
- Description: Too many unique values
- Number: It shows the street, number, as accidents happen everywhere in US, I don't believe there's a relation to the street number and accidents.
- Street: Too many unique values, same as above.
- Side: It only serves to show that the side Right has way more accidents. I guess left is related to inverted streets (like in UK)
- Zipcode: Too many unique values, the data from City/County/State already have this information
- Country: All accidents are in US Territory.
- Airport_Code: It only shows the code of a nearby airport, and some accidents may not have it.
- Weather_Timestamp: We can get this data from temperature and time already.
- Wind_Direction: This may be useful for the analysis of a specific accident, as the wind may interfere with driving, but it does not apply to an analysis of all accidents.
- Wind_Speed: Has lots of missing values, so we'll remove as this may affect analysis.
- Wind_Chill(F): The temperature already speaks for it.
- All Boolean columns, except Traffic_Signal: They only show True or False values and the count for each one can already be seen on the dataset page. Traffic_Signal has a different value from all others and may be useful to take a deeper look into it.
- Nautical_Twilight, Astronomical_Twilight: I guess the only important thing is whether it was day or night at the location, or whether it was day or night by civil twilight, as some of these periods may be related to more accidents due to people leaving for or arriving from work/home.

Removed after a first analysis

- Timezone: Not useful for our visualisation goal
- Pressure: Not useful
- Visibility: Most of accidents have good visibility
- Precipitation(in): The huge majority of accidents happen at precipitation values lower than 0.25. (The column is still loaded below so its missing values can be filled, but it is not plotted.)


In total, we removed most of the columns!

In [None]:
# Columns kept for the analysis; every other column is skipped at load time.
cols = [
    'Severity',
    'Start_Time',
    'City',
    'County',
    'State',
    'Temperature(F)',
    'Humidity(%)',
    'Precipitation(in)',
    'Weather_Condition',
    'Traffic_Signal',
    'Sunrise_Sunset',
]
# NOTE: latitude/longitude are not in this list. If they are added later, round
# them to 2 digits so accidents at nearly the same place can be counted together.

In [None]:
# Load only the selected columns from the accidents CSV.
csv_path = "../input/us-accidents/US_Accidents_Dec20.csv"
df = pd.read_csv(csv_path, usecols=cols)

In [None]:
# Preview the first rows of the freshly loaded data.
df.head()

# Data checking

In [None]:
# Print the column dtypes, non-null counts and memory usage of the frame.
df.info()

Look at how much we decreased our data usage! Let's decrease it even more by removing rows that are blank or need some editing!

In [None]:
# Count the missing values in each column.
df.isnull().sum()

I assume that the rows with no precipitation value are cases in which there was no rain, so we'll put a 0 in these rows.
For the other columns, we'll simply remove the rows with missing values. As we have many rows, it won't hurt to remove a few of them.

# Removal or edition of blank cells

We see a lot of missing values in Precipitation. We'll fill these with 0, as I suppose that a missing value means there was no rain at the time of the accident.

In [None]:
# Missing precipitation readings are treated as "no rain" and set to 0.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the column selection: the chained in-place form is
# deprecated in recent pandas and does not modify `df` under copy-on-write.
df['Precipitation(in)'] = df['Precipitation(in)'].fillna(0)

And now we'll remove all other rows with NaN values, the dataset is large enough, so this won't affect it heavily.

In [None]:
# Drop every row that still has a missing value in any column.
df.dropna(axis='index', how='any', inplace=True)

Now, our dataset is more clean and organized. Let's now edit our data!

# Data edition

We'll now extract day, month and year from the Start_Time feature, let's try to make some cool graphs with the separated columns

In [None]:
# Parse Start_Time into datetime values so the .dt accessor can be used on it.
start_times = pd.to_datetime(df['Start_Time'])
df['Start_Time'] = start_times

In [None]:
# Split the timestamp into weekday, month and year columns; having each part
# in its own column makes the per-period visualisations easier to build.
start_parts = df['Start_Time'].dt
df['Week_day'] = start_parts.dayofweek
df['Month'] = start_parts.month
df['Year'] = start_parts.year

Now, let's convert our Week_day into text, 0 will be Monday and 6 will be Sunday.

We'll do the same for the month, 1 will be January and 12 December. This will make our graphs easier to read.

In [None]:
# Replace the numeric codes with names: weekday codes are 0-based (0 = Monday),
# month codes are 1-based (1 = January).
weekday_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
               'August', 'September', 'October', 'November', 'December']
df['Week_day'] = df['Week_day'].map(dict(enumerate(weekday_names)))
df['Month'] = df['Month'].map(dict(enumerate(month_names, start=1)))

In [None]:
# The raw timestamp is no longer needed once its parts have been extracted.
df.drop(columns=["Start_Time"], inplace=True)

In [None]:
# Preview the frame after the date columns were added and Start_Time dropped.
df.head()

In [None]:
# NOTE(review): this repeats the preview from the previous cell.
df.head()

## Plotting the Data

In [None]:
import plotly
import plotly.graph_objs as go
import plotly.offline as py

In [None]:
# Enable inline Plotly rendering in the notebook (`py` is plotly.offline).
py.init_notebook_mode(connected=True)

In [None]:
# Number of accidents recorded at each severity level.
severity = df.groupby('Severity')['Severity'].count()
severity

## Severity Data

In [None]:
# Pie chart with one slice per severity level.
severity_pie = go.Pie(labels=severity.index, values=severity.values, direction='clockwise')
data = [severity_pie]

# Square canvas with a title.
layout = go.Layout(title='Severity of accidents', width=600, height=600)

# Assemble the figure and render it inline.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)

In [None]:
# Accident count per year, kept as a one-column frame (column "Year" holds the
# counts, the index holds the year values).
accidents_years = df.groupby('Year')['Year'].count().to_frame()
accidents_years

In [None]:
# Bar chart of the yearly accident totals.
# The x values come from the grouped index instead of a hard-coded year list,
# so every bar is labelled with the year its count actually belongs to, even
# if a year is missing from (or added to) the data.
data = [go.Bar(x=accidents_years.index,
               y=accidents_years['Year'])]

layout = go.Layout(title='Accidents by year 2016-June 2020',
                   xaxis={'title':'Year'},
                   yaxis={'title':'Number of accidents'},
                   width=700,
                   height=600)


fig = go.Figure(data=data, layout=layout)
# Limit the y axis to a handful of ticks to keep it readable.
fig.update_yaxes(nticks=4)

py.iplot(fig)

In [None]:
def _month_counts(year):
    """Return a one-column frame with the accident count per month of ``year``."""
    # Keep only the month labels of rows in the requested year; rows from other
    # years get no group key and are left out of the count.
    months_in_year = df['Month'].loc[df['Year'] == year]
    return pd.DataFrame(df['Month'].groupby(by=months_in_year).count())


m2016 = _month_counts(2016)
m2017 = _month_counts(2017)
m2018 = _month_counts(2018)
m2019 = _month_counts(2019)
m2020 = _month_counts(2020)

## In each result the index (named "Month") holds the month names and the
## "Month" column holds the counts; this matters when the ordered frames are
## built from these results later on.

In [None]:
# Calendar-ordered monthly totals for 2016.
# reindex() puts the grouped counts in calendar order and fills months without
# any record with 0. January is read from the data like every other month
# instead of being hard-coded to 0.
month_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
               'August', 'September', 'October', 'November', 'December']
accidents_months_2016 = pd.DataFrame({
    'Month of the year - 2016': month_order,
    'Total Accidents': m2016['Month'].reindex(month_order, fill_value=0).tolist(),
})
accidents_months_2016

In [None]:
# Calendar-ordered monthly totals for 2017; months without records count as 0.
month_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
               'August', 'September', 'October', 'November', 'December']
accidents_months_2017 = pd.DataFrame({
    'Month of the year - 2017': month_order,
    'Total Accidents': m2017['Month'].reindex(month_order, fill_value=0).tolist(),
})

In [None]:
# Calendar-ordered monthly totals for 2018; months without records count as 0.
month_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
               'August', 'September', 'October', 'November', 'December']
accidents_months_2018 = pd.DataFrame({
    'Month of the year - 2018': month_order,
    'Total Accidents': m2018['Month'].reindex(month_order, fill_value=0).tolist(),
})
accidents_months_2018

In [None]:
# Calendar-ordered monthly totals for 2019; months without records count as 0.
month_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
               'August', 'September', 'October', 'November', 'December']
accidents_months_2019 = pd.DataFrame({
    'Month of the year - 2019': month_order,
    'Total Accidents': m2019['Month'].reindex(month_order, fill_value=0).tolist(),
})
accidents_months_2019

In [None]:
# Calendar-ordered monthly totals for 2020; months without records count as 0.
month_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
               'August', 'September', 'October', 'November', 'December']
accidents_months_2020 = pd.DataFrame({
    'Month of the year - 2020': month_order,
    'Total Accidents': m2020['Month'].reindex(month_order, fill_value=0).tolist(),
})
accidents_months_2020

In [None]:
# One bar trace per year, all sharing the month names on the x axis.
monthly_frames = {
    '2016': accidents_months_2016,
    '2017': accidents_months_2017,
    '2018': accidents_months_2018,
    '2019': accidents_months_2019,
    '2020': accidents_months_2020,
}
data = [
    go.Bar(x=frame[f'Month of the year - {year}'],
           y=frame['Total Accidents'],
           name=year)
    for year, frame in monthly_frames.items()
]

layout = go.Layout(
    title='Accidents per month - February 2016 - June 2020',
    xaxis={'title': 'Month of the year'},
    yaxis={'title': 'Number of accidents'},
    width=1700,
    height=700,
)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

In [None]:
# Accident count per weekday name (index sorted alphabetically by the groupby).
day = df['Week_day'].groupby(df['Week_day']).count()
day

In [None]:
# Same counts again, this time ordered from most to fewest accidents
# (value_counts sorts by count by default).
day = df['Week_day'].value_counts()
day

We need the days in calendar order, from Monday (0) to Sunday (6), in the first column. Since I wasn't able to figure out how to sort them that way, I'll build the ordered tables manually.

In [None]:
def _weekday_counts(year):
    """Return a one-column frame with the accident count per weekday of ``year``."""
    days_in_year = df['Week_day'].loc[df['Year'] == year]
    return pd.DataFrame(days_in_year.groupby(df['Week_day']).count())


d2016 = _weekday_counts(2016)
d2017 = _weekday_counts(2017)
d2018 = _weekday_counts(2018)
d2019 = _weekday_counts(2019)
d2020 = _weekday_counts(2020)

In [None]:
# Show the 2016 weekday counts as produced by the groupby.
d2016

In [None]:
# Weekday totals for 2016 in Monday-to-Sunday order; days without records count as 0.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
days_2016 = pd.DataFrame({
    'Day of the week - 2016': weekday_order,
    'Total Accidents': d2016['Week_day'].reindex(weekday_order, fill_value=0).tolist(),
})
days_2016

In [None]:
# Weekday totals for 2017 in Monday-to-Sunday order; days without records count as 0.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
days_2017 = pd.DataFrame({
    'Day of the week - 2017': weekday_order,
    'Total Accidents': d2017['Week_day'].reindex(weekday_order, fill_value=0).tolist(),
})
days_2017

In [None]:
# Weekday totals for 2018 in Monday-to-Sunday order; days without records count as 0.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
days_2018 = pd.DataFrame({
    'Day of the week - 2018': weekday_order,
    'Total Accidents': d2018['Week_day'].reindex(weekday_order, fill_value=0).tolist(),
})
days_2018

In [None]:
# Weekday totals for 2019 in Monday-to-Sunday order; days without records count as 0.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
days_2019 = pd.DataFrame({
    'Day of the week - 2019': weekday_order,
    'Total Accidents': d2019['Week_day'].reindex(weekday_order, fill_value=0).tolist(),
})
days_2019

In [None]:
# Weekday totals for 2020 in Monday-to-Sunday order; days without records count as 0.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
days_2020 = pd.DataFrame({
    'Day of the week - 2020': weekday_order,
    'Total Accidents': d2020['Week_day'].reindex(weekday_order, fill_value=0).tolist(),
})
days_2020

The average number of accidents per month just keeps increasing from year to year. Did something happen during this period? Or was more data simply collected?

Only a deeper analysis of the changes in U.S. traffic laws during this period can help answer that, together with an analysis of how the data was collected during these 4 years.


In [None]:
# Grouped bar chart: accidents per weekday, one trace per year.
data = [go.Bar(x=days_2016['Day of the week - 2016'],
               y=days_2016['Total Accidents'],
               name='2016'),
        go.Bar(x=days_2017['Day of the week - 2017'],
               y=days_2017['Total Accidents'],
               name='2017'),
        go.Bar(x=days_2018['Day of the week - 2018'],
               y=days_2018['Total Accidents'],
               name='2018'),
        go.Bar(x=days_2019['Day of the week - 2019'],
               y=days_2019['Total Accidents'],
               name='2019'),
        go.Bar(x=days_2020['Day of the week - 2020'],
               y=days_2020['Total Accidents'],
               name='2020')]

layout = go.Layout(title='Accidents per day - February 2016 - June 2020',
                   xaxis={'title':'Day of the week'},
                   yaxis={'title':'Number of accidents'},
                   width=1700,
                   height=700)

fig = go.Figure(data=data, layout=layout)

# Plot the assembled figure, not the bare trace list: passing `data` alone
# drops the title, axis labels and size defined in `layout`.
py.iplot(fig)

In [None]:
# Keep only the cities with more than 10,000 recorded accidents.
# Change the threshold below to include more or fewer cities.
city_counts = df['City'].value_counts()
cities = city_counts[city_counts > 10000]
cities

In [None]:
# Bar chart of the accident totals for the selected cities.
data = [go.Bar(x=cities.index,
               y=cities.values,
               name='Cities')]

# The counts are not filtered by year and the data includes 2020 records,
# so the title states the full 2016 - 2020 range.
layout = go.Layout(title='Accidents per City - 2016 - 2020',
                   yaxis={'title':'Number of accidents'},
                   width=1700,
                   height=700)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

Houston has a very different number of accidents than the others, the growth is huge when compared with the previous cities.

According to: https://www.pstriallaw.com/car-accidents/facts-and-statistics/

The main causes of car crashes that our offices sees in Houston and beyond include:

**Drunk Driving**
Harris County had the most drunk-driving crashes in the state in both 2012 and 2013, according to MADD.

**Distracted Driving**
All it takes is a few seconds of looking down a cell phone to trigger a crash. Many rear-end collisions are caused by distracted driving.

**Aggressive Driving**
Aggressive driving is when someone intentionally violates traffic laws and places other drivers in danger, like speeding, swerving between lanes, running red lights and tailgating.

For a more specific look at what causes accidents on our road, according to state statistics, here are some of the most common causes of accidents in Texas:

**Speeding**
Driver inattention
Failing to yield at a left turn
Unsafe lane change
Following too closely
Failing to yield at a stop sign
Driving too slowly
Failing to yield in private drives
Driving while intoxicated

In [None]:
# Keep only the counties with more than 10,000 recorded accidents.
county_counts = df['County'].value_counts()
county = county_counts[county_counts > 10000]
county

In [None]:
# Bar chart of the accident totals for the selected counties.
data = [go.Bar(x=county.index,
               y=county.values,
               name='County')]

# The counts are not filtered by year and the data includes 2020 records,
# so the title states the full 2016 - 2020 range.
layout = go.Layout(title='Accidents per County - 2016 - 2020',
                   yaxis={'title':'Number of accidents'},
                   width=1700,
                   height=700)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

In [None]:
# Accident count per state, from most to fewest.
state = df.loc[:, 'State'].value_counts()
state

In [None]:
# Bar chart of the accident totals per state.
data = [go.Bar(x=state.index,
               y=state.values,
               name='State')]

# The counts are not filtered by year and the data includes 2020 records,
# so the title states the full 2016 - 2020 range.
layout = go.Layout(title='Accidents per State - 2016 - 2020',
                   yaxis={'title':'Number of accidents'},
                   width=1700,
                   height=700)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

In [None]:
# Let pandas split the temperatures into 5 equal-width ranges and show the
# resulting intervals.
range_temp = pd.cut(df['Temperature(F)'], bins=5)
range_temp.unique()

In [None]:
# Replace each temperature reading with the code of its range:
#   1: <= 10 F, 2: (10, 50], 3: (50, 90], 4: (90, 130], 5: > 130 F
# pd.cut bins every reading in one pass. The open-ended first bin also catches
# readings at or below -33 F, which the previous chained assignments left as
# raw values mixed in with the range codes.
# NOTE: this overwrites the column, so run the cell only once per data load.
temp_edges = [-np.inf, 10, 50, 90, 130, np.inf]
df['Temperature(F)'] = pd.cut(df['Temperature(F)'], bins=temp_edges,
                              labels=[1, 2, 3, 4, 5]).astype(float)

In [None]:
# Number of accidents in each temperature range code.
temp_values = df.loc[:, 'Temperature(F)'].value_counts()
temp_values

In [None]:
# Bar chart of the accident totals per temperature range code.
data = [go.Bar(x=temp_values.index,
               y=temp_values.values,
               name='Temperature')]

# The counts are not filtered by year and the data includes 2020 records,
# so the title states the full 2016 - 2020 range.
layout = go.Layout(title='Accidents per Temperature - 2016 - 2020',
                   yaxis={'title':'Number of accidents'},
                   width=1200,
                   height=500)

fig = go.Figure(data=data, layout=layout)

# Label each range code (x = 1..5) with the temperature range it stands for.
range_labels = ["-30-10 F", "10-50 F", "50-90 F", "90-130 F", ">130 F"]
for code, label in enumerate(range_labels, start=1):
    fig.add_annotation(x=code, y=0, text=label)

py.iplot(fig)

In [None]:
# Replace each humidity reading with the code of its 20-point band:
#   1: (0, 20], 2: (20, 40], 3: (40, 60], 4: (60, 80], 5: > 80
humidity_col = 'Humidity(%)'
bands = [(0, 20), (20, 40), (40, 60), (60, 80)]
for code, (low, high) in enumerate(bands, start=1):
    in_band = (df[humidity_col] > low) & (df[humidity_col] <= high)
    df.loc[in_band, humidity_col] = code
# Everything above the last bounded band goes into the open-ended top band.
df.loc[df[humidity_col] > 80, humidity_col] = 5

In [None]:
# Show the distinct values left in the humidity column after the recoding.
df['Humidity(%)'].unique()

In [None]:
# Number of accidents in each humidity band code.
hum = df.loc[:, 'Humidity(%)'].value_counts()
hum

In [None]:
# Bar chart of the accident totals per humidity band code.
data = [go.Bar(x=hum.index, y=hum.values, name='Humidity')]

# Title typo fixed ("Humitity" -> "Humidity").
layout = go.Layout(title='Accidents according to Humidity',
                   xaxis={'title': 'Humidity'},
                   yaxis={'title': 'Accidents'},
                   width=1200,
                   height=500)

fig = go.Figure(data=data, layout=layout)

# Label each band code (x = 1..5) with the humidity range it stands for.
# The last label no longer carries a stray " F" unit: humidity is a percentage.
band_labels = ["0-20 [%]", "20-40 [%]", "40-60 [%]", "60-80 [%]", "80-100 [%]"]
for code, label in enumerate(band_labels, start=1):
    fig.add_annotation(x=code, y=0, text=label)


py.iplot(fig)

In [None]:
# Count the accidents for each Traffic_Signal value.
ts = df.loc[:, 'Traffic_Signal'].value_counts()

In [None]:
# Pie chart with one slice per Traffic_Signal value.
signal_pie = go.Pie(labels=ts.index, values=ts.values, direction='clockwise')
data = [signal_pie]

layout = go.Layout(title='Near a traffic light?', width=600, height=600)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

In [None]:
# Count the accidents for each Sunrise_Sunset value.
ss = df.loc[:, 'Sunrise_Sunset'].value_counts()

In [None]:
# Pie chart with one slice per Sunrise_Sunset value.
day_night_pie = go.Pie(labels=ss.index, values=ss.values, direction='clockwise')
data = [day_night_pie]

layout = go.Layout(title='Day or Night?', width=600, height=600)

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)