# US Accidents

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's built-in 'fast' style sheet to all subsequent plots.
plt.style.use('fast')

In [3]:
# Load the full accidents CSV into `acc`, the frame every later cell works on.
csv_path = 'data/us_accidents_full.csv'
acc = pd.read_csv(csv_path)

In [4]:
# Preview the first rows of the freshly loaded frame.
acc.head()

Unnamed: 0,ID,Source,TMC,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,MapQuest,201.0,3,2016-02-08 05:46:00,2016-02-08 11:00:00,39.865147,-84.058723,,,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,MapQuest,201.0,2,2016-02-08 06:07:59,2016-02-08 06:37:59,39.928059,-82.831184,,,...,False,False,False,False,False,False,Night,Night,Night,Day
2,A-3,MapQuest,201.0,2,2016-02-08 06:49:27,2016-02-08 07:19:27,39.063148,-84.032608,,,...,False,False,False,False,True,False,Night,Night,Day,Day
3,A-4,MapQuest,201.0,3,2016-02-08 07:23:34,2016-02-08 07:53:34,39.747753,-84.205582,,,...,False,False,False,False,False,False,Night,Day,Day,Day
4,A-5,MapQuest,201.0,2,2016-02-08 07:39:07,2016-02-08 08:09:07,39.627781,-84.188354,,,...,False,False,False,False,True,False,Day,Day,Day,Day


In [4]:
# Count the missing values in every column to decide which columns to drop or fill.
acc.isna().sum()

ID                             0
Source                         0
TMC                       728071
Severity                       0
Start_Time                     0
End_Time                       0
Start_Lat                      0
Start_Lng                      0
End_Lat                  2246264
End_Lng                  2246264
Distance(mi)                   0
Description                    1
Number                   1917605
Street                         0
Side                           0
City                          83
County                         0
State                          0
Zipcode                      880
Country                        0
Timezone                    3163
Airport_Code                5691
Weather_Timestamp          36705
Temperature(F)             56063
Wind_Chill(F)            1852623
Humidity(%)                59173
Pressure(in)               48142
Visibility(mi)             65691
Wind_Direction             45101
Wind_Speed(mph)           440840
Precipitat

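## Columns dropped (missing-value count in parentheses):
> • Precipitation(in) (1998358)<br>• End_Lat (2246264)<br>• End_Lng (2246264)<br>• Number (1917605)<br>• Wind_Chill(F) (1852623)<br>• Description is dropped as well, although it has only 1 missing value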

In [5]:
# Remove the Precipitation(in) column (1,998,358 missing values).
acc.drop(columns=['Precipitation(in)'], inplace=True)

In [6]:
# Remove the end coordinates; End_Lat and End_Lng are each missing in 2,246,264 rows.
acc.drop(['End_Lat', 'End_Lng'], axis='columns', inplace=True)

In [7]:
# Remove Number (1,917,605 missing) and Wind_Chill(F) (1,852,623 missing).
sparse_cols = ['Number', 'Wind_Chill(F)']
acc.drop(columns=sparse_cols, inplace=True)

In [8]:
# Remove the Description column as well.
# NOTE(review): Description has only 1 missing value, so it is presumably dropped
# for a reason other than missing data (e.g. free text) — confirm.
acc.drop(columns=['Description'], inplace=True)

In [9]:
# Smallest TMC code present in the data.
# Series.min() is vectorized and skips NaN by default; the built-in min() walks
# the column element by element in Python and, because comparisons with NaN are
# always False, its result depends on where the NaN values sit in the column
# (TMC still has missing values at this point).
acc['TMC'].min()

200.0

In [10]:
# Largest TMC code present in the data.
# Series.max() is vectorized and skips NaN by default; the built-in max() walks
# the column element by element in Python and, because comparisons with NaN are
# always False, its result depends on where the NaN values sit in the column
# (TMC still has missing values at this point).
acc['TMC'].max()

406.0

In [11]:
# Frequency table of the TMC codes, kept as a one-column DataFrame indexed by code.
tmc_count = (
    acc['TMC']
    .value_counts()
    .to_frame()
)

In [12]:
# Distinct TMC codes in ascending order (the index of the frequency table).
np.sort(tmc_count.index.to_numpy())

array([200., 201., 202., 203., 206., 222., 229., 236., 239., 241., 244.,
       245., 246., 247., 248., 336., 339., 341., 343., 351., 406.])

In [13]:
# Shape of the TMC frequency table: one row per distinct code, one count column.
tmc_count.shape

(21, 1)

## Replace missing TMC values (<code>NaN</code>) with <code>201.0</code>

In [14]:
# Fill the missing TMC codes with 201.0.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on acc['TMC']: an in-place method on a column selection
# is chained assignment, which emits a FutureWarning on pandas 2.x and leaves
# `acc` unchanged once Copy-on-Write is active.
acc['TMC'] = acc['TMC'].fillna(201.0)

### <code>201.0</code> makes up 88% of the data points

In [15]:
# Share of rows whose TMC is 201.0, from hard-coded counts.
# The denominator 2,974,335 equals the row count reported by acc.shape.
# NOTE(review): the numerator 2,618,108 is presumably the number of rows with
# TMC == 201.0 after the fill — confirm, or compute it from `acc` instead.
2_618_108 / 2_974_335

0.8802330604992377

In [16]:
# Rows per timezone. Idea for the missing entries: fill them with ffill and bfill.
acc['Timezone'].value_counts()

US/Eastern     1277187
US/Pacific      805117
US/Central      727147
US/Mountain     161721
Name: Timezone, dtype: int64

In [17]:
# Number of rows with no Timezone recorded.
acc['Timezone'].isna().sum()

3163

In [18]:
# Current dimensions of the frame as (rows, columns).
acc.shape

(2974335, 43)

In [19]:
# Re-count the missing values per column to see what still needs filling.
acc.isna().sum()

ID                            0
Source                        0
TMC                           0
Severity                      0
Start_Time                    0
End_Time                      0
Start_Lat                     0
Start_Lng                     0
Distance(mi)                  0
Street                        0
Side                          0
City                         83
County                        0
State                         0
Zipcode                     880
Country                       0
Timezone                   3163
Airport_Code               5691
Weather_Timestamp         36705
Temperature(F)            56063
Humidity(%)               59173
Pressure(in)              48142
Visibility(mi)            65691
Wind_Direction            45101
Wind_Speed(mph)          440840
Weather_Condition         65932
Amenity                       0
Bump                          0
Crossing                      0
Give_Way                      0
Junction                      0
No_Exit 

### TODO: decide how to fill the remaining <code>NaN</code> values — possibly with <code>ffill()</code> and <code>bfill()</code>?

In [20]:
# Accident counts per city, most frequent first.
acc['City'].value_counts()

Houston              93289
Charlotte            68054
Los Angeles          65851
Austin               58703
Dallas               58036
                     ...  
Blue Ridge Summit        1
Millry                   1
Alloway                  1
Fountainville            1
Keene Valley             1
Name: City, Length: 11685, dtype: int64

In [21]:
# Number of rows with no City recorded.
acc['City'].isna().sum()

83

# Folium

In [8]:
# Preview the frame before mapping.
# NOTE(review): the saved output of this cell still lists End_Lat and End_Lng,
# which an earlier cell drops, so it was produced in a different run — re-execute
# the notebook top to bottom from a fresh kernel to refresh it.
acc.head()

Unnamed: 0,ID,Source,TMC,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,MapQuest,201.0,3,2016-02-08 05:46:00,2016-02-08 11:00:00,39.865147,-84.058723,,,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,MapQuest,201.0,2,2016-02-08 06:07:59,2016-02-08 06:37:59,39.928059,-82.831184,,,...,False,False,False,False,False,False,Night,Night,Night,Day
2,A-3,MapQuest,201.0,2,2016-02-08 06:49:27,2016-02-08 07:19:27,39.063148,-84.032608,,,...,False,False,False,False,True,False,Night,Night,Day,Day
3,A-4,MapQuest,201.0,3,2016-02-08 07:23:34,2016-02-08 07:53:34,39.747753,-84.205582,,,...,False,False,False,False,False,False,Night,Day,Day,Day
4,A-5,MapQuest,201.0,2,2016-02-08 07:39:07,2016-02-08 08:09:07,39.627781,-84.188354,,,...,False,False,False,False,True,False,Day,Day,Day,Day


In [6]:
import folium

In [9]:
# Map centre: Start_Lat / Start_Lng of the first record (A-1) shown by acc.head().
latitude, longitude = 39.865147, -84.058723

In [10]:
# Build a folium map centred on the chosen point and show it as the cell output.
map_center = [latitude, longitude]
acc_map = folium.Map(location=map_center, zoom_start=12)
acc_map