In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# 1. Load the US-Accidents CSV into a DataFrame and preview its first rows.
file_to_load = "../US_Accidents_Dec21_updated.csv"  # relative path, one directory above the working directory
accidents = pd.read_csv(filepath_or_buffer=file_to_load)
accidents.head(n=5)

Unnamed: 0,ID,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),Description,...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,3,2016-02-08 00:37:08,2016-02-08 06:37:08,40.10891,-83.09286,40.11206,-83.03187,3.23,Between Sawmill Rd/Exit 20 and OH-315/Olentang...,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,2,2016-02-08 05:56:20,2016-02-08 11:56:20,39.86542,-84.0628,39.86501,-84.04873,0.747,At OH-4/OH-235/Exit 41 - Accident.,...,False,False,False,False,False,False,Night,Night,Night,Night
2,A-3,2,2016-02-08 06:15:39,2016-02-08 12:15:39,39.10266,-84.52468,39.10209,-84.52396,0.055,At I-71/US-50/Exit 1 - Accident.,...,False,False,False,False,False,False,Night,Night,Night,Day
3,A-4,2,2016-02-08 06:51:45,2016-02-08 12:51:45,41.06213,-81.53784,41.06217,-81.53547,0.123,At Dart Ave/Exit 21 - Accident.,...,False,False,False,False,False,False,Night,Night,Day,Day
4,A-5,3,2016-02-08 07:53:43,2016-02-08 13:53:43,39.172393,-84.492792,39.170476,-84.501798,0.5,At Mitchell Ave/Exit 6 - Accident.,...,False,False,False,False,False,False,Day,Day,Day,Day


In [3]:
# 2. Check the datatypes of your columns.
# In the recorded output below, Start_Time and End_Time are listed as `object`,
# i.e. they were loaded as plain strings rather than parsed as datetimes.
accidents.dtypes

ID                        object
Severity                   int64
Start_Time                object
End_Time                  object
Start_Lat                float64
Start_Lng                float64
End_Lat                  float64
End_Lng                  float64
Distance(mi)             float64
Description               object
Number                   float64
Street                    object
Side                      object
City                      object
County                    object
State                     object
Zipcode                   object
Country                   object
Timezone                  object
Airport_Code              object
Weather_Timestamp         object
Temperature(F)           float64
Wind_Chill(F)            float64
Humidity(%)              float64
Pressure(in)             float64
Visibility(mi)           float64
Wind_Direction            object
Wind_Speed(mph)          float64
Precipitation(in)        float64
Weather_Condition         object
Amenity   

In [4]:
# 3. Keep only a subset of the columns in a new DataFrame.
# .copy() makes the result an independent frame, so columns added to it
# later do not write back into `accidents`.
columns_to_keep = [
    'Severity',
    'Start_Time',
    'State',
    'Temperature(F)',
    'Visibility(mi)',
    'Wind_Speed(mph)',
    'Precipitation(in)',
    'Weather_Condition',
    'Sunrise_Sunset',
]
accidents_updated = accidents[columns_to_keep].copy()
accidents_updated.head()

Unnamed: 0,Severity,Start_Time,State,Temperature(F),Visibility(mi),Wind_Speed(mph),Precipitation(in),Weather_Condition,Sunrise_Sunset
0,3,2016-02-08 00:37:08,OH,42.1,10.0,10.4,0.0,Light Rain,Night
1,2,2016-02-08 05:56:20,OH,36.9,10.0,,0.02,Light Rain,Night
2,2,2016-02-08 06:15:39,OH,36.0,10.0,,0.02,Overcast,Night
3,2,2016-02-08 06:51:45,OH,39.0,10.0,,,Overcast,Night
4,3,2016-02-08 07:53:43,OH,37.0,10.0,10.4,0.01,Light Rain,Day


In [5]:
# 4. Add a 'year' column holding the calendar year taken from 'Start_Time'.
# The timestamp strings are converted to a DatetimeIndex first; its .year
# attribute then yields one year value per row.
start_times = pd.DatetimeIndex(accidents_updated['Start_Time'])
accidents_updated['year'] = start_times.year
accidents_updated.head()

Unnamed: 0,Severity,Start_Time,State,Temperature(F),Visibility(mi),Wind_Speed(mph),Precipitation(in),Weather_Condition,Sunrise_Sunset,year
0,3,2016-02-08 00:37:08,OH,42.1,10.0,10.4,0.0,Light Rain,Night,2016
1,2,2016-02-08 05:56:20,OH,36.9,10.0,,0.02,Light Rain,Night,2016
2,2,2016-02-08 06:15:39,OH,36.0,10.0,,0.02,Overcast,Night,2016
3,2,2016-02-08 06:51:45,OH,39.0,10.0,,,Overcast,Night,2016
4,3,2016-02-08 07:53:43,OH,37.0,10.0,10.4,0.01,Light Rain,Day,2016


In [6]:
# 5. Replace NaN values with zeros in the two weather-measurement columns.
# Wind_Speed(mph) zero means no wind speed was reported for that accident
# Precipitation(in) zero means no precipitation was reported for that accident
#
# fillna() returns a new DataFrame and leaves the original untouched, so the
# result must be assigned back; otherwise the NaNs are still there in every
# later cell. The fill is limited to the two columns documented above: a
# blanket fillna(0) would also write 0 into Temperature(F), Visibility(mi)
# and the text columns, where 0 is either a real measurement or not a valid
# label at all.
accidents_updated = accidents_updated.fillna(
    {'Wind_Speed(mph)': 0, 'Precipitation(in)': 0}
)
accidents_updated

Unnamed: 0,Severity,Start_Time,State,Temperature(F),Visibility(mi),Wind_Speed(mph),Precipitation(in),Weather_Condition,Sunrise_Sunset,year
0,3,2016-02-08 00:37:08,OH,42.1,10.0,10.4,0.00,Light Rain,Night,2016
1,2,2016-02-08 05:56:20,OH,36.9,10.0,0.0,0.02,Light Rain,Night,2016
2,2,2016-02-08 06:15:39,OH,36.0,10.0,0.0,0.02,Overcast,Night,2016
3,2,2016-02-08 06:51:45,OH,39.0,10.0,0.0,0.00,Overcast,Night,2016
4,3,2016-02-08 07:53:43,OH,37.0,10.0,10.4,0.01,Light Rain,Day,2016
...,...,...,...,...,...,...,...,...,...,...
2845337,2,2019-08-23 18:03:25,CA,86.0,10.0,13.0,0.00,Fair,Day,2019
2845338,2,2019-08-23 19:11:30,CA,70.0,10.0,6.0,0.00,Fair,Day,2019
2845339,2,2019-08-23 19:00:21,CA,73.0,10.0,10.0,0.00,Partly Cloudy,Day,2019
2845340,2,2019-08-23 19:00:21,CA,71.0,10.0,8.0,0.00,Fair,Day,2019
