Analyze traffic accident data to identify patterns related to road conditions, weather, and time of day. Visualize accident hotspots and contributing factors.

In [1]:
import pandas as pd
import numpy as np

In [2]:
df=pd.read_csv('US_Accidents_March23.csv')

In [3]:
print(df.isnull().sum())

ID                             0
Source                         0
Severity                       0
Start_Time                     0
End_Time                       0
Start_Lat                      0
Start_Lng                      0
End_Lat                  3402762
End_Lng                  3402762
Distance(mi)                   0
Description                    5
Street                     10869
City                         253
County                         0
State                          0
Zipcode                     1915
Country                        0
Timezone                    7808
Airport_Code               22635
Weather_Timestamp         120228
Temperature(F)            163853
Wind_Chill(F)            1999019
Humidity(%)               174144
Pressure(in)              140679
Visibility(mi)            177098
Wind_Direction            175206
Wind_Speed(mph)           571233
Precipitation(in)        2203586
Weather_Condition         173459
Amenity                        0
Bump      

In [4]:
df_cols=df.columns
print(df_cols)

Index(['ID', 'Source', 'Severity', 'Start_Time', 'End_Time', 'Start_Lat',
       'Start_Lng', 'End_Lat', 'End_Lng', 'Distance(mi)', 'Description',
       'Street', 'City', 'County', 'State', 'Zipcode', 'Country', 'Timezone',
       'Airport_Code', 'Weather_Timestamp', 'Temperature(F)', 'Wind_Chill(F)',
       'Humidity(%)', 'Pressure(in)', 'Visibility(mi)', 'Wind_Direction',
       'Wind_Speed(mph)', 'Precipitation(in)', 'Weather_Condition', 'Amenity',
       'Bump', 'Crossing', 'Give_Way', 'Junction', 'No_Exit', 'Railway',
       'Roundabout', 'Station', 'Stop', 'Traffic_Calming', 'Traffic_Signal',
       'Turning_Loop', 'Sunrise_Sunset', 'Civil_Twilight', 'Nautical_Twilight',
       'Astronomical_Twilight'],
      dtype='object')


In [5]:
for column in df.columns:
    if df[column].dtype == 'object':  # For object (categorical) columns
        df[column] = df[column].fillna(method='ffill').fillna(method='bfill')
    elif df[column].dtype == 'bool':  # For boolean columns, using mode
        df[column] = df[column].fillna(df[column].mode()[0])
    else:  # For numeric columns (including float)
        df[column] = df[column].fillna(df[column].mean())

  df[column] = df[column].fillna(method='ffill').fillna(method='bfill')


In [6]:
print(df.isnull().sum())

ID                       0
Source                   0
Severity                 0
Start_Time               0
End_Time                 0
Start_Lat                0
Start_Lng                0
End_Lat                  0
End_Lng                  0
Distance(mi)             0
Description              0
Street                   0
City                     0
County                   0
State                    0
Zipcode                  0
Country                  0
Timezone                 0
Airport_Code             0
Weather_Timestamp        0
Temperature(F)           0
Wind_Chill(F)            0
Humidity(%)              0
Pressure(in)             0
Visibility(mi)           0
Wind_Direction           0
Wind_Speed(mph)          0
Precipitation(in)        0
Weather_Condition        0
Amenity                  0
Bump                     0
Crossing                 0
Give_Way                 0
Junction                 0
No_Exit                  0
Railway                  0
Roundabout               0
S

In [7]:
df.head()  # to inspect data

Unnamed: 0,ID,Source,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),...,Roundabout,Station,Stop,Traffic_Calming,Traffic_Signal,Turning_Loop,Sunrise_Sunset,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight
0,A-1,Source2,3,2016-02-08 05:46:00,2016-02-08 11:00:00,39.865147,-84.058723,36.261829,-95.72557,0.01,...,False,False,False,False,False,False,Night,Night,Night,Night
1,A-2,Source2,2,2016-02-08 06:07:59,2016-02-08 06:37:59,39.928059,-82.831184,36.261829,-95.72557,0.01,...,False,False,False,False,False,False,Night,Night,Night,Day
2,A-3,Source2,2,2016-02-08 06:49:27,2016-02-08 07:19:27,39.063148,-84.032608,36.261829,-95.72557,0.01,...,False,False,False,False,True,False,Night,Night,Day,Day
3,A-4,Source2,3,2016-02-08 07:23:34,2016-02-08 07:53:34,39.747753,-84.205582,36.261829,-95.72557,0.01,...,False,False,False,False,False,False,Night,Day,Day,Day
4,A-5,Source2,2,2016-02-08 07:39:07,2016-02-08 08:09:07,39.627781,-84.188354,36.261829,-95.72557,0.01,...,False,False,False,False,True,False,Day,Day,Day,Day


In [15]:
df['State'].unique()

array(['Ohio', 'West Virginia', 'California', 'Florida', 'Georgia',
       'South Carolina', 'Nebraska', 'Iowa', 'Illinois', 'Missouri',
       'Wisconsin', 'Indiana', 'Michigan', 'New Jersey', 'New York',
       'Connecticut', 'Massachusetts', 'Rhode Island', 'New Hampshire',
       'Pennsylvania', 'Kentucky', 'Maryland', 'Virginia', 'DC',
       'Delaware', 'Texas', 'Washington', 'Oregon', 'Alabama',
       'North Carolina', 'Arizona', 'Tennessee', 'Louisiana', 'Minnesota',
       'Colorado', 'Oklahoma', 'Nevada', 'Utah', 'Kansas', 'New Mexico',
       'Arkansas', 'Mississippi', 'Maine', 'Vermont', 'Wyoming', 'Idaho',
       'North Dakota', 'Montana', 'South Dakota'], dtype=object)

In [12]:
# The dataset contains State column as codes. Converting into State Names using US library in python
!pip install us

Collecting us
  Downloading us-3.2.0-py3-none-any.whl.metadata (10 kB)
Collecting jellyfish (from us)
  Downloading jellyfish-1.1.0-cp311-none-win_amd64.whl.metadata (2.6 kB)
Downloading us-3.2.0-py3-none-any.whl (13 kB)
Downloading jellyfish-1.1.0-cp311-none-win_amd64.whl (207 kB)
   ---------------------------------------- 0.0/207.3 kB ? eta -:--:--
   ----- ---------------------------------- 30.7/207.3 kB 1.3 MB/s eta 0:00:01
   ----------- --------------------------- 61.4/207.3 kB 812.7 kB/s eta 0:00:01
   ----------------- --------------------- 92.2/207.3 kB 871.5 kB/s eta 0:00:01
   -------------------------- ----------- 143.4/207.3 kB 853.3 kB/s eta 0:00:01
   ------------------------------ ------- 163.8/207.3 kB 893.0 kB/s eta 0:00:01
   -------------------------------------- 207.3/207.3 kB 842.2 kB/s eta 0:00:00
Installing collected packages: jellyfish, us
Successfully installed jellyfish-1.1.0 us-3.2.0



[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: C:\Users\saisa\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [14]:
import us
state_codes = df['State'].unique()
df['State'] = [us.states.lookup(code).name if us.states.lookup(code) else code for code in df['State']]

In [17]:
df.to_csv("cleaned_data.csv",index=False)   # exporting the cleaned dataframe into a csv