In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import HeatMap

In [7]:
df=pd.read_csv('traffic_accidents.csv')
df.head()

Unnamed: 0,crash_date,traffic_control_device,weather_condition,lighting_condition,first_crash_type,trafficway_type,alignment,roadway_surface_cond,road_defect,crash_type,...,most_severe_injury,injuries_total,injuries_fatal,injuries_incapacitating,injuries_non_incapacitating,injuries_reported_not_evident,injuries_no_indication,crash_hour,crash_day_of_week,crash_month
0,07/29/2023 01:00:00 PM,TRAFFIC SIGNAL,CLEAR,DAYLIGHT,TURNING,NOT DIVIDED,STRAIGHT AND LEVEL,UNKNOWN,UNKNOWN,NO INJURY / DRIVE AWAY,...,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,0.0,3.0,13,7,7
1,08/13/2023 12:11:00 AM,TRAFFIC SIGNAL,CLEAR,"DARKNESS, LIGHTED ROAD",TURNING,FOUR WAY,STRAIGHT AND LEVEL,DRY,NO DEFECTS,NO INJURY / DRIVE AWAY,...,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,0.0,2.0,0,1,8
2,12/09/2021 10:30:00 AM,TRAFFIC SIGNAL,CLEAR,DAYLIGHT,REAR END,T-INTERSECTION,STRAIGHT AND LEVEL,DRY,NO DEFECTS,NO INJURY / DRIVE AWAY,...,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,0.0,3.0,10,5,12
3,08/09/2023 07:55:00 PM,TRAFFIC SIGNAL,CLEAR,DAYLIGHT,ANGLE,FOUR WAY,STRAIGHT AND LEVEL,DRY,NO DEFECTS,INJURY AND / OR TOW DUE TO CRASH,...,NONINCAPACITATING INJURY,5.0,0.0,0.0,5.0,0.0,0.0,19,4,8
4,08/19/2023 02:55:00 PM,TRAFFIC SIGNAL,CLEAR,DAYLIGHT,REAR END,T-INTERSECTION,STRAIGHT AND LEVEL,UNKNOWN,UNKNOWN,NO INJURY / DRIVE AWAY,...,NO INDICATION OF INJURY,0.0,0.0,0.0,0.0,0.0,3.0,14,7,8


In [8]:
#data information
df.info 

<bound method DataFrame.info of                     crash_date traffic_control_device weather_condition  \
0       07/29/2023 01:00:00 PM         TRAFFIC SIGNAL             CLEAR   
1       08/13/2023 12:11:00 AM         TRAFFIC SIGNAL             CLEAR   
2       12/09/2021 10:30:00 AM         TRAFFIC SIGNAL             CLEAR   
3       08/09/2023 07:55:00 PM         TRAFFIC SIGNAL             CLEAR   
4       08/19/2023 02:55:00 PM         TRAFFIC SIGNAL             CLEAR   
...                        ...                    ...               ...   
209301  09/13/2023 01:08:00 PM                UNKNOWN           UNKNOWN   
209302  07/18/2023 02:10:00 PM                UNKNOWN             CLEAR   
209303  10/23/2019 01:32:00 PM         TRAFFIC SIGNAL             CLEAR   
209304  06/01/2020 03:23:00 PM            NO CONTROLS             CLEAR   
209305  12/16/2022 12:10:00 PM         TRAFFIC SIGNAL             CLEAR   

            lighting_condition          first_crash_type  \
0      

In [19]:
#summary statistics
df.describe()

Unnamed: 0,num_units,injuries_total,injuries_fatal,injuries_incapacitating,injuries_non_incapacitating,injuries_reported_not_evident,injuries_no_indication,crash_hour,crash_day_of_week,crash_month
count,209306.0,209306.0,209306.0,209306.0,209306.0,209306.0,209306.0,209306.0,209306.0,209306.0
mean,2.0633,0.382717,0.001859,0.038102,0.221241,0.121516,2.244002,13.373047,4.144024,6.771822
std,0.396012,0.79972,0.047502,0.233964,0.61496,0.450865,1.241175,5.60383,1.966864,3.427593
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,2.0,9.0,2.0,4.0
50%,2.0,0.0,0.0,0.0,0.0,0.0,2.0,14.0,4.0,7.0
75%,2.0,1.0,0.0,0.0,0.0,0.0,3.0,17.0,6.0,10.0
max,11.0,21.0,3.0,7.0,21.0,15.0,49.0,23.0,7.0,12.0


In [20]:
df.columns

Index(['crash_date', 'traffic_control_device', 'weather_condition',
       'lighting_condition', 'first_crash_type', 'trafficway_type',
       'alignment', 'roadway_surface_cond', 'road_defect', 'crash_type',
       'intersection_related_i', 'damage', 'prim_contributory_cause',
       'num_units', 'most_severe_injury', 'injuries_total', 'injuries_fatal',
       'injuries_incapacitating', 'injuries_non_incapacitating',
       'injuries_reported_not_evident', 'injuries_no_indication', 'crash_hour',
       'crash_day_of_week', 'crash_month'],
      dtype='object')

In [23]:
#check if time column exists before processing
if 'time' in df.columns:
    df['time'] = pd.to_datetime(df['time'], errors='coerce')
    df.dropna(subset=['time'], inplace=True)  #drop rows where time conversion failed
    df['hour'] = df['time'].dt.hour

#convert time to categories (Morning, Afternoon, Evening, Night)
    def categorize_time(hour):
        if 5 <= hour < 12:
            return "Morning"
        elif 12 <= hour < 17:
            return "Afternoon"
        elif 17 <= hour < 21:
            return "Evening"
        else:
            return "Night"

    df['time_of_day'] = df['hour'].apply(categorize_time)
else:
    print("Warning: 'time' column not found in dataset. Skipping time-based analysis.")



In [37]:
#visualize accident counts by weather condition if column exists
if 'weather' in df.columns:
    plt.figure(figsize=(10, 5))
    sns.countplot(data=df, x='weather', order=df['weather'].value_counts().index, palette='coolwarm')
    plt.xticks(rotation=45)
    plt.title("Accidents by Weather Condition")
    plt.xlabel("Weather Condition")
    plt.ylabel("Number of Accidents")
    plt.show()
else:
    print("Warning: 'weather' column not found in dataset. Skipping weather analysis.")




In [36]:
#visualize accident counts by road condition if column exists
if 'road_condition' in df.columns:
    plt.figure(figsize=(10, 5))
    sns.countplot(data=df, x='road_condition', order=df['road_condition'].value_counts().index, palette='viridis')
    plt.xticks(rotation=45)
    plt.title("Accidents by Road Condition")
    plt.xlabel("Road Condition")
    plt.ylabel("Number of Accidents")
    plt.show()
else:
    print("Warning: 'road_condition' column not found in dataset. Skipping road condition analysis.")



In [27]:
#visualize accident counts by time of day if time analysis was performed
if 'time_of_day' in df.columns:
    plt.figure(figsize=(8, 5))
    sns.countplot(data=df, x='time_of_day', order=['Morning', 'Afternoon', 'Evening', 'Night'], palette='magma')
    plt.title("Accidents by Time of Day")
    plt.xlabel("Time of Day")
    plt.ylabel("Number of Accidents")
    plt.show()

In [32]:
#create a Heatmap of accident locations if latitude and longitude columns exist
if 'latitude' in df.columns and 'longitude' in df.columns:
    m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=12)
    heat_data = list(zip(df['latitude'], df['longitude']))
    HeatMap(heat_data).add_to(m)
    
    #ave the heatmap
    m.save("accident_hotspots.html")
    print("Heatmap saved as 'accident_hotspots.html'. Open it in a web browser to view.")
else:
    print("Warning: 'latitude' or 'longitude' column not found in dataset. Skipping heatmap generation.")



In [39]:
#line plot for accident trends over time if 'date' column exists
if 'date' in df.columns:
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    df.dropna(subset=['date'], inplace=True)
    df['date'] = df['date'].dt.date
    accident_trends = df.groupby('date').size()
    
    plt.figure(figsize=(12, 6))
    plt.plot(accident_trends.index, accident_trends.values, marker='o', linestyle='-', color='b')
    plt.title("Accident Trends Over Time")
    plt.xlabel("Date")
    plt.ylabel("Number of Accidents")
    plt.xticks(rotation=45)
    plt.grid()
    plt.show()
else:
    print("Warning: 'date' column not found in dataset. Skipping accident trend visualization.")

