<h1>Import Libraries

In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway

<h1>Dataset to Dataframe

In [22]:
accident = pd.read_csv('accident_data.csv')

In [23]:
accident.dtypes

Index                       object
Accident_Severity           object
Accident Date               object
Latitude                   float64
Light_Conditions            object
District Area               object
Longitude                  float64
Number_of_Casualties         int64
Number_of_Vehicles           int64
Road_Surface_Conditions     object
Road_Type                   object
Urban_or_Rural_Area         object
Weather_Conditions          object
Vehicle_Type                object
dtype: object

<h1>Converting Object to DateTime Data Type

In [24]:
#syntax "to_datetime()": (column, dateformat, error)
accident['Accident Date'] = pd.to_datetime(accident['Accident Date'], dayfirst = True, errors = 'coerce')
accident.dtypes
#accident['Accident Date']

Index                              object
Accident_Severity                  object
Accident Date              datetime64[ns]
Latitude                          float64
Light_Conditions                   object
District Area                      object
Longitude                         float64
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions            object
Road_Type                          object
Urban_or_Rural_Area                object
Weather_Conditions                 object
Vehicle_Type                       object
dtype: object

<h1>Filling nulls

In [25]:
accident['Latitude'] = accident['Latitude'].fillna(accident['Latitude'].mode()[0])
accident['Longitude'] = accident['Longitude'].fillna(accident['Longitude'].mode()[0])
accident['Road_Surface_Conditions'] = accident['Road_Surface_Conditions'].fillna('Unaccounted')
accident['Road_Type'] = accident['Road_Type'].fillna('Unaccounted')
accident['Urban_or_Rural_Area'] = accident['Urban_or_Rural_Area'].fillna(accident['Urban_or_Rural_Area'].mode()[0])
accident['Weather_Conditions'] = accident['Weather_Conditions'].fillna('Unaccouted')
accident.isnull().sum()

Index                      0
Accident_Severity          0
Accident Date              0
Latitude                   0
Light_Conditions           0
District Area              0
Longitude                  0
Number_of_Casualties       0
Number_of_Vehicles         0
Road_Surface_Conditions    0
Road_Type                  0
Urban_or_Rural_Area        0
Weather_Conditions         0
Vehicle_Type               0
dtype: int64

In [26]:
accident['Accident_Severity'] = accident['Accident_Severity'].astype('category') 
accident['Latitude'] = accident['Latitude'].astype('category')
accident['Light_Conditions'] = accident['Light_Conditions'].astype('category')
accident['District Area'] = accident['District Area'].astype('category')
accident['Longitude'] = accident['Longitude'].astype('category')
accident['Road_Surface_Conditions'] = accident['Road_Surface_Conditions'].astype('category')
accident['Road_Type'] = accident['Road_Type'].astype('category')
accident['Urban_or_Rural_Area'] = accident['Urban_or_Rural_Area'].astype('category')
accident['Weather_Conditions'] = accident['Weather_Conditions'].astype('category')
accident['Vehicle_Type'] = accident['Vehicle_Type'].astype('category')
accident.dtypes

Index                              object
Accident_Severity                category
Accident Date              datetime64[ns]
Latitude                         category
Light_Conditions                 category
District Area                    category
Longitude                        category
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions          category
Road_Type                        category
Urban_or_Rural_Area              category
Weather_Conditions               category
Vehicle_Type                     category
dtype: object

In [27]:
accident['Accident Date'] = pd.to_datetime(accident['Accident Date'], dayfirst = True, errors = 'coerce')

In [28]:
accident.dtypes

Index                              object
Accident_Severity                category
Accident Date              datetime64[ns]
Latitude                         category
Light_Conditions                 category
District Area                    category
Longitude                        category
Number_of_Casualties                int64
Number_of_Vehicles                  int64
Road_Surface_Conditions          category
Road_Type                        category
Urban_or_Rural_Area              category
Weather_Conditions               category
Vehicle_Type                     category
dtype: object

In [29]:
# accident['Accident Date'] = accident['Accident Date'].str.strip()
# accident['Accident Date'] = accident['Accident Date'].str.replace('/','-')
accident['Year'] = accident['Accident Date'].dt.year
accident['Month'] = accident['Accident Date'].dt.month
accident['Day'] = accident['Accident Date'].dt.day
accident['DayofWeek'] = accident['Accident Date'].dt.dayofweek

In [30]:
accident

Unnamed: 0,Index,Accident_Severity,Accident Date,Latitude,Light_Conditions,District Area,Longitude,Number_of_Casualties,Number_of_Vehicles,Road_Surface_Conditions,Road_Type,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type,Year,Month,Day,DayofWeek
0,200701BS64157,Serious,2019-06-05,51.506187,Darkness - lights lit,Kensington and Chelsea,-0.209082,1,2,Dry,Single carriageway,Urban,Fine no high winds,Car,2019,6,5,2
1,200701BS65737,Serious,2019-07-02,51.495029,Daylight,Kensington and Chelsea,-0.173647,1,2,Wet or damp,Single carriageway,Urban,Raining no high winds,Car,2019,7,2,1
2,200701BS66127,Serious,2019-08-26,51.517715,Darkness - lighting unknown,Kensington and Chelsea,-0.210215,1,3,Dry,Unaccounted,Urban,Unaccouted,Taxi/Private hire car,2019,8,26,0
3,200701BS66128,Serious,2019-08-16,51.495478,Daylight,Kensington and Chelsea,-0.202731,1,4,Dry,Single carriageway,Urban,Fine no high winds,Bus or coach (17 or more pass seats),2019,8,16,4
4,200701BS66837,Slight,2019-09-03,51.488576,Darkness - lights lit,Kensington and Chelsea,-0.192487,1,2,Dry,Unaccounted,Urban,Unaccouted,Other vehicle,2019,9,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660674,201091NM01760,Slight,2022-02-18,57.374005,Daylight,Highland,-3.467828,2,1,Dry,Single carriageway,Rural,Fine no high winds,Car,2022,2,18,4
660675,201091NM01881,Slight,2022-02-21,57.232273,Darkness - no lighting,Highland,-3.809281,1,1,Frost or ice,Single carriageway,Rural,Fine no high winds,Car,2022,2,21,0
660676,201091NM01935,Slight,2022-02-23,57.585044,Daylight,Highland,-3.862727,1,3,Frost or ice,Single carriageway,Rural,Fine no high winds,Car,2022,2,23,2
660677,201091NM01964,Serious,2022-02-23,57.214898,Darkness - no lighting,Highland,-3.823997,1,2,Wet or damp,Single carriageway,Rural,Fine no high winds,Motorcycle over 500cc,2022,2,23,2


In [31]:
#insight 1
vehicles_casualties = accident['Number_of_Casualties'].corr(accident['Number_of_Vehicles'])
np.round(vehicles_casualties, 2)

np.float64(0.23)

<h3>INSIGHT 1: The correlation coefficient of 0.23 indicates a weak relationship between the number of casualties and vehicles, suggesting that other factors like road conditions, weather, or accident severity play a more significant role.</h3>

In [32]:
#insight 2
latitude_casualties = accident['Latitude'].corr(accident['Number_of_Casualties'])
np.round(latitude_casualties, 2)

np.float64(0.03)

<h3>INSIGHT 2: With a correlation coefficient of 0.03, there is no significant relationship between latitude and the number of casualties, suggesting that other factors like road conditions, traffic density, or weather play a more influential role.</h3>

In [None]:
#insights 3, 4, 5, and 6
severity_area = accident.groupby(['Accident_Severity','Urban_or_Rural_Area']).size()
severity_area.unstack().T

  severity_area = accident.groupby(['Accident_Severity','Urban_or_Rural_Area']).size()


Accident_Severity,Fatal,Serious,Slight
Urban_or_Rural_Area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rural,5601,37312,196077
Unallocated,0,1,10
Urban,3060,50904,367714


<h3>INSIGHT 3: Urban areas experience the highest number of slight accidents, with 367,714 incidents, making up 64.6% of all slight accidents recorded.</h3>
<h3>INSIGHT 4: Serious accidents in rural areas account for 42.3% of all serious incidents, showing that high-impact accidents are not limited to urban areas.</h3>
<h3>INSIGHT 5: Rural areas have a higher fatal accident count than urban areas, with 5,601 fatalities (64.7%), compared to 3,060 fatalities (35.3%) in urban areas.</h3>
<h3>INSIGHT 6: Slight accidents are the most common across all areas, totaling 563,801 incidents, representing 91.1% of all recorded accidents.</h3>

In [None]:
#insights 7, 8, and 9
severity_weather = accident.groupby(['Accident_Severity','Weather_Conditions']).size()
severity_weather.unstack().T

  severity_weather = accident.groupby(['Accident_Severity','Weather_Conditions']).size()


Accident_Severity,Fatal,Serious,Slight
Weather_Conditions,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Fine + high winds,175,1245,7134
Fine no high winds,7100,72046,441739
Fog or mist,82,483,2963
Other,165,1801,15184
Raining + high winds,145,1261,8209
Raining no high winds,848,9468,69380
Snowing + high winds,3,109,773
Snowing no high winds,36,565,5637
Unaccouted,107,1239,12782


<h3>INSIGHT 7: Serious accidents are most common in fine weather (no high winds), with 72,046 incidents, accounting for 72.9% of all serious accidents recorded.</h3>
<h3>INSIGHT 8: Fatal accidents are significantly higher in fine weather with no high winds, contributing to 7,100 deaths (84.7% of all fatalities), showing that good weather does not always mean safer conditions.</h3>
<h3>INSIGHT 9: Fog or mist contributes to only 0.56% of total accidents, but remains hazardous due to reduced visibility.</h3>

In [None]:
#insights 10, 11, 12, 13, 14, 15, 16, 17, and 18
road_casualties = accident.groupby(['Vehicle_Type','Road_Type'])['Number_of_Casualties'].count()
road_casualties.unstack()

  road_casualties = accident.groupby(['Vehicle_Type','Road_Type'])['Number_of_Casualties'].count()


Road_Type,Dual carriageway,One way street,Roundabout,Single carriageway,Slip road,Unaccounted
Vehicle_Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Agricultural vehicle,293,49,129,1436,27,13
Bus or coach (17 or more pass seats),4043,546,1704,19090,285,210
Car,74820,10223,33187,371028,5298,3436
Data missing or out of range,1,0,0,5,0,0
Goods 7.5 tonnes mgw and over,2631,341,1110,12915,188,122
Goods over 3.5t. and under 7.5t,959,116,422,4498,58,43
Minibus (8 - 16 passenger seats),275,40,138,1493,16,14
Motorcycle 125cc and under,2228,307,991,11507,147,89
Motorcycle 50cc and under,1085,133,512,5743,89,41
Motorcycle over 125cc and up to 500cc,1210,154,493,5659,98,42


<h3>INSIGHT 10: Single carriageways are the most hazardous road type, accounting for 74.49% of all casualties (492,141 incidents).
<h3>INSIGHT 11: Dual carriageways rank second, contributing to 15.05% of total casualties (99,424 incidents).
<h3>INSIGHT 12: Cars experience the highest number of casualties, making up 75.38% of total incidents (497,992 casualties).
<h3>INSIGHT 13: Pedal cycles have the lowest casualty rate, with only 0.03% of total incidents (195 casualties).
<h3>INSIGHT 14: Motorcycles over 500cc face extreme risks on single carriageways, where 74.25% of their accidents (19,050 casualties) occur.
<h3>INSIGHT 15: Roundabouts pose a significant danger to motorcycles over 500cc, contributing to a large share of their total casualties.
<h3>INSIGHT 16: Slip roads are the safest road type, accounting for only 1.06% of all casualties (7,032 incidents).
<h3>INSIGHT 17: Buses and coaches (17+ seats) are most at risk on single carriageways, with 73.77% of their total incidents (19,090 casualties) occurring there.
<h3>INSIGHT 18: Agricultural vehicles also experience the majority of their casualties on single carriageways, with 73.75% (1,436 casualties) recorded there.

In [None]:
#insights 19 and 20
road_light_casualties = accident.groupby(['Light_Conditions','Road_Surface_Conditions'])['Number_of_Casualties'].size()
road_light_casualties.unstack()

  road_light_casualties = accident.groupby(['Light_Conditions','Road_Surface_Conditions'])['Number_of_Casualties'].size()


Road_Surface_Conditions,Dry,Flood over 3cm. deep,Frost or ice,Snow,Unaccounted,Wet or damp
Light_Conditions,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Darkness - lighting unknown,4396,15,249,105,44,1675
Darkness - lights lit,70368,160,4621,1587,136,52463
Darkness - lights unlit,1353,4,130,26,3,1027
Darkness - no lighting,15619,265,3345,757,27,17424
Daylight,356085,573,10172,3415,516,114119


<h3>INSIGHT 19: Daylight poses the highest risk for casualties, accounting for over 73% of incidents, despite the advantage of clear visibility.</h3>
<h3>INSIGHT 20: Hazardous road conditions like ice and snow contribute to a significant number of casualties (24,407 cases), proving that adequate lighting alone is not enough to prevent accidents in such conditions.</h3>