In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Read the raw road-accident CSV into a DataFrame
raw_csv_path = '../data/Road Accident Data.csv'
df = pd.read_csv(raw_csv_path)

# Quick structural overview: (rows, columns), the column index, then the first five rows
print(df.shape, df.columns, sep='\n')
df.head(5)


(307973, 21)
Index(['Accident_Index', 'Accident Date', 'Day_of_Week', 'Junction_Control',
       'Junction_Detail', 'Accident_Severity', 'Latitude', 'Light_Conditions',
       'Local_Authority_(District)', 'Carriageway_Hazards', 'Longitude',
       'Number_of_Casualties', 'Number_of_Vehicles', 'Police_Force',
       'Road_Surface_Conditions', 'Road_Type', 'Speed_limit', 'Time',
       'Urban_or_Rural_Area', 'Weather_Conditions', 'Vehicle_Type'],
      dtype='object')


Unnamed: 0,Accident_Index,Accident Date,Day_of_Week,Junction_Control,Junction_Detail,Accident_Severity,Latitude,Light_Conditions,Local_Authority_(District),Carriageway_Hazards,...,Number_of_Casualties,Number_of_Vehicles,Police_Force,Road_Surface_Conditions,Road_Type,Speed_limit,Time,Urban_or_Rural_Area,Weather_Conditions,Vehicle_Type
0,BS0000001,01/01/2021,Thursday,Give way or uncontrolled,T or staggered junction,Serious,51.512273,Daylight,Kensington and Chelsea,,...,1,2,Metropolitan Police,Dry,One way street,30,03:11:00 PM,Urban,Fine no high winds,Car
1,BS0000002,01/05/2021,Monday,Give way or uncontrolled,Crossroads,Serious,51.514399,Daylight,Kensington and Chelsea,,...,11,2,Metropolitan Police,Wet or damp,Single carriageway,30,10:59:00 AM,Urban,Fine no high winds,Taxi/Private hire car
2,BS0000003,01/04/2021,Sunday,Give way or uncontrolled,T or staggered junction,Slight,51.486668,Daylight,Kensington and Chelsea,,...,1,2,Metropolitan Police,Dry,Single carriageway,30,02:19:00 PM,Urban,Fine no high winds,Taxi/Private hire car
3,BS0000004,01/05/2021,Monday,Auto traffic signal,T or staggered junction,Serious,51.507804,Daylight,Kensington and Chelsea,,...,1,2,Metropolitan Police,Frost or ice,Single carriageway,30,08:10:00 AM,Urban,Other,Motorcycle over 500cc
4,BS0000005,01/06/2021,Tuesday,Auto traffic signal,Crossroads,Serious,51.482076,Darkness - lights lit,Kensington and Chelsea,,...,1,2,Metropolitan Police,Dry,Single carriageway,30,05:25:00 PM,Urban,Fine no high winds,Car


In [4]:
# Parse accident dates; values that cannot be parsed become NaT instead of raising.
# NOTE(review): no explicit format/dayfirst is passed, so pandas infers the
# day/month order -- confirm the inferred order matches the source data.
df['Accident Date'] = pd.to_datetime(df['Accident Date'], errors='coerce')

# Extract the hour of day.
# The 'Time' strings displayed by df.head() look like '03:11:00 PM' (12-hour
# clock with seconds and an AM/PM marker). A bare '%H:%M' format cannot match
# such strings, and with errors='coerce' every row would silently become NaT,
# leaving 'hour' entirely empty. Parse the 12-hour form first, then fall back
# to 24-hour 'HH:MM' for any rows stored that way.
accident_time = pd.to_datetime(df['Time'], format='%I:%M:%S %p', errors='coerce')
accident_time = accident_time.fillna(
    pd.to_datetime(df['Time'], format='%H:%M', errors='coerce')
)
df['hour'] = accident_time.dt.hour

# Calendar parts derived from the parsed date (missing wherever the date is NaT)
df['month'] = df['Accident Date'].dt.month
df['year'] = df['Accident Date'].dt.year
df['day_of_week'] = df['Accident Date'].dt.day_name()


In [10]:
# Accident severity distribution
# Bars are ordered from the most to the least frequent severity level.
severity_order = df['Accident_Severity'].value_counts().index

plt.figure()
sns.countplot(data=df, x='Accident_Severity', order=severity_order)
plt.title('Accident Severity Distribution')
plt.xlabel('Severity')
plt.ylabel('Number of Accidents')
plt.savefig('../outputs/severity_distribution.png')
# Show the figure BEFORE closing it: once plt.close() has run there is no
# figure left, so a later plt.show() displays nothing.
plt.show()
plt.close()


In [13]:
# Count accidents per district and keep the ten largest counts
district_counts = df['Local_Authority_(District)'].value_counts()
top_districts = district_counts.nlargest(10)

# Horizontal bar chart (count on x, district on y), saved to disk and then closed
fig, ax = plt.subplots()
sns.barplot(x=top_districts.values, y=top_districts.index, ax=ax)
ax.set_title('Top 10 Districts with Most Accidents')
ax.set_xlabel('Number of Accidents')
ax.set_ylabel('District')
fig.tight_layout()
fig.savefig('../outputs/top_districts.png')
plt.close(fig)


In [15]:
# Monthly accident trend
# Count rows per (year, month) pair and flatten the result into columns
monthly_trend = (
    df.groupby(['year', 'month'])
    .size()
    .reset_index(name='count')
)

# Build a plottable timestamp for each group by pinning the day to the 1st
month_start_parts = monthly_trend[['year', 'month']].assign(day=1)
monthly_trend['date'] = pd.to_datetime(month_start_parts)

# Line chart of the counts over time, saved to disk and then closed
fig, ax = plt.subplots()
sns.lineplot(data=monthly_trend, x='date', y='count', marker='o', ax=ax)
ax.set_title('Monthly Accident Trend')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Accidents')
plt.xticks(rotation=45)
fig.tight_layout()
fig.savefig('../outputs/monthly_trend.png')
plt.close(fig)


In [19]:
# Write the current contents of df to CSV, omitting the row index
cleaned_csv_path = '../data/cleaned_accident_analysis.csv'
df.to_csv(cleaned_csv_path, index=False)