In [None]:
# import libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder

In [None]:
# Read the accident records CSV (2017-2023 per the file name) into a DataFrame
# and show it as the cell output.
data = pd.read_csv(filepath_or_buffer="accidents_2017_to_2023_english.csv")
data

In [None]:
# Drop the columns this analysis does not need; `data` itself is left untouched.

df = data.drop(["week_day", "city"], axis="columns")
df

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); with default
# arguments pandas reports these for the numeric columns.
df.describe()

In [None]:
# Missing-value count for every column.
df.isna().sum()

In [None]:
# Number of rows that exactly repeat an earlier row (all columns equal).
df.duplicated().sum()

In [None]:
# Headline figures. Each row of df is counted as one accident.
total_accidents = len(df)
print(f"Total Accidents: {total_accidents}")

# Deaths summed over every accident
total_deaths = df["deaths"].sum()
print(f"Total Deaths: {total_deaths}")

# Per-accident injured count = slightly injured + severely injured.
# The new column is reused by later cells (scatter plot, severity averages).
df["total_injured"] = df["slightly_injured"].add(df["severely_injured"])
print(f"Total Injured: {df['total_injured'].sum()}")


In [None]:
# Count accidents recorded at each exact (latitude, longitude) pair
accident_hotspots = (
    df.groupby(["latitude", "longitude"])
    .size()
    .reset_index(name="accident_count")
)

# Keep the ten coordinate pairs with the most accidents, busiest first
top_hotspots = accident_hotspots.sort_values(by="accident_count", ascending=False).iloc[:10]
print(top_hotspots)


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# One point per accident: number of vehicles involved against people injured.
# `total_injured` is the column added by the totals cell above.
plt.figure(figsize=(8, 5))
sns.scatterplot(data=df, x="vehicles_involved", y="total_injured")
plt.title("Impact of Vehicles on Injuries")
plt.xlabel("Vehicles Involved")
plt.ylabel("Total Injured")
plt.show()


In [None]:
# Parse the `inverse_data` column into pandas datetimes (no existence check is
# made: a missing column raises KeyError).
# NOTE(review): presumably this column is the accident date — confirm against the data dictionary.
df["inverse_data"] = pd.to_datetime(df["inverse_data"])

# Derive the calendar month number (1-12) and the weekday name
df["month"] = df["inverse_data"].dt.month
df["weekday"] = df["inverse_data"].dt.day_name()

# Accidents per month-of-year; all years are pooled into the same 12 buckets
monthly_accidents = df.groupby("month").size()
monthly_accidents.plot.bar(title="Accidents per Month", figsize=(8, 5))


In [None]:
# Severity classification for a single accident record
def classify_severity(row):
    """Return the severity label for one accident record.

    The casualty fields are checked from worst to least severe and the first
    one with a positive count decides the label; later fields are not read
    once a match is found.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by 'deaths', 'severely_injured' and
        'slightly_injured' (e.g. a DataFrame row).

    Returns
    -------
    str
        "Fatal", "Severe", "Minor", or "No Injury" when no field is positive.
    """
    ranked_levels = (
        ('deaths', "Fatal"),
        ('severely_injured', "Severe"),
        ('slightly_injured', "Minor"),
    )
    for field, label in ranked_levels:
        if row[field] > 0:
            return label
    return "No Injury"

# Label every accident with its severity level (row-wise call to classify_severity)
df['severity'] = df.apply(classify_severity, axis=1)

# Count each severity type; value_counts orders the result by frequency
severity_counts = df['severity'].value_counts()
print(severity_counts)

# Plot severity distribution.
# Colours are looked up by label rather than passed as a fixed list: the bars
# follow the frequency order of value_counts, so a positional colour list would
# paint whichever level happens to be most common red, regardless of severity.
severity_colors = {
    "Fatal": "red",
    "Severe": "orange",
    "Minor": "yellow",
    "No Injury": "green",
}
bar_colors = [severity_colors[level] for level in severity_counts.index]
severity_counts.plot(kind='bar', color=bar_colors)
plt.title("Accident Severity Distribution")
plt.xlabel("Severity Level")
plt.ylabel("Number of Accidents")
plt.show()


In [None]:
# Total deaths per road (despite its name, `road_accidents` holds death totals,
# not accident counts), then the ten deadliest roads.
road_accidents = df.groupby("road_id", as_index=False)["deaths"].sum()
top_roads = road_accidents.sort_values(by="deaths", ascending=False).iloc[:10]
print(top_roads)


In [None]:
df["inverse_data"] = pd.to_datetime(df["inverse_data"])  # Ensure datetime format
df["day_of_week"] = df["inverse_data"].dt.day_name()  # English weekday name

# Count accidents per day and put them in calendar order. value_counts sorts by
# frequency, which scrambles the weekdays on the x-axis and hides the weekly
# pattern; reindexing restores Monday..Sunday (days with no rows show as 0).
weekday_order = [
    "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
]
accidents_per_day = df["day_of_week"].value_counts().reindex(weekday_order, fill_value=0)

# Plot
accidents_per_day.plot(kind="bar", color="purple", title="Accidents per Day of the Week")
plt.xlabel("Day of the Week")
plt.ylabel("Number of Accidents")
plt.show()


In [None]:
import seaborn as sns

# Pairwise correlation (pandas default method) between the casualty counts and
# the number of vehicles involved
correlation_matrix = df.loc[
    :,
    ['deaths', 'slightly_injured', 'severely_injured', 'uninjured', 'vehicles_involved'],
].corr()

# Annotated heatmap of the correlation matrix
plt.figure(figsize=(8, 5))
sns.heatmap(data=correlation_matrix, cmap="coolwarm", annot=True, fmt=".2f")
plt.title("Correlation Between Accident Factors")
plt.show()


In [None]:
# Mean injured counts within each severity class; relies on the `severity` and
# `total_injured` columns added by earlier cells.
severity_injury_avg = (
    df.groupby("severity")[["slightly_injured", "severely_injured", "total_injured"]]
    .agg("mean")
)
print(severity_injury_avg)


In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Build a chronological series with one observation per calendar month.
# The `monthly_accidents` totals from the month-of-year cell pool every year
# into 12 buckets keyed 1-12: that is a seasonal profile, not a time series,
# and its integer index cannot be extended into forecast dates (passing the
# integer 12 to pd.date_range as `start` yields timestamps in 1970).
accident_dates = pd.to_datetime(df["inverse_data"]).dropna()
monthly_series = (
    pd.Series(1, index=pd.DatetimeIndex(accident_dates))
    .sort_index()
    .resample("MS")  # month-start bins; months without any rows become 0
    .sum()
)
# NOTE(review): if the last calendar month in the data is only partly recorded,
# its low count will drag the forecast down — confirm coverage or drop it.

# Fit ARIMA Model
model = ARIMA(monthly_series, order=(2, 1, 2))
model_fit = model.fit()

# Forecast next 12 months. Because the input carries a monthly DatetimeIndex,
# statsmodels labels the forecast with the 12 months following the last
# observation, so no manual index has to be constructed.
forecast = model_fit.forecast(steps=12)

# Plot actual vs forecast on a shared date axis
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(monthly_series.index, monthly_series.values, label="Actual")
ax.plot(forecast.index, forecast.values, label="Forecast", linestyle="dashed", color="red")
ax.set_xlabel("Date")
ax.set_ylabel("Number of Accidents")
ax.set_title("Accident Forecast for Next 12 Months")
ax.legend()
plt.show()
