<a href="https://www.kaggle.com/code/samithsachidanandan/basic-eda-road-accident-data?scriptVersionId=221052694" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Importing the libraries 

In [None]:
import pandas as pd
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')


# Importing the Data

In [None]:
# Location of the Excel workbook inside the Kaggle input directory.
path = '/kaggle/input/road-accident-dataset/Road Accident Data.xlsx'

# Read the workbook into the DataFrame used by every later cell.
df = pd.read_excel(io=path)

# Display basic info

In [None]:
# Report the number of rows/columns, then the per-column dtype and
# non-null summary.
print("Dataset Shape:", df.shape)
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# it is called directly; wrapping it in print() would emit a stray "None".
df.info()


# Converting to correct Datatypes

In [None]:
# Columns that are converted to the pandas "category" dtype below.
categorical_cols = [
    "Month",
    "Day_of_Week",
    "Junction_Control",
    "Junction_Detail",
    "Accident_Severity",
    "Light_Conditions",
    "Local_Authority_(District)",
    "Police_Force",
    "Road_Surface_Conditions",
    "Road_Type",
    "Urban_or_Rural_Area",
    "Weather_Conditions",
    "Vehicle_Type",
]

# The accident identifier is kept as a plain string.
df["Accident_Index"] = df["Accident_Index"].astype(str)

# Parse the accident date into pandas datetime values.
df["Accident Date"] = pd.to_datetime(df["Accident Date"])

# Parse "HH:MM" values; errors="coerce" turns entries that do not match the
# format into missing values before the time-of-day part is extracted.
# NOTE(review): if the spreadsheet delivers time objects rather than "HH:MM"
# strings, this parse may coerce the whole column to missing - verify.
parsed_time = pd.to_datetime(df["Time"], format="%H:%M", errors="coerce")
df["Time"] = parsed_time.dt.time

# Convert all listed columns to categoricals in one assignment.
df[categorical_cols] = df[categorical_cols].astype("category")

# Show the resulting dtype of every column.
print(df.dtypes)


In [None]:
# Show the column labels of the DataFrame (displayed as the cell's result).
df.columns

# Display first few rows

In [None]:
# Preview the top of the table; the last expression is rendered by the notebook.
print("\nFirst 5 Rows:")
df.head(n=5)

# Display last few rows

In [None]:
# Preview the bottom of the table; the last expression is rendered by the notebook.
print("\nLast 5 Rows:")
df.tail(n=5)

# Check for missing values

In [None]:

# Count the missing entries in every column.
print("\nMissing Values:")
df.isna().sum()


# Removing the Carriageway_Hazards and Time columns, then dropping rows with missing values

In [None]:
# Remove the two columns from the DataFrame in place.
df.drop(['Carriageway_Hazards', 'Time'], axis=1, inplace=True)


In [None]:
# Discard, in place, every row that still contains a missing value.
df.dropna(axis=0, inplace=True)


# Check unique values in each column

In [None]:


# Print how many distinct values each column holds.
print("\nUnique Values Per Column:")
for column_name, column_values in df.items():
    distinct_count = column_values.nunique()
    print(f"{column_name}: {distinct_count} unique values")



# Summary statistics for numerical columns

In [None]:

# Print a heading, then let the notebook render the descriptive statistics
# table returned by DataFrame.describe().
print("\nSummary Statistics:")
df.describe()

# Plotting the Categorical Columns

In [None]:
# Categorical columns to chart, one count plot per column.
categorical_cols = [
    "Month", "Day_of_Week", "Junction_Control",
    "Accident_Severity", "Light_Conditions",
    "Road_Surface_Conditions",
    "Road_Type", "Urban_or_Rural_Area"
]

# Size the subplot grid from the number of columns so that no empty rows
# are reserved (a fixed 6x2 grid left two of the six rows blank).
grid_cols = 2
grid_rows = (len(categorical_cols) + grid_cols - 1) // grid_cols  # ceiling division

plt.figure(figsize=(15, 20))
for i, col in enumerate(categorical_cols):
    plt.subplot(grid_rows, grid_cols, i + 1)

    # Bars are ordered by the column's value counts.
    sns.countplot(x=df[col], palette="viridis", order=df[col].value_counts().index)
    # Rotate the tick labels so long category names stay readable.
    plt.xticks(rotation=60)
    plt.title(f"Distribution of {col}")
plt.tight_layout()
plt.show()


# Plotting the Numerical Columns

In [None]:
# Numerical columns to chart, one box plot per column.
numerical_cols = ["Year", "Number_of_Casualties", "Number_of_Vehicles", "Speed_limit"]

# Size the subplot grid from the number of columns so that no empty row
# is reserved (a fixed 3x2 grid left the bottom row blank).
grid_cols = 2
grid_rows = (len(numerical_cols) + grid_cols - 1) // grid_cols  # ceiling division

plt.figure(figsize=(12, 10))
for i, col in enumerate(numerical_cols):
    plt.subplot(grid_rows, grid_cols, i + 1)
    sns.boxplot(y=df[col], palette="coolwarm")
    plt.title(f"Boxplot of {col}")
plt.tight_layout()
plt.show()

# Plotting the Severity 

In [None]:
# Count plot of accident severity with the count written above each bar.
plt.figure(figsize=(8, 5))
severity_order = ["Fatal", "Serious", "Slight"]
ax = sns.countplot(x=df["Accident_Severity"], palette="Set2", order=severity_order)

# Label every bar with its height, centred horizontally and nudged 5 points up.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_height:.0f}',
        (bar_centre, bar_height),
        ha='center', va='center',
        fontsize=12, color='black',
        xytext=(0, 5), textcoords='offset points',
    )

plt.title("Accident Severity Distribution")
plt.xlabel('Severity', fontsize=12)
plt.ylabel('No of Accidents', fontsize=12)
plt.tight_layout()
plt.show()


# Plotting the Light Conditions  

In [None]:
# Count plot of light conditions with the count written above each bar.
plt.figure(figsize=(10, 8))
ax = sns.countplot(x=df["Light_Conditions"], palette="deep")

# Label every bar with its height, centred horizontally and nudged 5 points up.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_height:.0f}',
        (bar_centre, bar_height),
        ha='center', va='center',
        fontsize=12, color='black',
        xytext=(0, 5), textcoords='offset points',
    )

# Tilt the category labels so they do not overlap.
plt.xticks(rotation=45)
plt.title("Light Conditions Distribution")
plt.xlabel('Light Conditions', fontsize=12)
plt.ylabel('No of Accidents', fontsize=12)
plt.tight_layout()
plt.show()


# Plotting the Road Surface Conditions  

In [None]:
# Count plot of road surface conditions with the count written above each bar.
plt.figure(figsize=(10, 8))
ax = sns.countplot(x=df["Road_Surface_Conditions"], palette="muted")

# Label every bar with its height, centred horizontally and nudged 5 points up.
for bar in ax.patches:
    bar_height = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    ax.annotate(
        f'{bar_height:.0f}',
        (bar_centre, bar_height),
        ha='center', va='center',
        fontsize=12, color='black',
        xytext=(0, 5), textcoords='offset points',
    )

# Tilt the category labels so they do not overlap.
plt.xticks(rotation=45)
plt.title("Road Surface Conditions Distribution")
plt.xlabel('Road Surface Conditions', fontsize=12)
plt.ylabel('No of Accidents', fontsize=12)
plt.tight_layout()
plt.show()

#                                                      🚀 Thank You 🚀