In [5]:
!pip install pandas openpyxl plotly gradio




In [6]:
import pandas as pd
import numpy as np

# Load the Excel file
file_path = "/content/Dataset - Data Drift 2025.xlsx"
df = pd.read_excel(file_path)

# Preview
print("Initial shape:", df.shape)
df.head()


Initial shape: (301718, 20)


Unnamed: 0,District Name,Year,Date,Time of Day,Number of vehicles involved,Number of Casualties,Accident_Classification,Accident_Spot,Urban / Rural,Accident Location,Cause,Accident Severity,Junction Control,Road_Character,Road_Type,Surface_Type,Surface_Condition,Road_Condition,Weather Condition,Lane_Type
0,Chitradurga,2022,13-01-2022,Evening,4,10,Road Accidents,Narrow road,Urban,Residential area,Not Applicable,Fatal,No signal lights,Others,Highways & Expressways,Bitumen(Tar),Dry,No influence on accident,Clear,
1,Ramanagara,2019,20-07-2019,Night,3,10,Road Accidents,Not Applicable,Urban,Near Bus stop,Not Applicable,Fatal,Not Applicable,Not Applicable,Highways & Expressways,Not Applicable,Wet,No influence on accident,Rain / Windy,
2,Ramanagara,2017,01-03-2017,Night,3,10,Road Accidents,Other,Rural,Open area,Not Applicable,Fatal,Uncontrolled,Others,Highways & Expressways,Bitumen(Tar),Dry,No influence on accident,Clear,
3,Bagalkot,2019,18-04-2019,Evening,2,10,Not Applicable,Circle,Urban,Near office complex,Human Error,Fatal,Controlled,Not Applicable,Highways & Expressways,Bitumen(Tar),Wet,No influence on accident,Clear,
4,Hassan,2020,03-08-2020,Night,2,10,Road Accidents,Cross roads,Rural,Near a factory industrial area,Not Applicable,Fatal,Uncontrolled,Straight and flat,Highways & Expressways,Concrete,Wet,No influence on accident,Rain / Windy,DualLane


In [7]:
# Drop duplicates
df.drop_duplicates(inplace=True)

# Replace blank entries
df.replace(r'^\s*$', np.nan, regex=True, inplace=True)

# Fix spelling errors (sample shown, you can expand)
df['Time of Day'] = df['Time of Day'].str.strip().str.lower()
df['Time of Day'] = df['Time of Day'].replace({
    'evening': 'Evening', 'night ': 'Night', 'mornng': 'Morning', 'afteroon': 'Afternoon'
})

# Handle missing values creatively
df.fillna({
    'Road Type': 'Unknown',
    'Surface Type': 'Unknown',
    'Weather Conditions': 'Unknown',
    'District': 'Unknown',
    'Time of Day': 'Unknown'
}, inplace=True)


In [8]:
# Ensure datetime
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Month and Day of Week
df['Month'] = df['Date'].dt.month_name()
df['Day of Week'] = df['Date'].dt.day_name()

# Weekend / Weekday
df['WeekPart'] = df['Day of Week'].apply(lambda x: 'Weekend' if x in ['Saturday', 'Sunday'] else 'Weekday')


  df['Date'] = pd.to_datetime(df['Date'], errors='coerce')


In [11]:
import plotly.express as px
top5 = df['District Name'].value_counts().nlargest(5).reset_index()
fig = px.bar(top5, x='District Name', y='count', # Changed x and y arguments
             labels={'District Name': 'District', 'count': 'Accidents'}, # Updated labels accordingly
             title='Top 5 Districts by Number of Accidents',
             color='District Name', text_auto=True) # Updated color argument
fig.show()

In [13]:
severity = df['Accident Severity'].value_counts(normalize=True).reset_index()

severity.columns = ['Severity', 'Proportion']
fig = px.pie(severity, names='Severity', values='Proportion',
             title='Accident Severity Distribution', hole=0.3)
fig.show()

In [20]:
# 4. Road Type Contribution
fig4 = px.bar(df['Road_Type'].value_counts().reset_index(),
              x='Road_Type',  # Changed from 'index' to 'Road_Type'
              y='count',      # Changed from 'Road Type' to 'count'
              title='Accidents by Road Type',
              labels={'Road_Type': 'Road Type', 'count': 'Count'}) # Updated labels
fig4.show()

In [21]:
combo = df.groupby(['Time of Day', 'Day of Week', 'WeekPart']).size().reset_index(name='Count')
fig = px.sunburst(combo, path=['WeekPart', 'Day of Week', 'Time of Day'], values='Count',
                  title='Accident Patterns by Time of Day and Day of Week')
fig.show()


In [22]:
fig = px.pie(df['Weather Condition'].value_counts().reset_index(),
             names='Weather Condition',  # Changed 'index' to 'Weather Condition'
             values='count',           # Changed 'Weather Conditions' to 'count'
             title='Casualties by Weather Conditions')
fig.show()

In [23]:
fig = px.bar(df['Lane_Type'].value_counts().reset_index(),
             x='Lane_Type', y='count',
             labels={'Lane_Type': 'Lane_Type', 'count': 'Accidents'},
             title='Accidents by Lane Type', text_auto=True)
fig.show()


In [24]:
temp = df.groupby(['Road_Character', 'Accident Severity']).size().reset_index(name='Count')
fig = px.bar(temp, x='Road_Character', y='Count', color='Accident Severity',
             title='Road_Character vs Accident Severity', barmode='group', text_auto=True)
fig.show()


In [25]:
fig = px.bar(df['Surface_Type'].value_counts().reset_index(),
             x='Surface_Type', y='count',
             labels={'Surface_Type': 'Surface_Type', 'count': 'Accidents'},
             title='Accidents by Surface Type', text_auto=True)
fig.show()


In [27]:
top_causes = df['Cause'].value_counts().nlargest(10).reset_index()
# Rename the 'index' column to 'Cause' for clarity
top_causes.rename(columns={'index': 'Cause'}, inplace=True)
fig = px.histogram(top_causes, x='Cause', y='count',
             labels={'Cause': 'Cause'},
             title='Top 10 Causes of Accidents', color='Cause', text_auto=True) # Use 'Cause' as the color argument
fig.show()

In [28]:
temp = df[(df['Date'].dt.year >= 2021)]
trend = temp.groupby(df['Date'].dt.to_period("M")).size().reset_index(name='Casualties')
trend['Date'] = trend['Date'].dt.to_timestamp()
fig = px.line(trend, x='Date', y='Casualties', title='Monthly Casualties (2021–2023)', markers=True)
fig.show()


In [29]:
pivot = df.groupby(['Surface_Condition', 'Surface_Type', 'Weather Condition']).size().reset_index(name='Count')
fig = px.sunburst(pivot, path=['Surface_Condition', 'Surface_Type', 'Weather Condition'], values='Count',
                  title='Casualties by Surface Condition, Type and Weather')
fig.show()


In [30]:
vehicle = df.groupby(['Time of Day', 'Road_Type'])['Number of vehicles involved'].sum().reset_index()
fig = px.bar(vehicle, x='Time of Day', y='Number of vehicles involved', color='Road_Type',
             title='Vehicle Involvement by Time of Day and Road Type', barmode='group', text_auto=True)
fig.show()


In [31]:
junction = df.groupby(['Junction Control', 'Accident Severity']).size().reset_index(name='Count')
fig = px.bar(junction, x='Junction Control', y='Count', color='Accident Severity',
             title='Casualties at Junction Control by Severity', barmode='group', text_auto=True)
fig.show()


In [32]:
import gradio as gr
import plotly.express as px

def get_kpi_plot(kpi_name):
    if kpi_name == "Top 5 Districts by Accidents":
             top5 = df['District Name'].value_counts().nlargest(5).reset_index()
             fig = px.bar(top5, x='District Name', y='count', # Changed x and y arguments
             labels={'District Name': 'District', 'count': 'Accidents'}, # Updated labels accordingly
             title='Top 5 Districts by Number of Accidents',
             color='District Name', text_auto=True) # Updated color argument

    elif kpi_name == "Accident Severity Distribution":
        severity = df['Accident Severity'].value_counts(normalize=True).reset_index()
        severity.columns = ['Severity', 'Proportion']
        fig = px.pie(severity, names='Severity', values='Proportion', title=kpi_name, hole=0.3)

    elif kpi_name == "Accident Patterns by Time of Day and Day of Week":
        combo = df.groupby(['Time of Day', 'Day of Week', 'WeekPart']).size().reset_index(name='Count')
        fig = px.sunburst(combo, path=['WeekPart', 'Day of Week', 'Time of Day'], values='Count', title=kpi_name)

    elif kpi_name == "Road Type Contribution to Accidents":
          fig = px.bar(df['Road_Type'].value_counts().reset_index(),
              x='Road_Type',  # Changed from 'index' to 'Road_Type'
              y='count',      # Changed from 'Road Type' to 'count'
              title='Accidents by Road Type',
              labels={'Road_Type': 'Road Type', 'count': 'Count'}) # Updated labels
    elif kpi_name == "Casualties by Weather Conditions":
        counts = df['Weather Condition'].value_counts().reset_index(name='Count')
        fig = px.pie(counts, names='Weather Condition', values='Count', title=kpi_name)

    elif kpi_name == "Accidents by Lane Type":
        counts = df['Lane_Type'].value_counts().reset_index(name='Count')
        fig = px.bar(counts, x='Lane_Type', y='Count', title=kpi_name, text_auto=True)

    elif kpi_name == "Accidents by Road Character and Severity":
        temp = df.groupby(['Road_Character', 'Accident Severity']).size().reset_index(name='Count')
        fig = px.bar(temp, x='Road_Character', y='Count', color='Accident Severity', title=kpi_name, barmode='group', text_auto=True)

    elif kpi_name == "Accidents by Surface Type":
        counts = df['Surface_Type'].value_counts().reset_index(name='Count')
        fig = px.bar(counts, x='Surface_Type', y='Count', title=kpi_name, text_auto=True)

    elif kpi_name == "Top Causes of Accidents":
        top_causes = df['Cause'].value_counts().nlargest(10).reset_index()
        # Rename the 'index' column to 'Cause' for clarity
        top_causes.rename(columns={'index': 'Cause'}, inplace=True)
        fig = px.histogram(top_causes, x='Cause', y='count',
        labels={'Cause': 'Cause'},
        title='Top 10 Causes of Accidents', color='Cause', text_auto=True) # Use 'Cause' as the color argument
    elif kpi_name == "Monthly Casualties Trend (2021–2023)":
        temp = df[df['Date'].dt.year >= 2021]
        trend = temp.groupby(df['Date'].dt.to_period("M")).size().reset_index(name='Casualties')
        trend['Date'] = trend['Date'].dt.to_timestamp()
        fig = px.line(trend, x='Date', y='Casualties', title=kpi_name, markers=True)

    elif kpi_name == "Casualties by Surface Condition, Type, and Weather":
        pivot = df.groupby(['Surface_Condition', 'Surface_Type', 'Weather Condition']).size().reset_index(name='Count')
        fig = px.sunburst(pivot, path=['Surface_Condition', 'Surface_Type', 'Weather Condition'], values='Count', title=kpi_name)

    elif kpi_name == "Vehicle Involvement by Time and Road Type":
         vehicle = df.groupby(['Time of Day', 'Road_Type'])['Number of vehicles involved'].sum().reset_index()
         fig = px.bar(vehicle, x='Time of Day', y='Number of vehicles involved', color='Road_Type',
         title='Vehicle Involvement by Time of Day and Road Type', barmode='group', text_auto=True)

    elif kpi_name == "Casualties at Junction Control by Severity":
        junction = df.groupby(['Junction Control', 'Accident Severity']).size().reset_index(name='Count')
        fig = px.bar(junction, x='Junction Control', y='Count', color='Accident Severity', title=kpi_name, barmode='group', text_auto=True)

    else:
        fig = go.Figure()
        fig.add_annotation(text="Select a KPI to view the plot.", x=0.5, y=0.5, showarrow=False)

    return fig


In [34]:
kpi_options = [
    "Top 5 Districts by Accidents",
    "Accident Severity Distribution",
    "Accident Patterns by Time of Day and Day of Week",
    "Road Type Contribution to Accidents",
    "Casualties by Weather Conditions",
    "Accidents by Lane Type",
    "Accidents by Road Character and Severity",
    "Accidents by Surface Type",
    "Top Causes of Accidents",
    "Monthly Casualties Trend (2021–2023)",
    "Casualties by Surface Condition, Type, and Weather",
    "Vehicle Involvement by Time and Road Type",
    "Casualties at Junction Control by Severity"
]

gr.Interface(
    fn=get_kpi_plot,
    inputs=gr.Dropdown(kpi_options, label="Select a KPI"),
    outputs=gr.Plot(label="KPI Visualization"),
    title="Road Accident KPI Dashboard - Data Drift 2025"
).launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1c10384a53ced3e2ec.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


