In [8]:
!pip install dash==2.15.0 dash-bootstrap-components pyngrok plotly pandas




In [41]:
# Columns whose distinct (non-null) values are listed below for inspection.
columns_for_unique_values = [
    "PERSON_TYPE", "PERSON_INJURY", "EJECTION", "EMOTIONAL_STATUS",
    "BODILY_INJURY", "SAFETY_EQUIPMENT", "COMPLAINT", "BOROUGH",
    "CONTRIBUTING FACTOR VEHICLE 1", "VEHICLE TYPE CODE 1",
    "POSITION_IN_VEHICLE_CLEAN",
]

for col in columns_for_unique_values:
    # Report missing columns and move on instead of raising a KeyError.
    if col not in df.columns:
        print(f"\nColumn '{col}' not found in the DataFrame.")
        continue

    print(f"\nUnique values for column '{col}':")
    distinct_values = df[col].dropna().unique()
    # A plain list prints every entry; the numpy array repr may elide long arrays.
    print(distinct_values.tolist())


Unique values for column 'PERSON_TYPE':
['Occupant', 'Bicyclist', 'Other Motorized', 'Pedestrian']

Unique values for column 'PERSON_INJURY':
['Unspecified', 'Injured', 'Killed']

Unique values for column 'EJECTION':
['Not Ejected', 'Does Not Apply', 'Ejected', 'Partially Ejected', 'Trapped', 'Unknown']

Unique values for column 'EMOTIONAL_STATUS':
['Does Not Apply', 'Conscious', 'Unknown', 'Shock', 'Unconscious', 'Semiconscious', 'Incoherent', 'Apparent Death']

Unique values for column 'BODILY_INJURY':
['Does Not Apply', 'Back', 'Knee-Lower Leg Foot', 'Head', 'Elbow-Lower-Arm-Hand', 'Chest', 'Neck', 'Unknown', 'Shoulder - Upper Arm', 'Hip-Upper Leg', 'Entire Body', 'Abdomen - Pelvis', 'Face', 'Eye']

Unique values for column 'SAFETY_EQUIPMENT':
['Lap Belt & Harness', 'Lap Belt', 'Unknown', 'Child Restraint Only', 'Helmet Only (In-Line Skater/Bicyclist)', 'Air Bag Deployed/Lap Belt/Harness', 'Helmet (Motorcycle Only)', 'Other', 'Air Bag Deployed/Lap Belt', 'Air Bag Deployed', 'Harnes

In [9]:
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never store the ngrok authtoken in the notebook. The token that used to be
# hard-coded in this cell must be treated as leaked and revoked/rotated in the ngrok
# dashboard. The token is now taken from the NGROK_AUTHTOKEN environment variable; when
# that is unset (or empty) the user is prompted, and the typed value is not echoed.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_auth_token)


In [45]:
import pandas as pd
import dash
from dash import dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
import plotly.express as px
from pyngrok import ngrok

# Load dataset (already cleaned by you – I will NOT preprocess anything)
df = pd.read_csv("merged_cleaned_dataset.csv")

# Helper column: calendar year of each crash. Timestamps that cannot be parsed are
# coerced to NaT, which leaves YEAR missing for those rows.
crash_timestamps = pd.to_datetime(df["CRASH_DATETIME"], errors="coerce")
df["YEAR"] = crash_timestamps.dt.year

# ----------------------------------
# DASH APP
# ----------------------------------
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.FLATLY])


def _filter_col(label, component_id, column, placeholder, width, cast=None):
    """Build one labelled multi-select dropdown wrapped in a ``dbc.Col``.

    The dropdown options are the sorted, non-null distinct values of ``df[column]``.
    ``cast`` (optional) is applied to every value after sorting; the Year filter uses
    ``int`` so that options are plain integers rather than floats.
    """
    values = sorted(df[column].dropna().unique())
    if cast is not None:
        values = [cast(v) for v in values]
    return dbc.Col(
        [
            html.Label(label),
            dcc.Dropdown(
                id=component_id,
                options=[{"label": v, "value": v} for v in values],
                multi=True,
                placeholder=placeholder,
            ),
        ],
        width=width,
    )


def _filter_row(specs, class_name=None):
    """Build a ``dbc.Row`` of filter columns; ``class_name`` is only set when given."""
    row_kwargs = {} if class_name is None else {"className": class_name}
    return dbc.Row([_filter_col(*spec) for spec in specs], **row_kwargs)


def _graph_row(id_width_pairs):
    """Build a ``dbc.Row`` holding one empty ``dcc.Graph`` per ``(graph_id, width)`` pair."""
    return dbc.Row(
        [dbc.Col(dcc.Graph(id=graph_id), width=width) for graph_id, width in id_width_pairs]
    )


# One entry per filter row: (row className, [column specs]).
# Column spec: (label, dropdown id, DataFrame column, placeholder, width[, cast]).
_FILTER_ROWS = [
    ("mb-3", [
        ("Borough", "borough_filter", "BOROUGH", "Select borough(s)", 4),
        ("Year", "year_filter", "YEAR", "Select year(s)", 4, int),
        ("Vehicle Type", "vehicle_filter", "VEHICLE TYPE CODE 1", "Select vehicle type(s)", 4),
    ]),
    ("mb-4", [
        ("Contributing Factor", "factor_filter", "CONTRIBUTING FACTOR VEHICLE 1",
         "Select contributing factor(s)", 6),
        ("Injury Type", "injury_filter", "PERSON_INJURY", "Select injury type(s)", 6),
    ]),
    ("mb-3", [
        ("Person Type", "person_type_filter", "PERSON_TYPE", "Select person type(s)", 4),
        ("Ejection", "ejection_filter", "EJECTION", "Select ejection status", 4),
        ("Emotional Status", "emotional_status_filter", "EMOTIONAL_STATUS",
         "Select emotional status", 4),
    ]),
    ("mb-4", [
        ("Bodily Injury", "bodily_injury_filter", "BODILY_INJURY", "Select bodily injury", 4),
        ("Safety Equipment", "safety_equipment_filter", "SAFETY_EQUIPMENT",
         "Select safety equipment", 4),
        ("Complaint", "complaint_filter", "COMPLAINT", "Select complaint", 4),
    ]),
    (None, [
        ("Position in Vehicle", "position_filter", "POSITION_IN_VEHICLE_CLEAN",
         "Select position", 6),
    ]),
]

# One entry per chart row: [(graph id, column width), ...]. Widths are Bootstrap grid
# units out of 12.
_GRAPH_ROWS = [
    [("borough_chart", 6), ("year_chart", 6)],
    [("vehicle_pie", 4), ("heatmap", 4), ("map_chart", 4)],
    [("factor_chart", 12)],
    [("injury_severity_chart", 6), ("ejection_chart", 6)],
    [("safety_chart", 6), ("complaint_chart", 6)],
    [("vehicle_factor_chart", 6), ("position_chart", 6)],
    [("vehicle_trend_chart", 12)],
]

app.layout = dbc.Container(
    [
        html.H2("NYC Crash Analysis Dashboard", className="mt-3 mb-4"),

        # Filters
        *[_filter_row(specs, class_name) for class_name, specs in _FILTER_ROWS],

        # Search bar
        dbc.Input(
            id="search_input",
            placeholder="Search (e.g., 'Brooklyn 2022 pedestrian crashes')",
            type="text",
        ),
        html.Br(),

        dbc.Button("Generate Report", id="generate_btn", color="primary", className="mb-4"),

        # Charts
        *[_graph_row(pairs) for pairs in _GRAPH_ROWS],
    ],
    fluid=True,
)

# ----------------------------------
# Helper: Parsing search query
# ----------------------------------
def parse_search_query(q, data=None):
    """Extract filter values from a free-text search string.

    Parameters
    ----------
    q : str or None
        The text typed into the search box. Matching is case-insensitive.
    data : pandas.DataFrame, optional
        Frame providing the candidate ``BOROUGH`` and ``YEAR`` values. Defaults to the
        module-level ``df``.

    Returns
    -------
    dict
        May contain any of these keys (a key is absent when nothing matched):

        * ``"borough"`` - list of every borough value whose name occurs in ``q``.
        * ``"year"``    - sorted list of every dataset year that appears in ``q`` as a
          complete run of digits.
        * ``"injury"``  - ``["Injured", "Killed"]`` when ``q`` mentions "pedestrian".

        An empty or ``None`` query yields ``{}``.
    """
    import re  # local import so the function does not depend on another cell's imports

    if not q:
        return {}

    frame = df if data is None else data
    q = str(q).lower()
    found = {}

    # Collect *all* matching boroughs; a query naming several boroughs keeps every one
    # of them instead of only the last match.
    boroughs = [b for b in frame["BOROUGH"].dropna().unique() if str(b).lower() in q]
    if boroughs:
        found["borough"] = boroughs

    # Compare years against whole digit runs so that "2022" is not detected inside a
    # longer number such as "120223".
    numbers_in_query = {int(token) for token in re.findall(r"\d+", q)}
    known_years = {int(y) for y in frame["YEAR"].dropna().unique()}
    years = sorted(known_years & numbers_in_query)
    if years:
        found["year"] = years

    # NOTE(review): "pedestrian" selects injury outcomes, not PERSON_TYPE == "Pedestrian".
    # Kept as-is because the dashboard callback consumes the "injury" key; confirm that
    # this is the intended meaning.
    if "pedestrian" in q:
        found["injury"] = ["Injured", "Killed"]

    return found


# ----------------------------------
# Callback
# ----------------------------------
@app.callback(
    [Output(chart_id, "figure") for chart_id in (
        "borough_chart",
        "year_chart",
        "vehicle_pie",
        "heatmap",
        "map_chart",
        "factor_chart",
        "injury_severity_chart",
        "ejection_chart",
        "safety_chart",
        "complaint_chart",
        "vehicle_factor_chart",
        "position_chart",
        "vehicle_trend_chart",
    )],
    [Input("generate_btn", "n_clicks")]
    + [Input(filter_id, "value") for filter_id in (
        "borough_filter",
        "year_filter",
        "vehicle_filter",
        "factor_filter",
        "injury_filter",
        "person_type_filter",
        "ejection_filter",
        "emotional_status_filter",
        "bodily_injury_filter",
        "safety_equipment_filter",
        "complaint_filter",
        "position_filter",
    )]
    + [Input("search_input", "value")],
)
def update_dashboard(n_clicks, borough, years, vehicles, factors, injuries,
                     person_type, ejection, emotional_status, bodily_injury,
                     safety_equipment, complaint, position, search_text):
    """Rebuild all thirteen dashboard figures from the current filter state.

    Every dropdown and the search box are registered as ``Input``s, so this runs whenever
    any of them changes, not only when "Generate Report" is clicked; ``n_clicks`` is
    received but not used.

    Parameters
    ----------
    n_clicks : int or None
        Click count of the "Generate Report" button (unused).
    borough, years, vehicles, factors, injuries, person_type, ejection,
    emotional_status, bodily_injury, safety_equipment, complaint, position : list or None
        Selected values of the corresponding multi-select dropdowns. An empty or ``None``
        selection means "do not filter on this column".
    search_text : str or None
        Free-text query. Any borough / year / injury values that ``parse_search_query``
        extracts from it REPLACE the matching dropdown selections.

    Returns
    -------
    tuple
        Thirteen Plotly figures, in the same order as the callback ``Output`` list.
    """
    # Values recognised in the search text take precedence over the dropdowns.
    if search_text:
        parsed = parse_search_query(search_text)
        borough = parsed.get("borough", borough)
        years = parsed.get("year", years)
        injuries = parsed.get("injury", injuries)

    # (column, selected values) pairs, applied in order. Boolean indexing returns a new
    # frame and nothing below mutates it, so the full dataset is never copied up front.
    selections = (
        ("BOROUGH", borough),
        ("YEAR", years),
        ("VEHICLE TYPE CODE 1", vehicles),
        ("CONTRIBUTING FACTOR VEHICLE 1", factors),
        ("PERSON_INJURY", injuries),
        ("PERSON_TYPE", person_type),
        ("EJECTION", ejection),
        ("EMOTIONAL_STATUS", emotional_status),
        ("BODILY_INJURY", bodily_injury),
        ("SAFETY_EQUIPMENT", safety_equipment),
        ("COMPLAINT", complaint),
        ("POSITION_IN_VEHICLE_CLEAN", position),
    )
    dff = df
    for column, chosen in selections:
        if chosen:
            dff = dff[dff[column].isin(chosen)]

    def count_by(by):
        """Row counts of the filtered frame per group of ``by``, as a 'Count' column."""
        return dff.groupby(by).size().reset_index(name="Count")

    # 1. Borough bar chart
    fig_borough = px.bar(
        count_by("BOROUGH"),
        x="BOROUGH", y="Count",
        title="Crashes by Borough",
    )

    # 2. Year trend
    fig_year = px.line(
        count_by("YEAR"),
        x="YEAR", y="Count",
        title="Crashes by Year",
    )

    # 3. Pie chart (PERSON TYPE) - rendered in the "vehicle_pie" graph slot
    fig_pie = px.pie(
        dff,
        names="PERSON_TYPE",
        title="Distribution of Person Types",
    )

    # 4. Heatmap: bodily injury vs emotional status
    fig_heat = px.density_heatmap(
        dff,
        x="EMOTIONAL_STATUS",
        y="BODILY_INJURY",
        title="Heatmap: Bodily Injury vs Emotional Status",
        color_continuous_scale="Viridis",
        nbinsx=len(dff["EMOTIONAL_STATUS"].unique()),
        nbinsy=len(dff["BODILY_INJURY"].unique()),
    )
    fig_heat.update_layout(
        xaxis_title="Emotional Status",
        yaxis_title="Bodily Injury",
        xaxis_tickangle=-45,
    )

    # 5. Map of crash locations
    fig_map = px.scatter_mapbox(
        dff,
        lat="LATITUDE",
        lon="LONGITUDE",
        hover_name="BOROUGH",
        color="BOROUGH",
        mapbox_style="open-street-map",
        zoom=9,
        title="Crash Locations (Colored by Borough)",
    )

    # 6. Crashes per contributing factor (horizontal bars)
    fig_factor = px.bar(
        count_by("CONTRIBUTING FACTOR VEHICLE 1"),
        x="Count",
        y="CONTRIBUTING FACTOR VEHICLE 1",
        title="Crashes by Contributing Factor",
        orientation="h",
    )
    fig_factor.update_layout(
        xaxis_title="Number of Crashes",
        yaxis_title="Contributing Factor",
    )

    # 7. Bodily injury split by injury outcome
    fig_injury_severity = px.bar(
        count_by(["PERSON_INJURY", "BODILY_INJURY"]),
        x="PERSON_INJURY",
        y="Count",
        color="BODILY_INJURY",
        title="Bodily Injury Distribution by Injury Type",
    )

    # 8. Ejection status split by person type
    fig_ejection = px.bar(
        count_by(["PERSON_TYPE", "EJECTION"]),
        x="Count",
        y="EJECTION",
        color="PERSON_TYPE",
        orientation="h",
        title="Ejection Status by Person Type",
    )

    # 9. Injury outcome split by safety equipment
    fig_safety = px.bar(
        count_by(["SAFETY_EQUIPMENT", "PERSON_INJURY"]),
        x="SAFETY_EQUIPMENT",
        y="Count",
        color="PERSON_INJURY",
        title="Injury Count by Safety Equipment",
    )
    fig_safety.update_layout(xaxis_tickangle=-45)

    # 10. Ten most frequent complaints, split by person type
    top_complaints = dff["COMPLAINT"].value_counts().nlargest(10).index
    complaint_counts = (
        dff[dff["COMPLAINT"].isin(top_complaints)]
        .groupby(["COMPLAINT", "PERSON_TYPE"])
        .size()
        .reset_index(name="Count")
    )
    fig_complaint = px.bar(
        complaint_counts,
        x="COMPLAINT",
        y="Count",
        color="PERSON_TYPE",
        title="Top 10 Complaints by Person Type",
    )
    fig_complaint.update_layout(xaxis_tickangle=-45)

    # 11. Ten most frequent contributing factors vs vehicle type
    top_factors = dff["CONTRIBUTING FACTOR VEHICLE 1"].value_counts().nlargest(10).index
    fig_vehicle_factor = px.density_heatmap(
        dff[dff["CONTRIBUTING FACTOR VEHICLE 1"].isin(top_factors)],
        x="VEHICLE TYPE CODE 1",
        y="CONTRIBUTING FACTOR VEHICLE 1",
        title="Top Contributing Factors by Vehicle Type",
        color_continuous_scale="Viridis",
    )

    # 12. Injury outcome split by position in vehicle
    fig_position = px.bar(
        count_by(["POSITION_IN_VEHICLE_CLEAN", "PERSON_INJURY"]),
        x="Count",
        y="POSITION_IN_VEHICLE_CLEAN",
        color="PERSON_INJURY",
        orientation="h",
        title="Injuries by Position in Vehicle",
    )

    # 13. Yearly crash counts per vehicle type
    fig_vehicle_trend = px.line(
        count_by(["YEAR", "VEHICLE TYPE CODE 1"]),
        x="YEAR",
        y="Count",
        color="VEHICLE TYPE CODE 1",
        title="Crash Trends by Vehicle Type Over Years",
    )

    # Order must match the Output list of the callback decorator.
    return (
        fig_borough,
        fig_year,
        fig_pie,
        fig_heat,
        fig_map,
        fig_factor,
        fig_injury_severity,
        fig_ejection,
        fig_safety,
        fig_complaint,
        fig_vehicle_factor,
        fig_position,
        fig_vehicle_trend,
    )


In [46]:

# Local port shared by the Dash server and the ngrok tunnel that forwards to it.
DASH_PORT = 8050

# Stop any ngrok process left from a previous run of this cell before opening a new tunnel.
ngrok.kill()
public_url = ngrok.connect(DASH_PORT)
print("Dashboard running on:", public_url)

# Start serving the app on the tunnelled port.
app.run_server(port=DASH_PORT)


Dashboard running on: NgrokTunnel: "https://wavy-kendal-superintolerable.ngrok-free.dev" -> "http://localhost:8050"


<IPython.core.display.Javascript object>