In [116]:
!pip install dash==2.15.0 dash-bootstrap-components pyngrok plotly pandas




In [117]:
# pandas is imported here because this is the first cell that needs it; without
# this the cell raises NameError on a fresh kernel (Restart & Run All).
import pandas as pd

# Load the dataset that every later cell (inspection, dashboard) reads from.
df = pd.read_csv("merged_cleaned_dataset.csv")

In [118]:
# Columns whose distinct (non-null) values are printed below for inspection.
columns_for_unique_values = [
    "PERSON_TYPE",
    "PERSON_INJURY",
    "EJECTION",
    "EMOTIONAL_STATUS",
    "BODILY_INJURY",
    "SAFETY_EQUIPMENT",
    "COMPLAINT",
    "BOROUGH",
    "CONTRIBUTING FACTOR VEHICLE 1",
    "VEHICLE TYPE CODE 1",
    "POSITION_IN_VEHICLE_CLEAN",
]

for col in columns_for_unique_values:
    # Report absent columns instead of raising, then move on to the next one.
    if col not in df.columns:
        print(f"\nColumn '{col}' not found in the DataFrame.")
        continue

    print(f"\nUnique values for column '{col}':")
    # A plain list prints every value; the numpy array repr may be abbreviated.
    distinct_values = df[col].dropna().unique().tolist()
    print(distinct_values)


Unique values for column 'PERSON_TYPE':
['Occupant', 'Bicyclist', 'Other Motorized', 'Pedestrian']

Unique values for column 'PERSON_INJURY':
['Unspecified', 'Injured', 'Killed']

Unique values for column 'EJECTION':
['Not Ejected', 'Does Not Apply', 'Ejected', 'Partially Ejected', 'Trapped', 'Unknown']

Unique values for column 'EMOTIONAL_STATUS':
['Does Not Apply', 'Conscious', 'Unknown', 'Shock', 'Unconscious', 'Semiconscious', 'Incoherent', 'Apparent Death']

Unique values for column 'BODILY_INJURY':
['Does Not Apply', 'Back', 'Knee-Lower Leg Foot', 'Head', 'Elbow-Lower-Arm-Hand', 'Chest', 'Neck', 'Unknown', 'Shoulder - Upper Arm', 'Hip-Upper Leg', 'Entire Body', 'Abdomen - Pelvis', 'Face', 'Eye']

Unique values for column 'SAFETY_EQUIPMENT':
['Lap Belt & Harness', 'Lap Belt', 'Unknown', 'Child Restraint Only', 'Helmet Only (In-Line Skater/Bicyclist)', 'Air Bag Deployed/Lap Belt/Harness', 'Helmet (Motorcycle Only)', 'Other', 'Air Bag Deployed/Lap Belt', 'Air Bag Deployed', 'Harnes

In [119]:
import os
from getpass import getpass

from pyngrok import ngrok

# Never store the ngrok auth token in the notebook: anything saved here is
# visible to everyone the notebook is shared with. Read it from the
# NGROK_AUTHTOKEN environment variable, falling back to an interactive prompt
# whose input is not echoed or saved in the cell.
# NOTE(review): a token was previously committed in this cell; it should be
# revoked in the ngrok dashboard and replaced.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)


In [120]:
import pandas as pd
import dash
from dash import dcc, html, Input, Output
import dash_bootstrap_components as dbc
import plotly.express as px

# Derive the crash year used by the trend chart. Values in CRASH_DATETIME that
# cannot be parsed are coerced to NaT, so their YEAR is missing rather than
# raising an error.
crash_timestamps = pd.to_datetime(df["CRASH_DATETIME"], errors="coerce")
df["YEAR"] = crash_timestamps.dt.year

# ----------------------------------
# DASH APP
# ----------------------------------
def _dropdown_options(column):
    """Return Dash dropdown options for the sorted, non-null unique values of ``df[column]``."""
    unique_values = sorted(df[column].dropna().unique())
    return [{"label": value, "value": value} for value in unique_values]


def _filter_column(label, dropdown_id, column, placeholder, width):
    """Return a ``dbc.Col`` holding a label and a multi-select dropdown over ``df[column]``."""
    dropdown = dcc.Dropdown(
        id=dropdown_id,
        options=_dropdown_options(column),
        multi=True,
        placeholder=placeholder,
    )
    return dbc.Col([html.Label(label), dropdown], width=width)


def _graph_row(graph_ids, width):
    """Return a ``dbc.Row`` with one equally sized ``dcc.Graph`` column per id."""
    return dbc.Row([dbc.Col(dcc.Graph(id=graph_id), width=width) for graph_id in graph_ids])


app = dash.Dash(__name__, external_stylesheets=[dbc.themes.FLATLY])

# First filter row: three dropdowns, each a third of the 12-column grid.
primary_filter_row = dbc.Row([
    _filter_column("Borough", "borough_filter", "BOROUGH", "Select borough(s)", 4),
    _filter_column("Vehicle Type", "vehicle_filter", "VEHICLE TYPE CODE 1", "Select vehicle type(s)", 4),
    _filter_column(
        "Contributing Factor",
        "factor_filter",
        "CONTRIBUTING FACTOR VEHICLE 1",
        "Select contributing factor(s)",
        4,
    ),
], className="mb-3")

# Second filter row: two dropdowns, each half of the grid.
secondary_filter_row = dbc.Row([
    _filter_column("Injury Type", "injury_filter", "PERSON_INJURY", "Select injury type(s)", 6),
    _filter_column("Person Type", "person_type_filter", "PERSON_TYPE", "Select person type(s)", 6),
], className="mb-4")

# Free-text search box and the report button; both are wired as callback inputs.
search_box = dbc.Input(
    id="search_input",
    placeholder="Search (e.g., 'Brooklyn 2022 pedestrian crashes')",
    type="text",
)
generate_button = dbc.Button("Generate Report", id="generate_btn", color="primary", className="mb-4")

app.layout = dbc.Container([
    html.H2("NYC Crash Analysis Dashboard", className="mt-3 mb-4"),
    primary_filter_row,
    secondary_filter_row,
    search_box,
    html.Br(),
    generate_button,
    # The seven charts: three rows of two half-width graphs, then one full-width graph.
    _graph_row(["borough_chart", "injury_chart"], 6),
    _graph_row(["ejection_chart", "complaint_chart"], 6),
    _graph_row(["vehicle_factor_chart", "position_chart"], 6),
    _graph_row(["vehicle_trend_chart"], 12),
], fluid=True)


# ----------------------------------
# Callback
# ----------------------------------
def _apply_selection_filters(frame, selections):
    """Return the rows of ``frame`` that match every non-empty selection.

    ``selections`` maps a column name to the chosen values for that column
    (a list, or ``None``/empty when nothing is selected). Empty selections
    impose no restriction. ``frame`` itself is never modified: boolean
    indexing yields a new DataFrame, and with no active selection the input
    is returned as-is.
    """
    for column, chosen_values in selections.items():
        if chosen_values:
            frame = frame[frame[column].isin(chosen_values)]
    return frame


@app.callback(
    [
        Output("borough_chart", "figure"),
        Output("injury_chart", "figure"),
        Output("ejection_chart", "figure"),
        Output("complaint_chart", "figure"),
        Output("vehicle_factor_chart", "figure"),
        Output("position_chart", "figure"),
        Output("vehicle_trend_chart", "figure"),
    ],
    [
        Input("generate_btn", "n_clicks"),
        Input("borough_filter", "value"),
        Input("vehicle_filter", "value"),
        Input("factor_filter", "value"),
        Input("injury_filter", "value"),
        Input("person_type_filter", "value"),
        Input("search_input", "value"),
    ]
)
def update_dashboard(n_clicks, borough, vehicles, factors, injuries, person_type, search_text):
    """Rebuild all seven dashboard figures from the currently selected filters.

    Parameters
    ----------
    n_clicks
        Click count of the "Generate Report" button. Received because the
        button is a callback input, but not used in the computation.
    borough, vehicles, factors, injuries, person_type
        Selected values of the corresponding multi-select dropdowns; a falsy
        value (``None`` or an empty list) means "no filter" for that column.
    search_text
        Content of the search box. Received but not used.
        TODO(review): the search box currently has no effect on the charts.

    Returns
    -------
    tuple
        Seven Plotly figures, in the order of the ``Output`` list above.
    """
    # Filtering never mutates the global frame, so no defensive copy of the
    # full dataset is needed on every callback invocation.
    dff = _apply_selection_filters(df, {
        "BOROUGH": borough,
        "VEHICLE TYPE CODE 1": vehicles,
        "CONTRIBUTING FACTOR VEHICLE 1": factors,
        "PERSON_INJURY": injuries,
        "PERSON_TYPE": person_type,
    })

    # -------------------------
    # 1. Borough chart
    # -------------------------
    borough_df = (
        dff.groupby("BOROUGH")
        .size()
        .reset_index(name="Count")
        .sort_values("Count", ascending=False)
    )
    fig_borough = px.bar(
        borough_df,
        x="BOROUGH",
        y="Count",
        # The arrow was previously stored as mojibake ("â†’") in the title.
        title="Crashes by Borough (Highest → Lowest)",
    )

    # -------------------------
    # 2. Injury-Type chart (horizontal)
    # -------------------------
    injury_df = (
        dff.groupby("BODILY_INJURY")
        .size()
        .reset_index(name="Count")
        .sort_values("Count", ascending=False)
    )
    fig_injury = px.bar(
        injury_df,
        x="Count",
        y="BODILY_INJURY",
        orientation="h",
        title="Crashes by Bodily Injury Type",
    )
    # Order the categories on the y-axis by their bar totals.
    fig_injury.update_yaxes(categoryorder="total ascending")

    # -------------------------
    # 3. Ejection chart (grouped by person type)
    # -------------------------
    ejection_df = dff.groupby(["PERSON_TYPE", "EJECTION"]).size().reset_index(name="Count")
    fig_ejection = px.bar(
        ejection_df,
        x="EJECTION",      # categories on x-axis
        y="Count",         # numeric on y-axis
        color="PERSON_TYPE",
        title="Ejection Status by Person Type",
    )

    # -------------------------
    # 4. Complaint chart (grouped by person type)
    # -------------------------
    # Restrict to the ten most frequent complaints in the filtered data.
    top_complaints = dff["COMPLAINT"].value_counts().nlargest(10).index
    complaint_df = (
        dff[dff["COMPLAINT"].isin(top_complaints)]
        .groupby(["COMPLAINT", "PERSON_TYPE"])
        .size()
        .reset_index(name="Count")
    )
    fig_complaint = px.bar(
        complaint_df,
        x="COMPLAINT",
        y="Count",
        color="PERSON_TYPE",
        title="Top 10 Complaints by Person Type",
    )
    fig_complaint.update_layout(xaxis_tickangle=-45)

    # -------------------------
    # 5. Vehicle Factor Heatmap
    # -------------------------
    # Restrict to the ten most frequent contributing factors in the filtered data.
    top_factors = dff["CONTRIBUTING FACTOR VEHICLE 1"].value_counts().nlargest(10).index
    fig_vehicle_factor = px.density_heatmap(
        dff[dff["CONTRIBUTING FACTOR VEHICLE 1"].isin(top_factors)],
        x="VEHICLE TYPE CODE 1",
        y="CONTRIBUTING FACTOR VEHICLE 1",
        title="Top Contributing Factors by Vehicle Type",
    )

    # -------------------------
    # 6. Position chart
    # -------------------------
    position_df = (
        dff.groupby(["POSITION_IN_VEHICLE_CLEAN", "PERSON_INJURY"])
        .size()
        .reset_index(name="Count")
    )
    fig_position = px.bar(
        position_df,
        x="POSITION_IN_VEHICLE_CLEAN",  # categories on x-axis
        y="Count",                       # numeric on y-axis
        color="PERSON_INJURY",
        title="Injuries by Position in Vehicle",
    )

    # -------------------------
    # 7. Vehicle trend chart
    # -------------------------
    trend_df = dff.groupby(["YEAR", "VEHICLE TYPE CODE 1"]).size().reset_index(name="Count")
    fig_vehicle_trend = px.line(
        trend_df,
        x="YEAR",
        y="Count",
        color="VEHICLE TYPE CODE 1",
        title="Crash Trends by Vehicle Type Over Years",
    )

    return fig_borough, fig_injury, fig_ejection, fig_complaint, fig_vehicle_factor, fig_position, fig_vehicle_trend


In [121]:

# The tunnel target and the Dash server must use the same port; keep it in one place.
DASH_PORT = 8050

# Stop any ngrok process started earlier through pyngrok before opening a new tunnel,
# so re-running this cell does not pile up tunnels.
ngrok.kill()
public_url = ngrok.connect(DASH_PORT)
print("Dashboard running on:", public_url)

# `run_server` is a deprecated alias of `run`; use `run` directly.
app.run(port=DASH_PORT)


Dashboard running on: NgrokTunnel: "https://stipendless-nanette-phantastical.ngrok-free.dev" -> "http://localhost:8050"


<IPython.core.display.Javascript object>