# In [7]:  (notebook cell prompt left over from the Jupyter export)
# tab0.py — Refined IPL Overview Dashboard (single-file, pasteable)
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, html, dcc

# -------------------
# THEME
# -------------------
# Dashboard-wide palette: page background, card background, accent and text.
COLORS = dict(
    bg_dark="#0b1020",
    card_bg="#111827",
    accent="#ffb703",
    text="white",
)

# Brand colour per franchise, keyed by abbreviation (add entries for new teams).
TEAM_COLORS = dict(
    CSK="#f1c40f",
    MI="#004ba0",
    KKR="#6A1B9A",
    RCB="#d50000",
    RR="#e91e63",
    SRH="#ff5722",
    DC="#1976d2",
    PBKS="#b71c1c",
    GT="#0d47a1",
    LSG="#00bcd4",
    RPS="#9b59b6",
    KTK="#1abc9c",
    GL="#00695c",
    PWI="#8e44ad",
)

# Marker colours for venues outside India: amber for UAE, teal for SA.
REGION_COLORS = dict(UAE="#F59E0B", SA="#10B981")

# Full franchise name -> abbreviation used as the key into TEAM_COLORS.
TEAM_ABBR = {
    "Chennai Super Kings": "CSK",
    "Mumbai Indians": "MI",
    "Kolkata Knight Riders": "KKR",
    "Royal Challengers Bangalore": "RCB",
    "Rajasthan Royals": "RR",
    "Sunrisers Hyderabad": "SRH",
    "Delhi Capitals": "DC",
    "Punjab Kings": "PBKS",
    "Gujarat Titans": "GT",
    "Lucknow Super Giants": "LSG",
    "Rising Pune Supergiant": "RPS",
    "Kochi Tuskers Kerala": "KTK",
    "Pune Warriors": "PWI",
    "Gujarat Lions": "GL",
}

def team_abbr(team):
    """Return the short code for a team name.

    Missing values map to ``"NA"``. Names listed in ``TEAM_ABBR`` use their
    registered abbreviation; any other name falls back to its first three
    characters, upper-cased.
    """
    if pd.isna(team):
        return "NA"
    if team in TEAM_ABBR:
        return TEAM_ABBR[team]
    # Unknown franchise: derive a code from the name itself.
    return team[:3].upper()

def pick_color_for_teamname(teamname_or_abbr):
    """Return the hex colour for a team or foreign region label.

    Args:
        teamname_or_abbr: A team abbreviation (key of ``TEAM_COLORS``), a full
            team name (key of ``TEAM_ABBR``) or a region code (key of
            ``REGION_COLORS``). Surrounding whitespace is ignored.

    Returns:
        The matching colour string, or neutral gray ``"#9e9e9e"`` for missing
        values, non-string values and labels that match none of the tables.
    """
    fallback = "#9e9e9e"
    # NaN / None / numeric cells (e.g. from a sparsely filled CSV column)
    # cannot name a team, so they get the fallback instead of raising.
    if not isinstance(teamname_or_abbr, str):
        return fallback

    label = teamname_or_abbr.strip()
    # 1) the label already is an abbreviation
    if label in TEAM_COLORS:
        return TEAM_COLORS[label]
    # 2) the label is a full team name
    abbr = TEAM_ABBR.get(label)
    if abbr in TEAM_COLORS:
        return TEAM_COLORS[abbr]
    # 3) the label is a foreign region code, otherwise gray
    return REGION_COLORS.get(label, fallback)

# -------------------
# LOAD DATA
# -------------------
# adjust paths if necessary
# Raw inputs; paths are relative to the working directory.
matches = pd.read_csv("../data/cleaned/matches.csv")
deliveries = pd.read_csv("../data/cleaned/deliveries.csv")
venue_coords = pd.read_csv("../data/cleaned/venue_coords.csv")  # one row per venue

# Coerce key columns to numeric so that matches.Id and deliveries.Match_Id
# compare equal; unparseable values become missing (or over 0 for "Over").
matches = matches.assign(
    Season=lambda frame: pd.to_numeric(frame["Season"], errors="coerce").astype("Int64"),
    Id=lambda frame: pd.to_numeric(frame["Id"], errors="coerce"),
)
deliveries = deliveries.assign(
    Over=lambda frame: pd.to_numeric(frame["Over"], errors="coerce").fillna(0).astype(int),
    Match_Id=lambda frame: pd.to_numeric(frame["Match_Id"], errors="coerce"),
)

# -------------------
# DASH APP
# -------------------
# Single Dash application for this file; the browser tab title is set up front.
app = Dash(__name__, title="IPL Overall Overview")

# Seasons covered by the dashboard; used by the header and the trend charts.
_FIRST_SEASON = 2008
_LAST_SEASON = 2024


def _dark_theme(**overrides):
    """Return the layout keywords shared by every figure, plus *overrides*."""
    return dict(
        plot_bgcolor=COLORS["bg_dark"],
        paper_bgcolor=COLORS["bg_dark"],
        font_color="white",
        **overrides,
    )


def _kpi_card(title, val, color):
    """Build one KPI tile: a small accent-coloured title above a bold value."""
    return html.Div([
        html.Div(title, style={"fontSize": "13px", "color": COLORS["accent"]}),
        html.H3(str(val), style={"color": color, "margin": "0"}),
    ], style={
        "background": COLORS["card_bg"], "borderRadius": "10px",
        "padding": "12px", "margin": "6px", "textAlign": "center", "minWidth": "160px",
    })


def _most_trophies(m):
    """Return e.g. ``"CSK & MI (5)"`` for the team(s) with the most finals won."""
    # Exact match on the match type: a substring test would also count
    # knockout rounds whose label merely contains the word "Final".
    match_type = m["Match_Type"].astype(str).str.strip().str.lower()
    titles = m.loc[match_type.isin(["final", "finals"]), "Winner"].value_counts()
    if titles.empty:
        return "N/A"
    max_titles = int(titles.max())
    # Sorted so that tied teams are always listed in the same order.
    leaders = sorted(team_abbr(team) for team in titles[titles == max_titles].index)
    return f"{' & '.join(leaders)} ({max_titles})"


def _best_win_rate(m):
    """Return ``(team, win %)`` for the team with the highest wins / matches played."""
    wins = m["Winner"].value_counts().rename_axis("Team").reset_index(name="Wins")
    appearances = pd.concat([m["Team1"], m["Team2"]], ignore_index=True)
    played = appearances.value_counts().rename_axis("Team").reset_index(name="Matches_Played")
    # Right join keeps teams that never won; their missing win count becomes 0.
    team_perf = wins.merge(played, on="Team", how="right").fillna(0)
    if team_perf.empty:
        return "N/A", 0
    team_perf["Win_pct"] = team_perf["Wins"] / team_perf["Matches_Played"] * 100
    best = team_perf.sort_values("Win_pct", ascending=False).iloc[0]
    return best["Team"], round(best["Win_pct"], 1)


def _highest_match_total(m, d):
    """Return ``"<T1> vs <T2> — <runs> runs"`` for the highest-scoring match."""
    match_totals = d.groupby("Match_Id")["Total_Runs"].sum()
    if match_totals.empty:
        return "N/A"
    hm_id = int(match_totals.idxmax())
    hm_runs = int(match_totals.max())
    fixture = m[m["Id"] == hm_id]
    if fixture.empty:
        # No metadata for this match: show its id instead of the teams.
        teams = str(hm_id)
    else:
        teams = f"{team_abbr(fixture.iloc[0]['Team1'])} vs {team_abbr(fixture.iloc[0]['Team2'])}"
    return f"{teams} — {hm_runs} runs"


def _kpi_row(m, d, potm_counts):
    """Build the wrapping row of KPI tiles shown under the page header."""
    total_matches = len(m)
    total_runs = int(d["Total_Runs"].sum())
    total_wickets = int(d["Is_Wicket"].sum())
    avg_runs = round(total_runs / total_matches, 1) if total_matches else 0
    avg_wickets = round(total_wickets / total_matches, 2) if total_matches else 0

    if potm_counts.empty:
        most_potm = "N/A"
    else:
        most_potm = f"{potm_counts.index[0]} — {int(potm_counts.iloc[0])}"

    best_team, best_team_pct = _best_win_rate(m)

    cards = [
        ("Total Matches", total_matches, COLORS["accent"]),
        ("Total Runs", f"{total_runs:,}", "#00e676"),
        ("Total Wickets", f"{total_wickets:,}", "#f44336"),
        ("Avg Runs / Match", avg_runs, "#29b6f6"),
        ("Avg Wickets / Match", avg_wickets, "#e91e63"),
        ("Venues", m["Venue"].nunique(), "#ce93d8"),
        ("Most Trophies", _most_trophies(m), COLORS["accent"]),
        ("Most POTM", most_potm, "#ffd166"),
        ("Best Team (Win%)", f"{team_abbr(best_team)} — {best_team_pct}%",
         pick_color_for_teamname(best_team)),
        ("Highest Match Total", _highest_match_total(m, d), "#ffb703"),
    ]
    return html.Div(
        [_kpi_card(title, value, color) for title, value, color in cards],
        style={"display": "flex", "justifyContent": "center", "flexWrap": "wrap"},
    )


def _season_average_figure(d):
    """Dual-axis line chart: mean runs and mean wickets per match, by season."""
    # Sum per match first, then average the match totals within each season.
    match_stats = (
        d.groupby(["Match_Id", "Season"], dropna=False)[["Total_Runs", "Is_Wicket"]]
        .sum()
        .reset_index()
    )
    season_stats = (
        match_stats.groupby("Season")[["Total_Runs", "Is_Wicket"]]
        .mean()
        .reset_index()
        .sort_values("Season")
    )

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=season_stats["Season"], y=season_stats["Total_Runs"],
        name="Avg Runs per Match", mode="lines+markers",
        line=dict(color="#00b4d8", width=3)))
    fig.add_trace(go.Scatter(
        x=season_stats["Season"], y=season_stats["Is_Wicket"],
        name="Avg Wickets per Match", mode="lines+markers",
        line=dict(color="#f94144", width=3), yaxis="y2"))
    # Only a faint grid on the primary axis; the secondary axis has none.
    fig.update_layout(
        title="Avg Runs vs Avg Wickets per Match (Season)",
        height=350,
        yaxis=dict(title="Avg Runs", showgrid=True, gridcolor="rgba(255,255,255,0.04)"),
        yaxis2=dict(title="Avg Wickets", overlaying="y", side="right", showgrid=False),
        xaxis=dict(showgrid=False, tickmode="linear"),
        **_dark_theme(),
    )
    return fig


def _intensity_heatmap(d, value_col, color_label, scale, title):
    """Season x Over heatmap of ``sum(value_col) / deliveries * 6`` per cell."""
    cells = d.groupby(["Season", "Over"])
    # Scale the per-delivery rate to a six-ball over.
    per_over = cells[value_col].sum() / cells.size() * 6
    # Seasons ascending on the rows; combinations with no deliveries show as 0.
    matrix = per_over.unstack("Over").sort_index(ascending=True).fillna(0)

    fig = px.imshow(
        matrix,
        color_continuous_scale=scale,
        labels=dict(x="Over", y="Season", color=color_label),
        aspect="auto",
        title=title,
    )
    fig.update_layout(height=360, **_dark_theme())
    return fig


def _most_frequent_team(d, player_col, team_col):
    """Map each player to the team they appear with on the most deliveries."""
    counts = d.groupby([player_col, team_col]).size().reset_index(name="count")
    counts = counts.sort_values([player_col, "count"], ascending=[True, False])
    return counts.drop_duplicates(player_col).set_index(player_col)[team_col].to_dict()


def _top10_bar_figure(table, player_col, value_col, team_lookup, title):
    """Horizontal bar chart of a ranked table, coloured by each player's team.

    ``table`` must already be sorted best-first; bars are drawn in reverse so
    the leader ends up at the top of the chart.
    """
    table = table.copy()
    table["Team"] = table[player_col].map(team_lookup).fillna("NA")
    table["Abbr"] = table["Team"].map(team_abbr)
    table["Color"] = table["Team"].map(pick_color_for_teamname).fillna("#9e9e9e")
    ranked = table.iloc[::-1]

    fig = go.Figure(go.Bar(
        x=ranked[value_col],
        y=ranked[player_col] + " — " + ranked["Abbr"],
        orientation="h",
        marker_color=ranked["Color"],
        text=ranked[value_col],
        textposition="outside",
    ))
    fig.update_layout(title=title, height=360, xaxis=dict(showgrid=False), **_dark_theme())
    return fig


def _venue_map_figure(m, vcoords):
    """Geo scatter of venues, sized by matches hosted and coloured by ``Home``."""
    counts = m["Venue"].value_counts().rename_axis("Venue").reset_index(name="Matches")
    vmap = counts.merge(vcoords, on="Venue", how="left")

    # "Home" holds a team label or a region code; pick_color_for_teamname
    # resolves both and returns gray for missing values.
    colors = vmap["Home"].apply(pick_color_for_teamname)
    # Square-root scaling with a floor of 6 px so small venues stay visible.
    sizes = np.maximum(6, np.sqrt(vmap["Matches"]) * 2.5)
    hover = [
        f"{venue}<br>{int(n_matches)} matches<br>Home: {home}"
        for venue, n_matches, home in zip(vmap["Venue"], vmap["Matches"], vmap["Home"])
    ]

    # One trace for all venues instead of one trace per row.
    fig = go.Figure(go.Scattergeo(
        lon=vmap["lon"], lat=vmap["lat"],
        text=hover,
        marker=dict(size=sizes, color=colors,
                    line=dict(width=0.6, color="white"), opacity=0.9),
        hoverinfo="text",
    ))
    # NOTE(review): the lat/lon window below is centred on India, so venues
    # tagged with a foreign region are probably outside the visible area.
    fig.update_layout(
        title="IPL Venues",
        height=500,
        showlegend=False,
        geo=dict(
            scope="asia",
            projection_type="mercator",
            center=dict(lat=22, lon=78),
            lataxis=dict(range=[5, 35]),
            lonaxis=dict(range=[65, 90]),
            showland=True,
            landcolor="rgb(15,25,40)",
            showocean=True,
            oceancolor="rgb(10,15,25)",
            showcountries=True,
            countrycolor="rgba(255,255,255,0.3)",
            showframe=False,
        ),
        title_x=0.5,
        **_dark_theme(),
    )
    return fig


def _season_rate_figure(d, mask, label, color, fillcolor):
    """Area chart of the share (%) of deliveries selected by *mask*, per season."""
    # Mean of a boolean mask per season == selected deliveries / all deliveries.
    rates = mask.groupby(d["Season"]).mean().mul(100).rename("Rate").reset_index()
    rates = rates[rates["Season"].between(_FIRST_SEASON, _LAST_SEASON)]

    # Zoom the y axis onto the data; leave it automatic when nothing is left
    # after filtering (min/max would be NaN).
    y_range = None if rates.empty else [rates["Rate"].min() - 1, rates["Rate"].max() + 1]

    fig = go.Figure(go.Scatter(
        x=rates["Season"], y=rates["Rate"],
        mode="lines+markers",
        name=label,
        line=dict(color=color, width=3, shape="spline"),
        marker=dict(size=9, color=color, line=dict(width=1.5, color="white")),
        fill="tozeroy",
        fillcolor=fillcolor,
        hovertemplate=f"<b>Season %{{x}}</b><br>{label}: %{{y:.2f}}%<extra></extra>",
    ))
    fig.update_layout(
        title=f"Season-wise {label}",
        height=360,
        yaxis=dict(
            title=label,
            rangemode="tozero",
            gridcolor="rgba(255,255,255,0.08)",
            range=y_range,
        ),
        xaxis=dict(
            title="Season",
            tickmode="linear",
            range=[_FIRST_SEASON, _LAST_SEASON],
            dtick=1,
            showgrid=False,
        ),
        margin=dict(l=60, r=40, t=60, b=40),
        hovermode="x unified",
        showlegend=False,
        **_dark_theme(),
    )
    return fig


def _graph_pair(left_fig, right_fig, **row_style):
    """Place two figures side by side, each taking roughly half the width."""
    return html.Div([
        html.Div(dcc.Graph(figure=left_fig),
                 style={"width": "49%", "display": "inline-block"}),
        html.Div(dcc.Graph(figure=right_fig),
                 style={"width": "49%", "display": "inline-block", "marginLeft": "2%"}),
    ], style={"marginTop": "6px", **row_style})


def layout_tab0(matches_df, deliveries_df, venue_coords_df):
    """Build the static "overall overview" page of the IPL dashboard.

    Args:
        matches_df: One row per match. Columns read: ``Id``, ``Season``,
            ``Venue``, ``Match_Type``, ``Winner``, ``Player_Of_Match``,
            ``Team1``, ``Team2``.
        deliveries_df: One row per delivery. Columns read: ``Match_Id``,
            ``Over``, ``Total_Runs``, ``Batsman_Runs``, ``Is_Wicket``,
            ``Batter``, ``Batting_Team``, ``Bowler``, ``Bowling_Team``.
        venue_coords_df: One row per venue. Columns read: ``Venue``, ``lat``,
            ``lon``, ``Home``.

    Returns:
        An ``html.Div`` containing, top to bottom: header, KPI tiles, season
        averages, batting/bowling heatmaps, top run scorers / wicket takers,
        player-of-the-match leaders, venue map, and boundary / dot-ball trends.
        The inputs are not modified.
    """
    m = matches_df
    d = deliveries_df.copy()  # copied because a Season column is added below
    vcoords = venue_coords_df

    # Tag every delivery with the season of its match.
    d["Season"] = d["Match_Id"].map(m.set_index("Id")["Season"].to_dict())

    potm_counts = m["Player_Of_Match"].value_counts()
    kpi_row = _kpi_row(m, d, potm_counts)

    fig_rw = _season_average_figure(d)

    fig_bat = _intensity_heatmap(
        d, "Batsman_Runs", "Avg Runs / Over", "YlOrRd",
        "Batting Intensity — Avg Runs per Over (Season × Over)")
    fig_bowl = _intensity_heatmap(
        d, "Is_Wicket", "Avg Wickets / Over", "PuBuGn",
        "Bowling Intensity — Avg Wickets per Over (Season × Over)")

    # Bars are coloured by the team each player appears with most often.
    batter_team = _most_frequent_team(d, "Batter", "Batting_Team")
    bowler_team = _most_frequent_team(d, "Bowler", "Bowling_Team")

    top_bat = (
        d.groupby("Batter")["Batsman_Runs"].sum().reset_index()
        .sort_values("Batsman_Runs", ascending=False).head(10)
    )
    fig_bat_top = _top10_bar_figure(top_bat, "Batter", "Batsman_Runs", batter_team,
                                    "Top Run Scorers")

    # NOTE(review): every delivery flagged Is_Wicket is credited to the bowler;
    # confirm whether dismissals such as run-outs should be excluded.
    top_bowl = (
        d[d["Is_Wicket"] == 1].groupby("Bowler").size().reset_index(name="Wickets")
        .sort_values("Wickets", ascending=False).head(10)
    )
    fig_bowl_top = _top10_bar_figure(top_bowl, "Bowler", "Wickets", bowler_team,
                                     "Top Wicket Takers")

    # value_counts is already sorted descending, so the head is the top 10.
    potm = potm_counts.head(10).rename_axis("Player").reset_index(name="Awards")
    fig_potm = _top10_bar_figure(potm, "Player", "Awards", batter_team,
                                 "Most Player of the Match Awards")

    fig_map = _venue_map_figure(m, vcoords)

    fig_boundary = _season_rate_figure(
        d, d["Batsman_Runs"].isin([4, 6]), "Boundary %",
        "#ffd166", "rgba(255,209,102,0.15)")
    fig_dot = _season_rate_figure(
        d, d["Batsman_Runs"] == 0, "Dot Ball %",
        "#00b4d8", "rgba(0,180,216,0.15)")

    return html.Div([
        html.H2(f"🏟️ IPL — Overall Overview ({_FIRST_SEASON}–{_LAST_SEASON})",
                style={"textAlign": "center", "color": COLORS["accent"], "marginBottom": "10px"}),

        kpi_row,

        html.Div([dcc.Graph(figure=fig_rw)], style={"marginTop": "8px"}),

        _graph_pair(fig_bat, fig_bowl),
        _graph_pair(fig_bat_top, fig_bowl_top),

        html.Div([dcc.Graph(figure=fig_potm)], style={"marginTop": "8px"}),

        html.Div([dcc.Graph(figure=fig_map)], style={"marginTop": "8px"}),

        _graph_pair(fig_boundary, fig_dot, marginBottom="40px"),
    ], style={"backgroundColor": COLORS["bg_dark"], "color": COLORS["text"], "padding": "18px"})


# Build the static layout once at import time and attach it to the app.
app.layout = layout_tab0(matches, deliveries, venue_coords)

if __name__ == "__main__":
    # Prefer `Dash.run`; `run_server` is gone in Dash 3. The fallback is only
    # evaluated on older 2.x releases that do not provide `run` yet.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=True, port=8050)
