In [9]:
!pip install dash
!pip install dash_bootstrap_components
!pip install flask_caching



In [10]:
import pandas as pd
import numpy as np
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import plotly.express as px
import plotly.graph_objects as go
from flask_caching import Cache

# ==========================================
# Team color helpers (re-use from your app)
# ==========================================
# Franchise display metadata, keyed by full team name.
# Each entry carries:
#   "abbr"  - short label returned by get_team_abbr()
#   "color" - hex color string returned by get_team_color()
# Lookups in those helpers are case-insensitive substring matches of the key
# inside the supplied team name, tried in the order the keys are listed here.
TEAM_INFO = {
    "Chennai Super Kings": {"abbr": "CSK", "color": "#f1c40f"},
    "Mumbai Indians": {"abbr": "MI", "color": "#045093"},
    "Royal Challengers Bangalore": {"abbr": "RCB", "color": "#da291c"},
    "Kolkata Knight Riders": {"abbr": "KKR", "color": "#3b0a45"},
    "Rajasthan Royals": {"abbr": "RR", "color": "#ea1a8e"},
    "Sunrisers Hyderabad": {"abbr": "SRH", "color": "#ff822a"},
    "Delhi Capitals": {"abbr": "DC", "color": "#17449b"},
    "Punjab Kings": {"abbr": "PBKS", "color": "#d71920"},
    "Gujarat Titans": {"abbr": "GT", "color": "#041c2c"},
    "Lucknow Super Giants": {"abbr": "LSG", "color": "#00b2a9"},
    "Gujarat Lions": {"abbr": "GL", "color": "#f26522"},
    "Rising Pune Supergiant": {"abbr": "RPS", "color": "#800080"},
    "Pune Warriors India": {"abbr": "PWI", "color": "#00a3e0"},
    "Kochi Tuskers Kerala": {"abbr": "KTK", "color": "#f26a21"},
    "Deccan Chargers": {"abbr": "DCG", "color": "#1e4fa1"}
}

def get_team_abbr(team: str):
    """Return the short label for a team name.

    Non-string input yields "UNK". Otherwise the first TEAM_INFO key that
    appears (case-insensitively) inside ``team`` supplies the abbreviation;
    when no key matches, the first three characters of ``team`` are
    upper-cased and returned.
    """
    if not isinstance(team, str):
        return "UNK"
    lowered = team.lower()
    known_abbrs = (
        info["abbr"]
        for full_name, info in TEAM_INFO.items()
        if full_name.lower() in lowered
    )
    return next(known_abbrs, team[:3].upper())

def get_team_color(team: str):
    """Return the hex color for a team name.

    The first TEAM_INFO key found (case-insensitively) inside ``team``
    supplies the color. Non-string input and unknown teams both fall back to
    the neutral grey "#888888".
    """
    fallback = "#888888"
    if not isinstance(team, str):
        return fallback
    lowered = team.lower()
    known_colors = (
        info["color"]
        for full_name, info in TEAM_INFO.items()
        if full_name.lower() in lowered
    )
    return next(known_colors, fallback)

# ==========================================
# Load & standardize data
# ==========================================
# Read both CSVs from the current working directory.
matches = pd.read_csv("matches.csv")
deliveries = pd.read_csv("deliveries.csv")

# Map Title_Case headers onto the snake_case names used by the rest of this file.
# DataFrame.rename silently ignores keys that are not present, so a CSV whose
# headers are spelled differently passes through this step unchanged.
# NOTE(review): if the file uses a lowercase "id" header instead of "Id", no
# "match_id" column is produced and the merge below will fail — confirm the schema.
matches = matches.rename(columns={
    "Id": "match_id", "Season": "season", "Team1": "team1", "Team2": "team2",
    "Venue": "venue", "Date": "date", "Winner": "winner",
    "Player_Of_Match": "player_of_match", "Result": "result",
    "Match_No": "match_no", "City": "city", "Method": "method",
    "Toss_Winner": "toss_winner", "Toss_Decision": "toss_decision",
    "Super_Over": "super_over", "Match_Type": "match_type"
})
# Unparseable dates become NaT rather than raising.
matches["date"] = pd.to_datetime(matches["date"], errors="coerce")

# Same header normalization for the ball-by-ball file.
deliveries = deliveries.rename(columns={
    "Match_Id": "match_id", "Inning": "inning", "Over": "over", "Ball": "ball",
    "Batter": "batter", "Bowler": "bowler", "Batting_Team": "batting_team",
    "Bowling_Team": "bowling_team", "Batsman_Runs": "batsman_runs",
    "Total_Runs": "total_runs", "Is_Wicket": "is_wicket",
    "Player_Dismissed": "player_dismissed", "Extras_Type": "extras_type",
    "Extra_Runs": "extra_runs", "Dismissal_Kind": "dismissal_kind",
    "Non_Striker": "non_striker", "Fielder": "fielder"
})
# When the file has no wicket flag, derive one: 1 where a dismissed player is recorded.
if "is_wicket" not in deliveries.columns:
    deliveries["is_wicket"] = deliveries["player_dismissed"].notna().astype(int)

# enrich with season + date for easy filtering
# (left join: deliveries without a matching match row keep NaN season / NaT date)
deliveries = deliveries.merge(matches[["match_id", "season", "date"]], on="match_id", how="left")

# Every distinct name that appears as a batter or a bowler, sorted for the dropdowns.
ALL_PLAYERS = sorted(
    pd.concat([deliveries["batter"], deliveries["bowler"]], ignore_index=True).dropna().unique().tolist()
)
# Distinct seasons present in matches; the slider bounds fall back to 2008-2025 when none exist.
# NOTE(review): the season slider and its int(s) marks presume numeric season values —
# confirm the "season" column is not stored as text such as "2007/08".
ALL_SEASONS = sorted(matches["season"].dropna().unique().tolist())
SEASON_MIN, SEASON_MAX = (min(ALL_SEASONS), max(ALL_SEASONS)) if ALL_SEASONS else (2008, 2025)

# ==========================================
# App & cache
# ==========================================
# Dash application styled with the Bootstrap CYBORG theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.CYBORG])
# Underlying Flask server, exposed so the cache can attach to it.
server = app.server
# Flask-Caching instance with a 600-second default timeout.
# NOTE(review): nothing in the visible code decorates or calls through `cache` — confirm it is still needed.
cache = Cache(server, config={"CACHE_TYPE": "simple", "CACHE_DEFAULT_TIMEOUT": 600})

# ==========================================
# Helpers
# ==========================================
# Dismissal kinds that are filtered out when this file counts wickets
# credited to the bowler (rows whose `dismissal_kind` is in this set are dropped).
VALID_WICKET_EXCLUDES = {
    "Run Out", "Obstructing The Field", "Retired Hurt", "Retired Out",
    "Handled The Ball", "Timed Out"
}

def valid_ball_mask(df: pd.DataFrame):
    """Return a boolean Series that is True for deliveries that count as a ball.

    A row is marked False when its ``extras_type`` (missing values treated as
    an empty string, compared case-insensitively) is one of the recognised
    wide / no-ball spellings; every other row is True.
    """
    not_counted = {"wides", "noballs", "wide", "no ball", "no-ball", "noball"}
    extras_kind = df["extras_type"].fillna("").str.lower()
    is_wide_or_noball = extras_kind.isin(not_counted)
    return ~is_wide_or_noball

def phase_from_over(o):
    """Classify an over number into an innings phase.

    Parameters
    ----------
    o : anything convertible with ``float()``
        Over number of the delivery.

    Returns
    -------
    str
        "Powerplay" for values <= 6, "Middle" for values <= 15, "Death" for
        anything larger, and "Other" when the value is missing (NaN/None) or
        cannot be converted to a number.

    Notes
    -----
    The thresholds read as 1-based over numbers (overs 1-6 / 7-15 / 16+).
    TODO confirm against the data: with 0-based overs the powerplay bucket
    would span seven overs.
    """
    try:
        over_no = float(o)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "unclassifiable"; anything else should surface.
        return "Other"
    # NaN converts cleanly but fails every comparison, which previously
    # dropped missing overs into the "Death" bucket.
    if np.isnan(over_no):
        return "Other"
    if over_no <= 6:
        return "Powerplay"
    if over_no <= 15:
        return "Middle"
    return "Death"

def season_range_mask(df: pd.DataFrame, smin: int, smax: int):
    """Return a boolean Series: True where ``season`` lies in [smin, smax] (both ends inclusive)."""
    seasons = df["season"]
    at_or_after_start = seasons.ge(smin)
    at_or_before_end = seasons.le(smax)
    return at_or_after_start & at_or_before_end

def direction_filter(df, pA, pB, direction):
    """Select the deliveries between two players for the requested roles.

    ``direction`` is one of:
      - "A_bat_B_bowl": rows where pA is the batter and pB the bowler
      - "B_bat_A_bowl": rows where pB is the batter and pA the bowler
      - anything else (e.g. "Both"): the union of the two

    Returns a copy of the matching rows, so the caller may mutate it freely.
    """
    def faced(striker, attacker):
        # Rows where `striker` bats against `attacker`.
        return (df["batter"] == striker) & (df["bowler"] == attacker)

    if direction == "A_bat_B_bowl":
        selected = faced(pA, pB)
    elif direction == "B_bat_A_bowl":
        selected = faced(pB, pA)
    else:
        selected = faced(pA, pB) | faced(pB, pA)
    return df.loc[selected].copy()

def compute_kpis(df_matchup: pd.DataFrame):
    """Summarise a batter-vs-bowler selection into headline numbers.

    Parameters
    ----------
    df_matchup : pd.DataFrame
        Deliveries for the matchup. Reads the columns ``extras_type``,
        ``batsman_runs``, ``total_runs``, ``is_wicket``, ``player_dismissed``
        and ``dismissal_kind``.

    Returns
    -------
    dict
        Keys: ``balls``, ``runs``, ``sr``, ``dots``, ``dot_perc``, ``fours``,
        ``sixes``, ``dismissals`` and ``balls_per_dismissal`` (None when there
        are no dismissals). Ball/run counts use only rows accepted by
        ``valid_ball_mask``; dismissals are counted over every row.
    """
    if df_matchup.empty:
        return {
            "balls": 0, "runs": 0, "sr": 0.0, "dots": 0, "dot_perc": 0.0,
            "fours": 0, "sixes": 0, "dismissals": 0, "balls_per_dismissal": None
        }
    valid = df_matchup[valid_ball_mask(df_matchup)]
    balls = len(valid)
    runs = int(valid["batsman_runs"].sum())
    dots = int((valid["total_runs"] == 0).sum())
    dot_perc = (dots / balls * 100) if balls > 0 else 0.0
    fours = int((valid["batsman_runs"] == 4).sum())
    sixes = int((valid["batsman_runs"] == 6).sum())
    sr = (runs / balls * 100) if balls > 0 else 0.0

    # dismissals (exclude non-bowler wickets)
    # Compare dismissal kinds case-insensitively, mirroring how valid_ball_mask
    # treats extras_type, so "run out" and "Run Out" are excluded alike.
    excluded_kinds = {kind.lower() for kind in VALID_WICKET_EXCLUDES}
    kind_norm = df_matchup["dismissal_kind"].fillna("").astype(str).str.strip().str.lower()
    bowler_wicket = (
        (df_matchup["is_wicket"] == 1)
        & df_matchup["player_dismissed"].notna()
        & ~kind_norm.isin(excluded_kinds)
    )
    dismissals = int(bowler_wicket.sum())

    bpd = (balls / dismissals) if dismissals > 0 else None
    return {
        "balls": balls, "runs": runs, "sr": round(sr, 1),
        "dots": dots, "dot_perc": round(dot_perc, 1),
        "fours": fours, "sixes": sixes,
        "dismissals": dismissals,
        # `is not None` keeps a genuine 0.0 (wicket with no counted ball) instead of hiding it.
        "balls_per_dismissal": round(bpd, 1) if bpd is not None else None
    }

def summary_table_by_match(df_matchup: pd.DataFrame):
    """Build a per-match summary of the matchup, newest match first.

    Parameters
    ----------
    df_matchup : pd.DataFrame
        Deliveries for the matchup; ``date`` must be a datetime column.

    Returns
    -------
    pd.DataFrame
        Columns ``date`` (formatted "%d-%b-%Y"), ``match_id``, ``runs``,
        ``balls``, ``sr`` (rounded to 1 dp) and ``dismissals`` (integer count
        of bowler-credited wickets). Runs and balls use only rows accepted by
        ``valid_ball_mask``.
    """
    columns = ["date", "match_id", "runs", "balls", "sr", "dismissals"]
    if df_matchup.empty:
        return pd.DataFrame(columns=columns)
    valid = df_matchup[valid_ball_mask(df_matchup)]
    grp = valid.groupby(["match_id", "date"]).agg(
        runs=("batsman_runs", "sum"),
        balls=("batsman_runs", "size")
    ).reset_index()
    grp["sr"] = (grp["runs"] / grp["balls"] * 100).round(1).replace([np.inf, np.nan], 0)

    # Bowler-credited wickets; dismissal kinds are compared case-insensitively
    # so differently-cased data ("run out" vs "Run Out") is excluded alike.
    excluded_kinds = {kind.lower() for kind in VALID_WICKET_EXCLUDES}
    kind_norm = df_matchup["dismissal_kind"].fillna("").astype(str).str.strip().str.lower()
    bowler_wicket = (
        (df_matchup["is_wicket"] == 1)
        & df_matchup["player_dismissed"].notna()
        & ~kind_norm.isin(excluded_kinds)
    )
    w = df_matchup[bowler_wicket].groupby(["match_id"]).size().reset_index(name="dismissals")

    out = grp.merge(w, on="match_id", how="left").fillna({"dismissals": 0})
    # The left merge introduces NaN and therefore floats; restore whole-number counts.
    out["dismissals"] = out["dismissals"].astype(int)
    out = out.sort_values("date", ascending=False)
    out["date"] = out["date"].dt.strftime("%d-%b-%Y")
    return out[columns]

def outcome_counts(df_matchup: pd.DataFrame):
    """Count counted deliveries by outcome: runs off the bat or a bowler wicket.

    Parameters
    ----------
    df_matchup : pd.DataFrame
        Deliveries for the matchup.

    Returns
    -------
    pd.DataFrame
        Two columns, ``outcome`` and ``count``, with one row for each of
        "0", "1", "2", "3", "4", "6", "W" in that order (0 when absent).
        Outcomes outside that list are not reported.
    """
    if df_matchup.empty:
        return pd.DataFrame(columns=["outcome", "count"])
    valid = df_matchup[valid_ball_mask(df_matchup)].copy()
    # define discrete outcomes
    valid["outcome"] = valid["batsman_runs"].astype(int).astype(str)

    # A bowler-credited wicket overrides the run value. Dismissal kinds are
    # compared case-insensitively, consistent with valid_ball_mask.
    excluded_kinds = {kind.lower() for kind in VALID_WICKET_EXCLUDES}
    kind_norm = valid["dismissal_kind"].fillna("").astype(str).str.strip().str.lower()
    bowler_wicket = (
        (valid["is_wicket"] == 1)
        & valid["player_dismissed"].notna()
        & ~kind_norm.isin(excluded_kinds)
    )
    valid.loc[bowler_wicket, "outcome"] = "W"

    order = ["0", "1", "2", "3", "4", "6", "W"]
    g = valid["outcome"].value_counts().reindex(order, fill_value=0).reset_index()
    g.columns = ["outcome", "count"]
    return g

def phase_profile(df_matchup: pd.DataFrame):
    """Aggregate runs, balls and strike rate per innings phase.

    Each delivery is tagged with ``phase_from_over`` of its ``over``; only
    rows accepted by ``valid_ball_mask`` are aggregated. The result has the
    columns ``phase``, ``runs``, ``balls`` and ``sr`` and is sorted in the
    order Powerplay, Middle, Death, Other.
    """
    if df_matchup.empty:
        return pd.DataFrame(columns=["phase", "runs", "balls", "sr"])

    tagged = df_matchup.assign(phase=df_matchup["over"].apply(phase_from_over))
    counted = tagged.loc[valid_ball_mask(tagged)]

    per_phase = counted.groupby("phase").agg(
        runs=pd.NamedAgg(column="batsman_runs", aggfunc="sum"),
        balls=pd.NamedAgg(column="batsman_runs", aggfunc="size"),
    ).reset_index()

    strike_rate = per_phase["runs"] / per_phase["balls"] * 100
    per_phase["sr"] = strike_rate.replace([np.inf, np.nan], 0)

    # An ordered categorical makes the sort follow match chronology, not the alphabet.
    phase_order = ["Powerplay", "Middle", "Death", "Other"]
    per_phase["phase"] = pd.Categorical(per_phase["phase"], phase_order, ordered=True)
    return per_phase.sort_values("phase")

def over_profile(df_matchup: pd.DataFrame):
    """Mean runs off the bat and ball count for each over, ordered by over.

    Only rows accepted by ``valid_ball_mask`` contribute. The result has the
    columns ``over``, ``avg_runs`` and ``balls``.
    """
    if df_matchup.empty:
        return pd.DataFrame(columns=["over", "avg_runs", "balls"])

    counted = df_matchup.loc[valid_ball_mask(df_matchup)]
    per_over = counted.groupby("over").agg(
        avg_runs=pd.NamedAgg(column="batsman_runs", aggfunc="mean"),
        balls=pd.NamedAgg(column="batsman_runs", aggfunc="size"),
    )
    per_over = per_over.reset_index()
    return per_over.sort_values("over")

def ball_by_ball_view(df_matchup: pd.DataFrame):
    """Return the matchup deliveries in chronological order, trimmed to display columns.

    Rows are sorted by date, match, inning, over and ball. An empty input
    yields an empty frame that still carries the same column set.
    """
    display_columns = [
        "date", "match_id", "inning", "over", "ball", "batsman_runs", "total_runs",
        "is_wicket", "dismissal_kind", "fielder", "batting_team", "bowling_team", "batter", "bowler", "player_dismissed"
    ]
    if df_matchup.empty:
        return pd.DataFrame(columns=display_columns)

    chronological_keys = ["date", "match_id", "inning", "over", "ball"]
    ordered = df_matchup.copy().sort_values(chronological_keys)
    return ordered[display_columns]

# ==========================================
# Layout
# ==========================================
# Page layout: two player pickers, a direction selector and season slider,
# then the KPI row, five charts and the ball-by-ball table. All component ids
# here are the inputs/outputs wired up by the update_pvp callback.
app.layout = dbc.Container([
    # === TAB 4 LAYOUT HEADER REPLACEMENT ===
    dbc.Row([
        dbc.Col(dcc.Dropdown(
            id="pvp-a",
            options=[{"label": p, "value": p} for p in ALL_PLAYERS],
            placeholder="Select Player A",
            # Guard against an empty player list so the layout still builds.
            value=ALL_PLAYERS[0] if ALL_PLAYERS else None,
            clearable=False
        ), md=6),

        dbc.Col(dcc.Dropdown(
            id="pvp-b",
            options=[{"label": p, "value": p} for p in ALL_PLAYERS],
            placeholder="Select Player B",
            clearable=False
        ), md=6),
    ], className="mb-3"),

    dbc.Row([
        dbc.Col(dcc.RadioItems(
            id="pvp-direction",
            options=[
                {"label": "A batting vs B bowling", "value": "A_bat_B_bowl"},
                {"label": "B batting vs A bowling", "value": "B_bat_A_bowl"},
                # The tip below and direction_filter both support a combined
                # view; offer it so the option is actually reachable.
                {"label": "Both", "value": "Both"}
            ],
            value="A_bat_B_bowl",
            inline=True
        ), md=7),
        dbc.Col([
            html.Div("Seasons", style={"fontSize": "12px", "opacity": 0.8}),
            dcc.RangeSlider(
                id="pvp-season-range",
                min=SEASON_MIN, max=SEASON_MAX, step=1,
                value=[SEASON_MIN, SEASON_MAX],
                allowCross=False,
                pushable=0,
                marks={int(s): str(s) for s in ALL_SEASONS}
            )
        ], md=5),
    ], className="mb-3"),

    html.Hr(),

    # KPI Row
    html.Div(id="pvp-kpis"),

    # Charts
    dbc.Row([
        dbc.Col(dcc.Graph(id="pvp-outcomes"), md=4),
        dbc.Col(dcc.Graph(id="pvp-phase"), md=4),
        dbc.Col(dcc.Graph(id="pvp-over-profile"), md=4),
    ], className="mb-3"),

    dbc.Row([
        dbc.Col(dcc.Graph(id="pvp-scatter"), md=7),
        dbc.Col(dcc.Graph(id="pvp-match-summary"), md=5),
    ], className="mb-3"),

    # Ball-by-ball table (compact)
    html.H4("🧾 Ball-by-Ball (filtered)"),
    dcc.Loading(dcc.Graph(id="pvp-bb-table"), type="dot", color="#f1c40f"),

    html.Div(html.Small(
        "Tip: Use 'Both' direction to see combined outcomes; switch directions to isolate batter-vs-bowler roles.",
        className="text-muted"
    ))
], fluid=True)

# ==========================================
# Callback
# ==========================================
@app.callback(
    Output("pvp-kpis", "children"),
    Output("pvp-outcomes", "figure"),
    Output("pvp-phase", "figure"),
    Output("pvp-over-profile", "figure"),
    Output("pvp-scatter", "figure"),
    Output("pvp-match-summary", "figure"),
    Output("pvp-bb-table", "figure"),
    Input("pvp-a", "value"),
    Input("pvp-b", "value"),
    Input("pvp-direction", "value"),
    Input("pvp-season-range", "value")
)
def update_pvp(a, b, direction, season_range):
    """Rebuild the KPI row and every figure for the selected head-to-head.

    Parameters
    ----------
    a, b : str or None
        Names chosen in the Player A / Player B dropdowns.
    direction : str
        "A_bat_B_bowl", "B_bat_A_bowl", or any other value for both directions.
    season_range : list[int] or None
        [min, max] seasons from the slider; the full range is used when empty.

    Returns
    -------
    tuple
        (kpi row, outcomes bar, phase bar, over line, ball-by-ball scatter,
        per-match summary table, ball-by-ball table) in the order of the
        callback outputs. When the two players are not both set and distinct,
        a warning alert and empty figures are returned instead.
    """
    if not a or not b or a == b:
        empty_fig = go.Figure()
        kpi = dbc.Row([dbc.Col(dbc.Alert("Select two different players.", color="warning"))])
        return kpi, empty_fig, empty_fig, empty_fig, empty_fig, empty_fig, empty_fig

    smin, smax = season_range if season_range else (SEASON_MIN, SEASON_MAX)

    # Filter seasons first
    df = deliveries[season_range_mask(deliveries, smin, smax)].copy()
    # Pull matchup rows depending on direction (auto-detect roles)
    df_matchup = direction_filter(df, a, b, direction)

    # KPIs
    k = compute_kpis(df_matchup)

    # Team chip colors (based on most frequent teams in the matchup selection)
    def team_of(player, column_name):
        """Most frequent team of `player` in the role named by `column_name`."""
        team_col = "batting_team" if column_name == "batter" else "bowling_team"
        teams = df_matchup.loc[df_matchup[column_name] == player, team_col].dropna()
        if teams.empty:
            # Fallback: the player's own team across the whole dataset. It is the
            # batting side on balls he faces and the bowling side on balls he
            # delivers; mixing the two columns would report his opponents.
            teams = pd.concat([
                deliveries.loc[deliveries["batter"] == player, "batting_team"],
                deliveries.loc[deliveries["bowler"] == player, "bowling_team"],
            ], ignore_index=True).dropna()
        return teams.mode().iloc[0] if not teams.empty else "Unknown"

    team_a = team_of(a, "batter") if direction != "B_bat_A_bowl" else team_of(a, "bowler")
    team_b = team_of(b, "bowler") if direction != "B_bat_A_bowl" else team_of(b, "batter")
    col_a = get_team_color(team_a)
    col_b = get_team_color(team_b)

    kpi = dbc.Row([
        dbc.Col(dbc.Card(dbc.CardBody([
            html.Small("Player A", className="text-muted"),
            html.H5(a), html.Small(team_a)
        ])), style={"backgroundColor": col_a}, md=2),
        dbc.Col(dbc.Card(dbc.CardBody([
            html.Small("Player B", className="text-muted"),
            html.H5(b), html.Small(team_b)
        ])), style={"backgroundColor": col_b}, md=2),
        dbc.Col(dbc.Card(dbc.CardBody([html.Small("Balls"), html.H3(k["balls"])])), md=2),
        dbc.Col(dbc.Card(dbc.CardBody([html.Small("Runs"), html.H3(k["runs"])])), md=2),
        dbc.Col(dbc.Card(dbc.CardBody([html.Small("SR"), html.H3(k["sr"])])), md=2),
        dbc.Col(dbc.Card(dbc.CardBody([
            html.Small("4s/6s • Dot %"),
            html.H5(f'{k["fours"]}/{k["sixes"]} • {k["dot_perc"]}%')
        ])), md=2),
    ], className="mb-3 g-2")

    # Define color maps for better visualization
    # Enhanced color palette for dark theme
    outcome_colors = {
        "0": "#7f8c8d",  # Darker grey for dots
        "1": "#3498db",  # Blue for singles
        "2": "#2980b9",  # Slightly darker blue for doubles
        "3": "#2ecc71",  # Green for triples
        "4": "#f39c12",  # Orange for fours
        "6": "#e74c3c",  # Red for sixes
        "W": "#c0392b"   # Darker red/maroon for wicket, distinct from 6
    }
    phase_colors = {
        "Powerplay": "#8e44ad",  # Purple
        "Middle": "#27ae60",   # Emerald Green
        "Death": "#e74c3c",    # Alizarin Crimson (Red)
        "Other": "#7f8c8d"     # Grey
    }
    wicket_scatter_colors = {
        True: "#FF0000",   # Bright Red for wickets
        False: "#1abc9c"   # Turquoise for non-wickets
    }

    # Common layout updates for all figures for better dark theme integration
    common_layout_updates = dict(
        paper_bgcolor='rgba(0,0,0,0)',  # Transparent paper background
        plot_bgcolor='rgba(0,0,0,0)',  # Transparent plot background
        font=dict(color='white'),      # White font for text
        hoverlabel=dict(bgcolor='rgba(0,0,0,0.7)', font=dict(color='white'))  # Dark hover label
    )

    # Outcome bar (0/1/2/3/4/6/W)
    oc = outcome_counts(df_matchup)
    fig_outcomes = px.bar(oc, x="outcome", y="count",
                          title="Ball Outcomes (Valid balls only)",
                          text="count",
                          color="outcome", color_discrete_map=outcome_colors)
    fig_outcomes.update_traces(textposition="outside")
    fig_outcomes.update_layout(showlegend=False, **common_layout_updates)

    # Phase SR
    ph = phase_profile(df_matchup)
    fig_phase = px.bar(ph, x="phase", y="sr",
                       title="Phase Strike Rate vs Opponent",
                       labels={"sr": "Strike Rate"},
                       color="phase", color_discrete_map=phase_colors)
    fig_phase.update_layout(showlegend=False, **common_layout_updates)

    # Over profile: average runs by over
    op = over_profile(df_matchup)
    fig_over = px.line(op, x="over", y="avg_runs", markers=True,
                       title="Average Runs by Over vs Opponent",
                       line_shape="spline",  # Smoothen the line
                       color_discrete_sequence=["#00FFFF"]  # Aqua
                       )
    fig_over.update_traces(marker=dict(size=8, line=dict(width=2, color='DarkSlateGrey')))
    fig_over.update_layout(**common_layout_updates)

    # Scatter: ball-by-ball timeline (size=runs, color=wicket)
    # Work on an explicit copy: helper columns are added below.
    bb = ball_by_ball_view(df_matchup).copy()
    if not bb.empty:
        # Case-insensitive comparison so "run out" and "Run Out" are excluded alike.
        excluded_kinds = {kind.lower() for kind in VALID_WICKET_EXCLUDES}
        kind_norm = bb["dismissal_kind"].fillna("").astype(str).str.strip().str.lower()
        bb["is_bowler_wicket"] = ((bb["is_wicket"] == 1) &
                                  (bb["player_dismissed"].notna()) &
                                  (~kind_norm.isin(excluded_kinds)))
        bb["date_str"] = bb["date"].dt.strftime("%d-%b-%Y")
        # Marker size is proportional to the value, so a 0-run ball (which is
        # what almost every wicket is) would be drawn with size zero. Offset by
        # one to keep dots and wickets visible.
        bb["marker_size"] = bb["batsman_runs"].fillna(0) + 1
        hover = ("date_str", "match_id", "inning", "over", "ball", "batsman_runs", "total_runs",
                 "dismissal_kind", "batter", "bowler", "batting_team", "bowling_team", "player_dismissed")
        hover_data = {col: True for col in hover}
        hover_data["marker_size"] = False  # internal helper column, keep it out of the tooltip
        fig_scatter = px.scatter(
            bb, x="over", y="batsman_runs",
            size="marker_size",
            color="is_bowler_wicket",
            color_discrete_map=wicket_scatter_colors,
            hover_data=hover_data,
            title="Ball-by-Ball: Runs per Ball (wickets highlighted)"
        )
        fig_scatter.update_layout(legend_title_text="Bowler Wicket", **common_layout_updates)
    else:
        fig_scatter = go.Figure()
        fig_scatter.update_layout(title="Ball-by-Ball: No data available", **common_layout_updates)

    # Match summary table (runs/balls/SR/dismissals per match)
    sm = summary_table_by_match(df_matchup)
    if "match_id" in sm.columns:
        sm = sm.drop(columns=["match_id"])

    fig_sum = go.Figure(data=[go.Table(
        header=dict(values=list(sm.columns), align="left", fill_color="#2c3e50", font=dict(color="white", size=12)),
        cells=dict(values=[sm[c] for c in sm.columns], align="left", fill_color="#34495e", font=dict(color="white", size=11))
    )])
    fig_sum.update_layout(title="Matchup Summary by Match", **common_layout_updates)

    # Ball by ball table (compact)
    show_cols = ["date", "inning", "over",
                 "batsman_runs", "total_runs", "dismissal_kind"]

    if not bb.empty:
        bb2 = bb.copy()
        bb2["date"] = bb2["date"].dt.strftime("%d-%b-%Y")
        table_df = bb2[show_cols].tail(300)  # cap to last 300 rows for speed
    else:
        table_df = pd.DataFrame(columns=show_cols)

    fig_bb = go.Figure(data=[go.Table(
        header=dict(values=list(table_df.columns), align="left", fill_color="#2c3e50", font=dict(color="white", size=12)),
        cells=dict(values=[table_df[c] for c in table_df.columns], align="left", fill_color="#34495e", font=dict(color="white", size=11))
    )])
    fig_bb.update_layout(title="Ball-by-Ball (last 300 rows)", **common_layout_updates)

    return kpi, fig_outcomes, fig_phase, fig_over, fig_scatter, fig_sum, fig_bb

# ==========================================
# Run
# ==========================================
# Start the Dash development server on port 8060 with debug mode enabled,
# but only when this code is executed as the main module (not when imported).
if __name__ == "__main__":
    app.run(debug=True, port=8060)

<IPython.core.display.Javascript object>