# C4 Kaggle Game Analyzer

## Config

In [1]:
from pathlib import Path

FILE = "M.N. Mohamed Aman lost_1"
GAME_JSON_PATH = Path(f"KaggleGames/{FILE}.json")
LA_DEPTH = 9
COMPUTE_ACTION_SCORES = True
MAX_PLIES = None  # stop early
FILTER = True

In [2]:
import json
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from IPython.display import display
import ipywidgets as widgets

from C4.fast_connect4_lookahead import Connect4Lookahead  
from C4.game_analysis_helpers import *
from Kaggle.N_step_lookahead_bitboard import N_step_lookahead_bitboard

pd.set_option("display.max_rows", 200)
pd.set_option("display.max_columns", 200)
pd.set_option("display.float_format", lambda x: f"{x:.3f}")

In [3]:
with open(GAME_JSON_PATH, "r", encoding="utf-8") as f:
    game = json.load(f)

cfg = game["configuration"]
ROWS = int(cfg["rows"])
COLS = int(cfg["columns"])
players = [a.get("Name") for a in game.get("info", {}).get("Agents", [])]
LOVRO = players.index('Lovro Selic') + 1

winner = players[game["rewards"].index(1)] if 1 in game["rewards"] else "Tie"

print("Game:", game.get("title", game.get("name", "<?>")))
print("EpisodeId:", game.get("info", {}).get("EpisodeId"))
print("Players:", players)
print("Rewards:", game.get("rewards"))
print("Statuses:", game.get("statuses"))
print("Board:", ROWS, "x", COLS)
print("LOVRO is mark", LOVRO)
print("Winner", winner)


Game: ConnectX
EpisodeId: 74119621
Players: ['Lovro Selic', 'M.N. Mohamed Aman']
Rewards: [-1, 1]
Statuses: ['DONE', 'DONE']
Board: 6 x 7
LOVRO is mark 1
Winner M.N. Mohamed Aman


In [4]:
steps = game["steps"]

# Gather all board snapshots by "environment step"
boards = []
step_nos = []
overages = []  # per env-step: [ov0, ov1]

for t, pair in enumerate(steps):
    b, s = find_board_in_pair(pair)
    ov = get_overage_in_pair(pair)

    if b is not None:
        boards.append(b)
        step_nos.append(s)
    else:
        # no board in this pair (rare), keep placeholder for overage alignment
        boards.append(None)
        step_nos.append(s)

    overages.append(ov)

# Forward-fill boards and overages where missing
last_board = None
for i in range(len(boards)):
    if boards[i] is None:
        boards[i] = last_board
    else:
        last_board = boards[i]

last_ov = [None, None]
for i in range(len(overages)):
    for a in (0, 1):
        if overages[i][a] is None:
            overages[i][a] = last_ov[a]
        else:
            last_ov[a] = overages[i][a]

# Now build ply list from board diffs (skip the initial state at env-step 0)
plies = []
for t in range(1, len(boards)):
    prev = boards[t - 1]
    cur = boards[t]
    if prev is None or cur is None:
        continue

    diff = diff_one_move(prev, cur)
    if diff is None:
        # Might happen at terminal bookkeeping; ignore
        continue

    mark, (r, c) = diff
    mover_idx = 0 if mark == 1 else 1

    ov_prev = overages[t - 1][mover_idx]
    ov_now  = overages[t][mover_idx]
    overage_used = 0.0
    if ov_prev is not None and ov_now is not None:
        overage_used = max(0.0, float(ov_prev) - float(ov_now))

    plies.append({
        "ply": len(plies) + 1,
        "env_step_index": t,
        "step_no": step_nos[t],
        "mark": mark,             # 1 or 2
        "player_idx": mover_idx,  # 0 or 1
        "row": r,
        "col": c,
        "board_before": prev,
        "board_after": cur,
        "overage_before": ov_prev,
        "overage_after": ov_now,
        "overage_used": overage_used,
    })

if MAX_PLIES is not None:
    plies = plies[:int(MAX_PLIES)]

print("Parsed plies:", len(plies))
print("Final rewards:", game.get("rewards"))


Parsed plies: 40
Final rewards: [-1, 1]


In [5]:
la = Connect4Lookahead()

# Make LA deterministic for analysis (otherwise it may randomize early book replies)
la.OPENING_RANDOM = False

print("LA ready. Depth =", LA_DEPTH)


LA ready. Depth = 9


In [6]:
rows = []
for rec in plies:
    b0 = rec["board_before"]
    mark = int(rec["mark"])
    actual_col = int(rec["col"])

    # LA suggestion from the same position (before the actual move)
    la_col = int(la.n_step_lookahead(b0, player=mark, depth=int(LA_DEPTH)))

    # Counterfactual board if LA had played there
    la_after = None
    la_legal = (la_col in legal_cols(b0))
    if la_legal:
        la_after = apply_move(b0, la_col, mark)

    # Optional: score gap (how much worse was actual vs LA-best per LA's own search)
    actual_score = None
    best_score = None
    score_gap = None
    if COMPUTE_ACTION_SCORES:
        sc = la.n_step_action_scores(b0, player=mark, depth=int(LA_DEPTH))
        if np.isfinite(sc[actual_col]):
            actual_score = float(sc[actual_col])
        if np.isfinite(sc[la_col]):
            best_score = float(sc[la_col])
        if actual_score is not None and best_score is not None:
            score_gap = best_score - actual_score

    rows.append({
        "ply": rec["ply"],
        "mark": mark,
        "player_idx": rec["player_idx"],
        "actual_col": actual_col,
        "la_col": la_col,
        "mismatch": (actual_col != la_col),
        "overage_used": rec["overage_used"],
        "overage_after": rec["overage_after"],
        "actual_score": actual_score,
        "best_score": best_score,
        "score_gap": score_gap,
        "board_before": b0,
        "board_after": rec["board_after"],
        "board_la_after": la_after,
    })

df = pd.DataFrame(rows)

if FILTER:
    df = df[(df["mark"] == LOVRO) & (df["mismatch"] == True)]


## Mismatches

In [7]:
display(df.head(10))
print("Mismatches:", int(df["mismatch"].sum()), "/", len(df))

Unnamed: 0,ply,mark,player_idx,actual_col,la_col,mismatch,overage_used,overage_after,actual_score,best_score,score_gap,board_before,board_after,board_la_after
6,7,1,0,4,3,True,0.0,60.0,349.372,424.986,75.614,"[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0],...","[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0],...","[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0],..."
10,11,1,0,4,3,True,0.0,60.0,579.713,648.406,68.694,"[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0],...","[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0],...","[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0],..."
18,19,1,0,1,2,True,0.0,60.0,145331.419,145331.419,0.0,"[[0, 0, 0, 2, 0, 0, 0], [0, 0, 2, 2, 0, 0, 0],...","[[0, 0, 0, 2, 0, 0, 0], [0, 0, 2, 2, 0, 0, 0],...","[[0, 0, 1, 2, 0, 0, 0], [0, 0, 2, 2, 0, 0, 0],..."
24,25,1,0,0,5,True,0.0,60.0,145294.527,146919.674,1625.146,"[[0, 2, 1, 2, 0, 0, 0], [0, 2, 2, 2, 0, 0, 0],...","[[0, 2, 1, 2, 0, 0, 0], [0, 2, 2, 2, 0, 0, 0],...","[[0, 2, 1, 2, 0, 0, 0], [0, 2, 2, 2, 0, 1, 0],..."
28,29,1,0,6,0,True,0.0,60.0,951.072,951.072,0.0,"[[0, 2, 1, 2, 0, 2, 0], [0, 2, 2, 2, 0, 1, 0],...","[[0, 2, 1, 2, 0, 2, 0], [0, 2, 2, 2, 0, 1, 0],...","[[0, 2, 1, 2, 0, 2, 0], [0, 2, 2, 2, 0, 1, 0],..."


Mismatches: 5 / 5


In [8]:
idx_abs = df["score_gap"].abs().idxmax()
row_abs = df.loc[idx_abs]
PLY = int(row_abs['ply'])

print("\nWorst (by |score_gap|):")
print(f"PLY={PLY} | mark={int(row_abs['mark'])} | actual_col={int(row_abs['actual_col'])} | la_col={int(row_abs['la_col'])}")
print(f"actual_score={float(row_abs['actual_score']):.4f} | best_score={float(row_abs['best_score']):.4f} | score_gap={float(row_abs['score_gap']):.4f}")


Worst (by |score_gap|):
PLY=25 | mark=1 | actual_col=0 | la_col=5
actual_score=145294.5273 | best_score=146919.6735 | score_gap=1625.1462


In [9]:
analysis_rows = []

for i in range(len(df)):
    r = df.iloc[i]
    mark = int(r["mark"])
    opp = opp_mark(mark)

    b0 = r["board_before"]
    bA = r["board_after"]
    bL = r["board_la_after"]
    if bL is None:
        bL = b0  # fallback

    actual_col = int(r["actual_col"])
    la_col = int(r["la_col"])

    legal0 = legal_cols(b0)
    forced = (len(legal0) == 1)

    my_wins0 = win_cols(b0, mark)
    opp_wins0 = win_cols(b0, opp)

    my_safe0 = safe_moves(b0, mark)

    opp_wins_after_actual = win_cols(bA, opp)
    opp_wins_after_la     = win_cols(bL, opp)

    # fork-ish signal (compat wrapper in LA)
    fork_actual = la.compute_fork_signals(b0, bA, mover=mark)
    fork_la     = la.compute_fork_signals(b0, bL, mover=mark)

    # feature deltas
    f0 = pure_features(b0, mark)
    fA = pure_features(bA, mark)
    fL = pure_features(bL, mark)

    dA = feature_delta(fA, f0)
    dL = feature_delta(fL, f0)

    # Tags / reasons
    tags = []

    if forced:
        tags.append("FORCED_MOVE")

    if len(my_wins0) > 0 and actual_col not in my_wins0:
        tags.append("MISSED_WIN_IN_1")

    if len(opp_wins0) > 0 and (len(my_wins0) == 0) and (actual_col not in opp_wins0):
        tags.append("FAILED_BLOCK_OPP_WIN_IN_1")

    if len(opp_wins_after_actual) > 0 and (len(my_wins0) == 0):
        # if you had a win-in-1, allowing opp win-in-1 doesn't matter; otherwise it's usually catastrophic
        tags.append("ALLOWED_OPP_WIN_IN_1")

    if actual_col == 3:
        tags.append("PLAYED_CENTER")

    if la_col == 3:
        tags.append("LA_WANTS_CENTER")

    if actual_col != la_col:
        tags.append("DISAGREE_WITH_LA")

    analysis_rows.append({
        "ply": int(r["ply"]),
        "mark": mark,
        "actual_col": actual_col,
        "la_col": la_col,
        "legal0": legal0,
        "safe0": my_safe0,
        "my_wins0": my_wins0,
        "opp_wins0": opp_wins0,
        "opp_wins_after_actual": opp_wins_after_actual,
        "opp_wins_after_la": opp_wins_after_la,
        "fork_actual_my_after": int(fork_actual.get("my_after", 0)),
        "fork_actual_opp_before": int(fork_actual.get("opp_before", 0)),
        "fork_actual_opp_after": int(fork_actual.get("opp_after", 0)),
        "fork_la_my_after": int(fork_la.get("my_after", 0)),
        "fork_la_opp_before": int(fork_la.get("opp_before", 0)),
        "fork_la_opp_after": int(fork_la.get("opp_after", 0)),
        "tags": tags,
        "tags_str": ", ".join(tags),

        # keep your existing score gap (if present)
        "actual_score": r.get("actual_score", None),
        "best_score": r.get("best_score", None),
        "score_gap": r.get("score_gap", None),

        # feature deltas (Actual)
        "dA_me_pure2": dA["me_pure2"],
        "dA_me_pure3": dA["me_pure3"],
        "dA_opp_pure2": dA["opp_pure2"],
        "dA_opp_pure3": dA["opp_pure3"],
        "dA_me_imw": dA["me_imw"],
        "dA_opp_imw": dA["opp_imw"],

        # feature deltas (LA)
        "dL_me_pure2": dL["me_pure2"],
        "dL_me_pure3": dL["me_pure3"],
        "dL_opp_pure2": dL["opp_pure2"],
        "dL_opp_pure3": dL["opp_pure3"],
        "dL_me_imw": dL["me_imw"],
        "dL_opp_imw": dL["opp_imw"],
    })

dfA = pd.DataFrame(analysis_rows)



## Tagged plies

In [10]:
display(dfA.head(12))
print("Tagged plies:", dfA["tags_str"].astype(bool).sum(), "/", len(dfA))

Unnamed: 0,ply,mark,actual_col,la_col,legal0,safe0,my_wins0,opp_wins0,opp_wins_after_actual,opp_wins_after_la,fork_actual_my_after,fork_actual_opp_before,fork_actual_opp_after,fork_la_my_after,fork_la_opp_before,fork_la_opp_after,tags,tags_str,actual_score,best_score,score_gap,dA_me_pure2,dA_me_pure3,dA_opp_pure2,dA_opp_pure3,dA_me_imw,dA_opp_imw,dL_me_pure2,dL_me_pure3,dL_opp_pure2,dL_opp_pure3,dL_me_imw,dL_opp_imw
0,7,1,4,3,"[0, 1, 2, 3, 4, 5, 6]","[0, 1, 2, 3, 4, 5, 6]",[],[],[],[],0,0,0,0,0,0,"[LA_WANTS_CENTER, DISAGREE_WITH_LA]","LA_WANTS_CENTER, DISAGREE_WITH_LA",349.372,424.986,75.614,1,0,-1,0,0,0,1,0,0,0,0,0
1,11,1,4,3,"[0, 1, 2, 3, 4, 5, 6]","[0, 1, 2, 3, 4, 5, 6]",[],[],[],[],0,0,0,1,0,0,"[LA_WANTS_CENTER, DISAGREE_WITH_LA]","LA_WANTS_CENTER, DISAGREE_WITH_LA",579.713,648.406,68.694,3,0,-1,0,0,0,-1,1,0,0,1,0
2,19,1,1,2,"[0, 1, 2, 4, 5, 6]","[0, 1, 2, 4, 5, 6]",[],[],[],[],0,0,0,0,0,0,[DISAGREE_WITH_LA],DISAGREE_WITH_LA,145331.419,145331.419,0.0,1,0,0,0,0,0,0,0,-1,0,0,0
3,25,1,0,5,"[0, 4, 5, 6]","[0, 4, 5, 6]",[],[],[],[],0,0,0,0,0,0,[DISAGREE_WITH_LA],DISAGREE_WITH_LA,145294.527,146919.674,1625.146,0,0,0,0,0,0,0,1,-1,0,0,0
4,29,1,6,0,"[0, 4, 6]","[0, 4, 6]",[],[],[],[],0,0,0,1,0,0,[DISAGREE_WITH_LA],DISAGREE_WITH_LA,951.072,951.072,0.0,0,0,0,0,0,0,0,0,0,0,1,0


Tagged plies: 5 / 5


## Board

In [11]:
names = [a.get("Name", f"Agent{i}") for i, a in enumerate(game.get("info", {}).get("Agents", []))]
if len(names) < 2:
    names = ["P1", "P2"]

def show_ply_plus(ply_index: int):
    r0 = df.iloc[int(ply_index)]
    rA = dfA.iloc[int(ply_index)]

    ply = int(r0["ply"])
    mark = int(r0["mark"])
    who = names[0] if mark == 1 else names[1]

    actual_col = int(r0["actual_col"])
    la_col = int(r0["la_col"])

    tags = rA["tags_str"]
    my_wins0 = rA["my_wins0"]
    opp_wins0 = rA["opp_wins0"]
    safe0 = rA["safe0"]
    opp_wins_after_actual = rA["opp_wins_after_actual"]

    ou = r0["overage_used"]
    oa = r0["overage_after"]
    gap = rA.get("score_gap", None)

    lines = []
    lines.append(f"Ply {ply} | {who} (mark {mark})")
    lines.append(f"Actual: col {actual_col} | LA: col {la_col}")
    if gap is not None and not (gap is None or (isinstance(gap, float) and np.isnan(gap))):
        lines.append(f"LA score gap (best - actual): {gap:.3f}")
    lines.append(f"Threats before: my win-in-1={my_wins0} | opp win-in-1={opp_wins0} | safe moves={safe0}")
    lines.append(f"After actual: opp win-in-1={opp_wins_after_actual}")
    lines.append(f"Time: overage_used={ou:.6f} | overage_after={oa}")
    if tags:
        lines.append(f"Tags: {tags}")

    supt = " | ".join(lines)

    board_after = r0["board_after"]
    board_la_after = r0["board_la_after"]
    if board_la_after is None:
        board_la_after = r0["board_before"]

    show_two_boards(
        board_after, f"Actual (col {actual_col})",
        board_la_after, f"LA (col {la_col})",
        suptitle=supt
    )


slider2 = widgets.IntSlider(value=0, min=0, max=len(df)-1, step=1, description="ply")
widgets.interact(show_ply_plus, ply_index=slider2)



interactive(children=(IntSlider(value=0, description='ply', max=4), Output()), _dom_classes=('widget-interact'…

<function __main__.show_ply_plus(ply_index: int)>

In [12]:
report = df.copy()

# Only meaningful if we computed action scores
if "score_gap" in report.columns and report["score_gap"].notna().any():
    report = report.sort_values("score_gap", ascending=False)

cols = [
    "ply", "mark", "actual_col", "la_col", "mismatch",
    "overage_used", "overage_after",
    "actual_score", "best_score", "score_gap",
]
display(report[cols].head(30))


Unnamed: 0,ply,mark,actual_col,la_col,mismatch,overage_used,overage_after,actual_score,best_score,score_gap
24,25,1,0,5,True,0.0,60.0,145294.527,146919.674,1625.146
6,7,1,4,3,True,0.0,60.0,349.372,424.986,75.614
10,11,1,4,3,True,0.0,60.0,579.713,648.406,68.694
18,19,1,1,2,True,0.0,60.0,145331.419,145331.419,0.0
28,29,1,6,0,True,0.0,60.0,951.072,951.072,0.0


In [13]:
# Heuristics to surface "real" mistakes:
# - missed win-in-1
# - failed block / allowed immediate loss
# - or big LA score gap (if available)

def has_tag(s: str, tag: str) -> bool:
    return tag in (s or "")

interesting = dfA.copy()

interesting["is_missed_win"] = interesting["tags_str"].apply(lambda s: has_tag(s, "MISSED_WIN_IN_1"))
interesting["is_fail_block"] = interesting["tags_str"].apply(lambda s: has_tag(s, "FAILED_BLOCK_OPP_WIN_IN_1"))
interesting["is_allow_loss"] = interesting["tags_str"].apply(lambda s: has_tag(s, "ALLOWED_OPP_WIN_IN_1"))

# If score_gap exists, use it; otherwise rank tacticals first
if "score_gap" in interesting.columns and interesting["score_gap"].notna().any():
    interesting = interesting.sort_values(
        by=["is_missed_win", "is_fail_block", "is_allow_loss", "score_gap"],
        ascending=[False, False, False, False],
    )
else:
    interesting = interesting.sort_values(
        by=["is_missed_win", "is_fail_block", "is_allow_loss"],
        ascending=[False, False, False],
    )

cols = [
    "ply","mark","actual_col","la_col","tags_str",
    "my_wins0","opp_wins0","safe0","opp_wins_after_actual",
    "score_gap",
    "dA_me_pure3","dA_opp_pure3","dL_me_pure3","dL_opp_pure3",
]
display(interesting[cols].head(40))


Unnamed: 0,ply,mark,actual_col,la_col,tags_str,my_wins0,opp_wins0,safe0,opp_wins_after_actual,score_gap,dA_me_pure3,dA_opp_pure3,dL_me_pure3,dL_opp_pure3
3,25,1,0,5,DISAGREE_WITH_LA,[],[],"[0, 4, 5, 6]",[],1625.146,0,0,1,0
0,7,1,4,3,"LA_WANTS_CENTER, DISAGREE_WITH_LA",[],[],"[0, 1, 2, 3, 4, 5, 6]",[],75.614,0,0,0,0
1,11,1,4,3,"LA_WANTS_CENTER, DISAGREE_WITH_LA",[],[],"[0, 1, 2, 3, 4, 5, 6]",[],68.694,0,0,1,0
2,19,1,1,2,DISAGREE_WITH_LA,[],[],"[0, 1, 2, 4, 5, 6]",[],0.0,0,0,0,0
4,29,1,6,0,DISAGREE_WITH_LA,[],[],"[0, 4, 6]",[],0.0,0,0,0,0


In [14]:
summary = dfA.copy()
summary["player"] = summary["mark"].map({1: "P1", 2: "P2"})

def rate(x):
    return float(np.mean(x)) if len(x) else float("nan")

agg = summary.groupby("player").agg(
    plies=("ply", "count"),
    disagree_rate=("actual_col", lambda s: float(np.mean(s.values != summary.loc[s.index, "la_col"].values))),
    missed_win=("is_missed_win", "sum") if "is_missed_win" in summary.columns else ("ply","count"),
    fail_block=("is_fail_block", "sum") if "is_fail_block" in summary.columns else ("ply","count"),
    allow_loss=("is_allow_loss", "sum") if "is_allow_loss" in summary.columns else ("ply","count"),
    avg_overage_used=("ply", lambda s: float(np.mean([plies[p-1]["overage_used"] for p in s.values]))),
    avg_score_gap=("score_gap", "mean"),
).reset_index()

display(agg)


Unnamed: 0,player,plies,disagree_rate,missed_win,fail_block,allow_loss,avg_overage_used,avg_score_gap
0,P1,5,1.0,5,5,5,0.0,353.891


In [15]:
# -------------------- Cell 14: Weight Suspicion Report (Lovro-only) --------------------

# ---- IMPORTANT: Lovro-only base + reset index so .loc[i] is valid ----
df_base = df[df["mark"] == LOVRO].copy().reset_index(drop=True)

records = []

for i in range(len(df_base)):
    b0 = df_base.loc[i, "board_before"]
    mark = int(df_base.loc[i, "mark"])  # == LOVRO
    opp = opp_mark(mark)

    legal0 = legal_cols(b0)
    if not legal0:
        continue

    la_col = int(df_base.loc[i, "la_col"])

    my_wins0 = toList(la.count_immediate_wins(b0, mark))
    opp_wins0 = toList(la.count_immediate_wins(b0, opp))

    packs = [move_eval_pack(b0, mark, c) for c in legal0]
    pack_la = move_eval_pack(b0, mark, la_col)

    la_illegal = (pack_la is None)

    pack_def = argmin_lex(packs, keys=("opp_imw", "opp_p3", "opp_fork_after"))
    pack_att = argmax_lex(packs, keys=("my_imw", "my_p3", "my_fork_after"))

    la_skips_win1 = (len(my_wins0) > 0) and (la_col not in my_wins0)
    la_fails_block1 = (len(opp_wins0) > 0) and (len(my_wins0) == 0) and (la_col not in opp_wins0)

    la_allows_opp_win1 = False
    la_unsafe_when_safe_exists = False
    la_center_when_unsafe = False
    la_greedy_fork_over_safety = False
    la_not_defensive_min = False

    if pack_la is None:
        la_allows_opp_win1 = True
        la_unsafe_when_safe_exists = (pack_def is not None)
    else:
        la_allows_opp_win1 = (pack_la["opp_imw"] > 0) and (len(my_wins0) == 0)
        la_unsafe_when_safe_exists = (pack_la["opp_imw"] > 0) and (pack_def is not None) and (pack_def["opp_imw"] == 0)
        la_center_when_unsafe = (la_col == 3) and la_unsafe_when_safe_exists

        la_greedy_fork_over_safety = (
            la_unsafe_when_safe_exists
            and pack_def is not None
            and pack_la["my_fork_after"] > pack_def["my_fork_after"]
        )

        if pack_def is not None:
            la_not_defensive_min = (pack_la["opp_imw"], pack_la["opp_p3"], pack_la["opp_fork_after"]) != (
                pack_def["opp_imw"], pack_def["opp_p3"], pack_def["opp_fork_after"]
            )

    la_center = (la_col == 3)

    records.append({
        "ply": int(df_base.loc[i, "ply"]),
        "mark": mark,
        "la_col": la_col,
        "legal_n": len(legal0),
        "my_wins0": my_wins0,
        "opp_wins0": opp_wins0,
        "la_illegal": la_illegal,
        "la_center": la_center,

        "la_skips_win1": la_skips_win1,
        "la_fails_block1": la_fails_block1,
        "la_allows_opp_win1": la_allows_opp_win1,
        "la_unsafe_when_safe_exists": la_unsafe_when_safe_exists,
        "la_center_when_unsafe": la_center_when_unsafe,
        "la_greedy_fork_over_safety": la_greedy_fork_over_safety,
        "la_not_defensive_min": la_not_defensive_min,

        "la_pack": pack_la,
        "def_pack": pack_def,
        "att_pack": pack_att,
    })

dfS = pd.DataFrame(records)

def pct(col):
    return 0.0 if len(dfS) == 0 else 100.0 * float(dfS[col].mean())

print(f"Analysed Lovro plies: {len(dfS)} / {len(df_base)} (LA_DEPTH={LA_DEPTH}) | LOVRO mark={LOVRO}")

summary_cols = [
    "la_skips_win1",
    "la_fails_block1",
    "la_allows_opp_win1",
    "la_unsafe_when_safe_exists",
    "la_center_when_unsafe",
    "la_greedy_fork_over_safety",
    "la_not_defensive_min",
]
summary = pd.DataFrame({
    "metric": summary_cols,
    "count": [int(dfS[c].sum()) for c in summary_cols],
    "rate_%": [pct(c) for c in summary_cols],
})
display(summary)

# ---- Weight suspicion heuristics (Lovro-only) ----
suspicions = []

if len(dfS) == 0:
    suspicions.append("No Lovro plies found after filtering (is the name exactly 'Lovro Selic' in the JSON?)")
else:
    if dfS["la_skips_win1"].sum() > 0:
        suspicions.append(
            "LA sometimes skips WIN-IN-1. Suspect: terminal/win detection path, or IMMEDIATE_W/MATE_SCORE handling. "
            "This should be ~0 even at depth=1."
        )

    if dfS["la_fails_block1"].sum() > 0:
        suspicions.append(
            "LA sometimes fails to BLOCK opponent WIN-IN-1 (when no win-in-1 exists for itself). "
            "Suspect: immediate threat logic or too-low DEFENSIVE / IMMEDIATE_W weighting (or depth too low)."
        )

    unsafe_rate = pct("la_unsafe_when_safe_exists")
    if unsafe_rate >= 2.0:
        suspicions.append(
            f"LA chooses unsafe moves despite safe alternatives fairly often ({unsafe_rate:.1f}%). "
            "Suspect: DEFENSIVE too low, or 'pretty threats' (pure3/forks/center) overweighted vs opponent immediate threats."
        )

    center_unsafe_rate = pct("la_center_when_unsafe")
    if center_unsafe_rate >= 0.5:
        suspicions.append(
            f"Center addiction detected: LA plays center while unsafe ({center_unsafe_rate:.1f}%). "
            "Suspect: CENTER_BONUS too high relative to defensive terms."
        )

    fork_greed_rate = pct("la_greedy_fork_over_safety")
    if fork_greed_rate >= 0.5:
        suspicions.append(
            f"Fork greed detected ({fork_greed_rate:.1f}%): LA prefers creating forks even when a safe move exists. "
            "Suspect: FORK_W too high relative to DEFENSIVE / IMMEDIATE_W."
        )

    not_def_min_rate = pct("la_not_defensive_min")
    if not_def_min_rate >= 15.0:
        suspicions.append(
            f"LA often ignores the strict defensive minimizer ({not_def_min_rate:.1f}%). "
            "If those are your blunder spots, suspect opponent threat penalties too soft or FLOATING_* too permissive."
        )

    if not suspicions:
        suspicions.append(
            "Nothing screamingly wrong from these Lovro-only safety/tactic diagnostics. "
            "If play still feels weird, it’s likely deeper weighting (FLOATING_NEAR/FAR, parity gating, center gating) or depth."
        )

print("\nWEIGHT SUSPICION REPORT (Lovro-only)")
for j, s in enumerate(suspicions, 1):
    print(f"{j}. {s}")

# ---- Drilldown ----
worst = dfS[
    (dfS["la_unsafe_when_safe_exists"]) |
    (dfS["la_fails_block1"]) |
    (dfS["la_skips_win1"])
].copy()

if len(worst) > 0:
    worst = worst.sort_values(
        by=["la_skips_win1","la_fails_block1","la_unsafe_when_safe_exists","la_center_when_unsafe","la_greedy_fork_over_safety"],
        ascending=False
    )
    display(worst[["ply","mark","la_col","my_wins0","opp_wins0","la_center","la_center_when_unsafe","la_greedy_fork_over_safety","la_pack","def_pack"]].head(25))
    print("\nTip: run show_ply_plus(ply-1) on any row above to inspect visually.")
else:
    print("\nNo 'worst' rows found under current criteria (Lovro-only).")


Analysed Lovro plies: 5 / 5 (LA_DEPTH=9) | LOVRO mark=1


Unnamed: 0,metric,count,rate_%
0,la_skips_win1,0,0.0
1,la_fails_block1,0,0.0
2,la_allows_opp_win1,0,0.0
3,la_unsafe_when_safe_exists,0,0.0
4,la_center_when_unsafe,0,0.0
5,la_greedy_fork_over_safety,0,0.0
6,la_not_defensive_min,0,0.0



WEIGHT SUSPICION REPORT (Lovro-only)
1. Nothing screamingly wrong from these Lovro-only safety/tactic diagnostics. If play still feels weird, it’s likely deeper weighting (FLOATING_NEAR/FAR, parity gating, center gating) or depth.

No 'worst' rows found under current criteria (Lovro-only).


In [16]:
# ---- Pull the diagnostic rates from dfS (Cell 14 output, which is Lovro-only now) ----
unsafe_rate = 100.0 * float(dfS["la_unsafe_when_safe_exists"].mean()) if len(dfS) else 0.0
center_unsafe_rate = 100.0 * float(dfS["la_center_when_unsafe"].mean()) if len(dfS) else 0.0
fork_greed_rate = 100.0 * float(dfS["la_greedy_fork_over_safety"].mean()) if len(dfS) else 0.0
skip_win1 = int(dfS["la_skips_win1"].sum()) if len(dfS) else 0
fail_block1 = int(dfS["la_fails_block1"].sum()) if len(dfS) else 0

coward_rate = compute_coward_rate_lovro(df,  LOVRO)

kn = get_la_knobs(la)

print("Current knobs snapshot:")
display(pd.DataFrame([kn]).T.rename(columns={0: "value"}))

print("\nRates (Lovro-only):")
print(f"- unsafe_when_safe_exists: {unsafe_rate:.2f}%")
print(f"- center_when_unsafe:      {center_unsafe_rate:.2f}%")
print(f"- greedy_fork_over_safety: {fork_greed_rate:.2f}%")
print(f"- coward_rate (safe but timid): {coward_rate:.2f}%")
print(f"- skips win-in-1: {skip_win1} | fails block-in-1: {fail_block1}")

# ---- Build proposed tweaks (no auto-apply) ----
PATCH_SAFETY = {}
PATCH_ATTACK = {}

# SAFETY tweaks
if unsafe_rate >= 5.0:
    PATCH_SAFETY["DEFENSIVE"] = clamp(kn["DEFENSIVE"] + 0.50, lo=1.0, hi=3.0)
elif unsafe_rate >= 2.0:
    PATCH_SAFETY["DEFENSIVE"] = clamp(kn["DEFENSIVE"] + 0.25, lo=1.0, hi=3.0)
elif unsafe_rate >= 1.0:
    PATCH_SAFETY["DEFENSIVE"] = clamp(kn["DEFENSIVE"] + 0.15, lo=1.0, hi=3.0)

if fork_greed_rate >= 0.5:
    PATCH_SAFETY["fork_w"] = clamp(kn["fork_w"] * 0.85, lo=0.1, hi=kn["fork_w"])
elif unsafe_rate >= 2.0:
    PATCH_SAFETY["fork_w"] = clamp(kn["fork_w"] * 0.92, lo=0.1, hi=kn["fork_w"])

if center_unsafe_rate >= 0.5:
    drop = 2.0 if center_unsafe_rate >= 2.0 else 1.0
    PATCH_SAFETY["CENTER_BONUS"] = clamp(kn["CENTER_BONUS"] - drop, lo=0.0, hi=50.0)

if unsafe_rate >= 2.0:
    PATCH_SAFETY["FLOATING_NEAR"] = clamp(kn["FLOATING_NEAR"] * 0.85, lo=0.0, hi=5.0)
    PATCH_SAFETY["FLOATING_FAR"]  = clamp(kn["FLOATING_FAR"]  * 0.85, lo=0.0, hi=5.0)

# ATTACK tweaks (only if already safe but timid)
if unsafe_rate <= 1.0 and coward_rate >= 25.0:
    PATCH_ATTACK["DEFENSIVE"] = clamp(kn["DEFENSIVE"] - 0.15, lo=1.0, hi=3.0)
    PATCH_ATTACK["THREATSPACE_W"] = clamp(kn["THREATSPACE_W"] + 0.10, lo=0.0, hi=3.0)
    PATCH_ATTACK["FLOATING_NEAR"] = clamp(kn["FLOATING_NEAR"] * 0.90, lo=0.0, hi=5.0)
    PATCH_ATTACK["FLOATING_FAR"]  = clamp(kn["FLOATING_FAR"]  * 0.90, lo=0.0, hi=5.0)

notes = []
if skip_win1 > 0 or fail_block1 > 0:
    notes.append(
        "Note: LA skipping win-in-1 or failing block-in-1 is usually NOT a tuning issue, "
        "it suggests depth too low or a bug in win detection/terminal scoring. "
        "Re-test with LA_DEPTH >= 5 and confirm these counts drop to ~0."
    )

dfT = pd.DataFrame(patch_to_rows(PATCH_SAFETY, "PATCH_SAFETY", kn) + patch_to_rows(PATCH_ATTACK, "PATCH_ATTACK", kn))

if len(dfT) == 0:
    print("\nNo tweak suggested by current thresholds.")
else:
    display(dfT)

if notes:
    print("\n".join(notes))


Current knobs snapshot:


Unnamed: 0,value
DEFENSIVE,1.4
FLOATING_NEAR,0.225
FLOATING_FAR,0.09
CENTER_BONUS,7.0
PARITY_BONUS,1.0
VERT_MUL,1.1
VERT_3_READY_BONUS,0.0
TEMPO_W,0.25
THREATSPACE_W,0.5
PARITY_MOVE_W,0.2



Rates (Lovro-only):
- unsafe_when_safe_exists: 0.00%
- center_when_unsafe:      0.00%
- greedy_fork_over_safety: 0.00%
- coward_rate (safe but timid): 80.00%
- skips win-in-1: 0 | fails block-in-1: 0


Unnamed: 0,patch,knob,current,proposed,delta
0,PATCH_ATTACK,DEFENSIVE,1.4,1.25,-0.15
1,PATCH_ATTACK,THREATSPACE_W,0.5,0.6,0.1
2,PATCH_ATTACK,FLOATING_NEAR,0.225,0.203,-0.022
3,PATCH_ATTACK,FLOATING_FAR,0.09,0.081,-0.009


# KAGGLE version

In [17]:
PLY_TO_INSPECT = PLY


In [18]:

row = df[df["ply"] == PLY].iloc[0]

b0 = np.asarray(row["board_before"], dtype=np.int8)
mark = int(row["mark"])

# Numba choice (what your analyzer calls LA)
mv_numba = la.n_step_lookahead(b0, mark, depth=LA_DEPTH)

# Numba per-move scores (same engine, different entrypoint)
scores = la.n_step_action_scores(b0, mark, depth=LA_DEPTH)
legal = la.legal_actions(b0)

tbl = pd.DataFrame({
    "col": legal,
    "score_numba": [float(scores[c]) for c in legal],
})
tbl = tbl.sort_values("score_numba", ascending=False).reset_index(drop=True)

print("PLY", PLY, "| mark", mark)
print("Numba n_step_lookahead:", mv_numba)
print("Numba argmax(score):   ", int(tbl.iloc[0]["col"]))
display(tbl)

#kaggle
print("Kaggle func:")
obs = {"board": b0.reshape(-1).tolist(), "mark": mark}
cfg = {"rows": int(ROWS), "columns": int(COLS)}
mv_kaggle = N_step_lookahead_bitboard(obs, cfg)
print("Kaggle N_step_lookahead_bitboard:", mv_kaggle)



PLY 25 | mark 1
Numba n_step_lookahead: 5
Numba argmax(score):    5


Unnamed: 0,col,score_numba
0,5,146919.674
1,0,145294.527
2,6,142125.097
3,4,229.103


Kaggle func:
DEBUG depth_end best_move 5 best_val 146919.7735
Depth reached 9 time remaining 1.9077881999546662
Kaggle N_step_lookahead_bitboard: 5


In [19]:
COLS_TO_COMPARE = [mv_kaggle, mv_numba]   # your mismatch: you played 0, LA preferred 1

In [20]:
# -------------------- Cell: Ply drilldown (root score breakdown) --------------------

row = df[df["ply"] == PLY_TO_INSPECT].iloc[0]
b0 = row["board_before"]
mark = int(row["mark"])



print(f"PLY {PLY_TO_INSPECT} | mark {mark} | comparing cols {COLS_TO_COMPARE} | LA_DEPTH={LA_DEPTH}")

rows = []
for c in COLS_TO_COMPARE:
    bias = root_bias_components(la, b0, mark, c)
    search = child_search_value(la, b0, mark, LA_DEPTH, c)

    total = float(search + bias["bias_total"])
    flip_margin = float(total - search)  # == bias_total

    denom = max(1e-9, abs(total))
    nudge_dom = float(abs(bias["bias_total"]) / denom)

    rows.append({
        "col": int(c),
        "search_only": float(search),

        # bias breakdown
        "bias_total": float(bias["bias_total"]),
        "parity_net": float(bias["bias_parity_move"] + bias["bias_parity_unlock"]),
        "bias_parity_move": float(bias["bias_parity_move"]),
        "bias_parity_unlock": float(bias["bias_parity_unlock"]),
        "bias_threatspace": float(bias["bias_threatspace"]),

        # threat details
        "my_threats": int(bias["my_threats"]),
        "opp_threats": int(bias["opp_threats"]),
        "landing_row_r": int(bias["landing_row_r"]),

        # totals
        "total": total,
        "flip_margin": flip_margin,

        # dominance / diagnostics
        "nudge_dom_%": 100.0 * nudge_dom,
        "parity_enabled": bool(bias["parity_enabled"]),
        "root_pos_is_first": bool(bias["root_pos_is_first"]),
    })

df_dbg = pd.DataFrame(rows).sort_values("total", ascending=False)
display(df_dbg)

best_total, second_total = top2(df_dbg, "total", descending=True)
best_search, second_search = top2(df_dbg, "search_only", descending=True)

if best_total is not None and best_search is not None:
    best_total_col = int(best_total["col"])
    best_search_col = int(best_search["col"])

    # how close is search among the compared moves?
    search_range = float(df_dbg["search_only"].max() - df_dbg["search_only"].min()) if len(df_dbg) else 0.0
    bias_range = float(df_dbg["bias_total"].max() - df_dbg["bias_total"].min()) if len(df_dbg) else 0.0

    # local margin between top-2 choices
    if second_search is not None:
        searchtop2_gap = float(best_search["search_only"] - second_search["search_only"])
    else:
        searchtop2_gap = float("nan")

    if second_total is not None:
        totaltop2_gap = float(best_total["total"] - second_total["total"])
    else:
        totaltop2_gap = float("nan")

    print("\nInterpretation tip:")
    print("- search_only = what minimax prefers (no root nudges). Higher is better.")
    print("- bias_total  = root nudges (parity move/unlock + threatspace).")
    print("- total       = search_only + bias_total (what root_select_fixed actually ranks).")

    print("\nQuick read:")
    print(f"- best_by_total : col {best_total_col}")
    print(f"- best_by_search: col {best_search_col}")
    print(f"- search_range over compared cols: {search_range:.4f}")
    print(f"- bias_range   over compared cols: {bias_range:.4f}")
    if second_search is not None:
        print(f"- searchtop2_gap (best - 2nd): {searchtop2_gap:.4f}")
    if second_total is not None:
        print(f"- totaltop2_gap  (best - 2nd): {totaltop2_gap:.4f}")

    # opinionated warning thresholds (tuned for your typical 0.1..0.6 nudge scales)
    CLOSE_SEARCH = 0.20
    BIG_BIAS = 0.30

    if best_total_col != best_search_col and (search_range <= CLOSE_SEARCH) and (bias_range >= BIG_BIAS):
        print("\n⚠️  NUDGE DOMINANCE WARNING")
        print("Root nudges likely decided this choice (search was close, bias spread is large).")
        print("This is where PARITY_MOVE/UNLOCK or THREATSPACE can flip close calls.")
    elif best_total_col != best_search_col:
        print("\n⚠️  CHOICE DISAGREEMENT")
        print("Root-total choice differs from pure search choice (could still be fine).")
else:
    print("Not enough data to compute dominance warnings.")


PLY 25 | mark 1 | comparing cols [5, 5] | LA_DEPTH=9


Unnamed: 0,col,search_only,bias_total,parity_net,bias_parity_move,bias_parity_unlock,bias_threatspace,my_threats,opp_threats,landing_row_r,total,flip_margin,nudge_dom_%,parity_enabled,root_pos_is_first
0,5,146919.674,0.1,0.1,0.2,-0.1,0.0,0,0,4,146919.774,0.1,0.0,True,True
1,5,146919.674,0.1,0.1,0.2,-0.1,0.0,0,0,4,146919.774,0.1,0.0,True,True



Interpretation tip:
- search_only = what minimax prefers (no root nudges). Higher is better.
- bias_total  = root nudges (parity move/unlock + threatspace).
- total       = search_only + bias_total (what root_select_fixed actually ranks).

Quick read:
- best_by_total : col 5
- best_by_search: col 5
- search_range over compared cols: 0.0000
- bias_range   over compared cols: 0.0000
- searchtop2_gap (best - 2nd): 0.0000
- totaltop2_gap  (best - 2nd): 0.0000
