# In [None]:  (Jupyter cell marker left over from the notebook export)
# chess_analysis.py
# Analyze chesscom_<username>_games.csv and produce insights + plots + a study plan
# pip install pandas matplotlib

import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

# --- Config & load ---
# Pick the most recently modified export in the working directory. Glob order
# is arbitrary, so when several chesscom_*_games.csv files exist we choose by
# mtime to honour the intent of "the file you just created".
CSV = max(
    Path(".").glob("chesscom_*_games.csv"),
    key=lambda p: p.stat().st_mtime,
    default=None,
)
if CSV is None:
    # Fail with an actionable message instead of a bare StopIteration.
    raise FileNotFoundError(
        "No chesscom_*_games.csv file found in the current directory; "
        "export your games first."
    )
MIN_GAMES = 30  # threshold for opening strengths/weaknesses tables
LOCAL_TZ = "America/New_York"  # tz used for the local-time columns (hour, day of week)

df = pd.read_csv(CSV)

# --- Prep ---
# Numeric score per game: win = 1, draw = 0.5, loss = 0.
# Any other value in `your_result` maps to NaN.
score_map = dict(W=1.0, D=0.5, L=0.0)
df["score"] = df["your_result"].map(score_map)

# Parse the end timestamp as UTC (unparseable values become NaT), then shift
# it to LOCAL_TZ and derive the calendar columns from the localized series.
dt = pd.to_datetime(df["end_time_utc"], errors="coerce", utc=True)
_end_local = dt.dt.tz_convert(LOCAL_TZ)
df["end_time_local"] = _end_local
df["date"] = _end_local.dt.date
df["hour"] = _end_local.dt.hour
df["dow"] = _end_local.dt.day_name()

# --- KPIs ---
# Headline counts: total games and how many ended W / D / L.
total = len(df)
wins, draws, losses = (
    df["your_result"].eq(code).sum() for code in ("W", "D", "L")
)
# Guard against an empty file so we never divide by zero.
winrate = (wins / total * 100) if total else 0

# Mean score (as a percentage, one decimal) per side played.
by_color = (df.groupby("your_side")["score"].mean() * 100).round(1)

# Per time class: number of scored games and mean score as a percentage.
by_timeclass = df.groupby("time_class")["score"].agg(count="count", score_pct="mean")
by_timeclass["score_pct"] = by_timeclass["score_pct"].mul(100).round(1)

print(f"Games: {total:,} | W: {wins} D: {draws} L: {losses} | Win%: {winrate:.1f}")
print("\nScore% by color:", by_color.to_string(), sep="\n")
print(
    "\nScore% by time class:",
    by_timeclass.sort_values("score_pct", ascending=False).to_string(),
    sep="\n",
)

# --- Openings: strengths/weaknesses ---
# One row per (ECO code, opening name): number of games and mean score in %.
opn = df.groupby(["eco", "opening_name"]).agg(
    games=("score", "size"),
    score_pct=("score", "mean"),
).reset_index()
opn["score_pct"] = opn["score_pct"].mul(100).round(1)

# Only openings played at least MIN_GAMES times are ranked.
opn_big = opn.loc[opn["games"] >= MIN_GAMES].copy()

# Rank by score; ties are broken in favour of the opening with more games.
_rank_cols = ["score_pct", "games"]
strengths = opn_big.sort_values(_rank_cols, ascending=[False, False]).head(25)
weaknesses = opn_big.sort_values(_rank_cols, ascending=[True, False]).head(25)

for _table, _out_name in (
    (strengths, "openings_strengths.csv"),
    (weaknesses, "openings_weaknesses.csv"),
):
    _table.to_csv(_out_name, index=False)
print(f"\nSaved openings_strengths.csv and openings_weaknesses.csv (min_games={MIN_GAMES})")

# --- Rating trend (monthly) ---
# Mean of `your_rating` per calendar month (months are bucketed in UTC).
# The timestamp is parsed exactly as in the prep step (utc=True,
# errors="coerce") so tz-naive strings and malformed values do not raise:
# naive values are treated as UTC and unparseable ones become NaT, which
# groupby then drops. The timezone is removed explicitly before to_period(),
# since periods carry no tz information.
trend = (
    df.dropna(subset=["your_rating"])
      .assign(
          month=lambda d: (
              pd.to_datetime(d["end_time_utc"], utc=True, errors="coerce")
                .dt.tz_localize(None)
                .dt.to_period("M")
                .dt.to_timestamp()
          )
      )
      .groupby("month")["your_rating"].mean()
)

# Skip the chart entirely when there is no rated game with a valid timestamp.
if not trend.empty:
    plt.figure(figsize=(8,4))
    trend.plot()
    plt.title("Average Rating Over Time (Monthly)")
    plt.xlabel("Month")
    plt.ylabel("Average rating")
    plt.tight_layout()
    plt.savefig("rating_trend_monthly.png", dpi=150)
    plt.close()
    print("Saved rating_trend_monthly.png")

# --- Time class bar (score%) ---
# Bar chart of score% per time class, bars ordered from lowest to highest.
tc = by_timeclass.sort_values("score_pct")
if not tc.empty:
    _fig, _ax = plt.subplots(figsize=(6, 4))
    _ax.bar(tc.index.astype(str), tc["score_pct"])
    _ax.set_title("Score% by Time Class")
    _ax.set_xlabel("Time class")
    _ax.set_ylabel("Score %")
    _fig.tight_layout()
    _fig.savefig("score_by_timeclass.png", dpi=150)
    plt.close(_fig)
    print("Saved score_by_timeclass.png")

# --- Hour-of-day performance (optional quick look) ---
# Mean score (in %) for each local hour in which at least one game ended.
hourperf = df.groupby("hour")["score"].mean() * 100
if not hourperf.empty:
    _fig, _ax = plt.subplots(figsize=(8, 4))
    _ax.plot(hourperf.index, hourperf.values, marker="o")
    _ax.set_title("Score% by Hour of Day (Local)")
    _ax.set_xlabel("Hour")
    _ax.set_ylabel("Score %")
    _ax.set_xticks(range(0, 24, 2))  # label every second hour
    _fig.tight_layout()
    _fig.savefig("score_by_hour.png", dpi=150)
    plt.close(_fig)
    print("Saved score_by_hour.png")

# --- Study plan: pick 5 weakest high-volume openings ---
# Builds a Markdown report from the KPIs computed earlier in the script and
# writes it to study_plan.md.
top_weak = weaknesses.head(5).copy()
plan_lines = [
    "# Chess Study Plan (Auto-generated)",
    "",
    f"- Total games analyzed: **{total:,}**",
    f"- Overall win%: **{winrate:.1f}%**",
    # NOTE(review): assumes `your_side` holds the lowercase labels 'white' /
    # 'black'; any other labelling renders as nan here — confirm against the export.
    f"- By color (score%): **White {by_color.get('white',float('nan')):.1f}%**, **Black {by_color.get('black',float('nan')):.1f}%**",
    "",
    # Must be an f-string so the heading shows the threshold value rather than
    # the literal text "{MIN_GAMES}".
    f"## Top 5 Weakest Openings (min {MIN_GAMES} games)",
]
for _, r in top_weak.iterrows():
    # Fall back to placeholders when the ECO code or opening name is empty.
    eco = r["eco"] or "—"
    name = r["opening_name"] or "Unknown"
    plan_lines.append(f"- **{eco} – {name}**: {r['score_pct']:.1f}% over {int(r['games'])} games")

# Two best time classes by score%, and the two local hours with the lowest
# score%. Hours are cast to int because the `hour` column is float whenever
# some timestamps failed to parse, which would otherwise print as "3.0".
best_timeclasses = by_timeclass.sort_values("score_pct", ascending=False).head(2).index.astype(str)
worst_hours = [str(int(h)) for h in hourperf.sort_values().head(2).index]

plan_lines += [
    "",
    "### What to do next (for each weak opening):",
    "1. Review a 10–15 min theory video and write a 6-line summary.",
    "2. Create a **one-page trap sheet** (common tactics & pitfalls).",
    "3. Play 5 focused games **starting from the tabiya** (use analysis board to reach position).",
    "4. Post-mortem: save 2 mistakes per game in a spaced-repetition deck.",
    "",
    "### Habit tweaks from your data:",
    f"- Strongest time classes: {', '.join(best_timeclasses)}",
    f"- Watch-out hours: top dip near hour(s) {', '.join(worst_hours)} (local time).",
    "",
    "_Generated by chess_analysis.py_",
]
Path("study_plan.md").write_text("\n".join(plan_lines), encoding="utf-8")
print("Saved study_plan.md")
