# 02｜視覺化：投手 GB% 四象限 + 打者情境卡（Batter Card）

本 Notebook 需要下列輸入檔（皆放在 `DATA_DIR`）：
- `pitchers_groundball_profile.csv`（你先前產出的投手 GB 檔）
- `pitcherBox.csv`（2.1 用其 SO、BF 欄位計算 K%）
- `batter_ctx_fear_pitch_gb_topzones_full.csv`（Notebook 01 產出）

目的：
1) 用四象限圖快速定位「滾地球型投手」
2) 透過輸入打者名字，產生情境化弱點球種/落點摘要卡


In [None]:
# (Optional, Colab only) Mount Google Drive.
# The google.colab package is only importable inside Colab, so outside Colab
# the mount is skipped instead of aborting the cell with an ImportError.
try:
    from google.colab import drive
except ImportError:
    print("google.colab not available; skipping Google Drive mount.")
else:
    drive.mount('/content/drive')

## 2.1 投手 GB% 四象限圖

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Folder that holds the input CSVs and receives the outputs.
DATA_DIR = "/content/drive/MyDrive/CPBL_csv_tables_UTF8_BOM"  # <- edit this

# Every input and output of this cell lives directly under DATA_DIR.
PITCHER_GB_PATH, PITCHERBOX_PATH, OUT_PNG, OUT_CSV = (
    os.path.join(DATA_DIR, file_name)
    for file_name in (
        "pitchers_groundball_profile.csv",
        "pitcherBox.csv",
        "pitcher_quadrant_gb_kpct.png",
        "pitcher_quadrant_table_gb_kpct.csv",
    )
)

# Load the GB profile first, then the pitcher box table, with the same
# reader options for both.
gb, pb = (
    pd.read_csv(csv_path, encoding="utf-8-sig", low_memory=False)
    for csv_path in (PITCHER_GB_PATH, PITCHERBOX_PATH)
)

# ========== 1) Column auto-detection ==========
def pick_col(df, candidates, required=True):
    """Return the first entry of ``candidates`` that is a column of ``df``.

    Args:
        df: Object exposing a ``columns`` collection (e.g. a DataFrame).
        candidates: Column names to try, in priority order.
        required: When true, a miss raises instead of returning ``None``.

    Returns:
        The first matching candidate, or ``None`` when nothing matched and
        ``required`` is false.

    Raises:
        KeyError: Nothing matched and ``required`` is true.
    """
    # Private sentinel so that "no match" can never be confused with a
    # candidate value.
    no_match = object()
    found = next((name for name in candidates if name in df.columns), no_match)
    if found is not no_match:
        return found
    if not required:
        return None
    raise KeyError(f"Cannot find any of columns: {candidates}")

# Pitcher-name column in each table.
gb_name_col = pick_col(gb, ["pitcherName","playerName","name"])
pb_name_col = pick_col(pb, ["playerName","pitcherName","name"])

# GB% column (the profile may call it "GB%", "gb_rate", ...).
# Uses pick_col like every other lookup in this cell; on a miss the available
# columns are printed before re-raising with the original message.
try:
    gb_pct_col = pick_col(gb, ["GB%","gb_pct","gb_rate","GB_rate","GB_rate_pct"])
except KeyError:
    print("pitchers_groundball_profile.csv columns:", gb.columns.tolist())
    raise KeyError("找不到 GB% 欄位，請確認 pitchers_groundball_profile.csv 的 GB% 欄位名") from None

# SO / BF columns of pitcherBox.
so_col = pick_col(pb, ["SO","so","strikeouts"])
bf_col = pick_col(pb, ["BF","bf","battersFaced"])

# ========== 2) Clean and aggregate pitcherBox ==========
pb = pb.rename(columns={pb_name_col:"pitcherName"}).copy()
# Drop rows without a name BEFORE casting to str: astype(str) would turn a
# missing value into the literal "nan", and all unnamed rows would then be
# summed (and later merged) as if they were one pitcher called "nan".
pb = pb[pb["pitcherName"].notna()].copy()
pb["pitcherName"] = pb["pitcherName"].astype(str).str.strip()

# Coerce to numbers; unparseable cells count as 0.
pb[so_col] = pd.to_numeric(pb[so_col], errors="coerce").fillna(0)
pb[bf_col] = pd.to_numeric(pb[bf_col], errors="coerce").fillna(0)

# A pitcher can appear in several rows (games / teams), so total them first.
pb_sum = (pb.groupby("pitcherName", as_index=False)
            .agg(SO=(so_col,"sum"),
                 BF=(bf_col,"sum")))

# K% = SO / BF, left as NaN when the pitcher faced no batters.
pb_sum["K_pct"] = np.where(pb_sum["BF"]>0, pb_sum["SO"]/pb_sum["BF"], np.nan)

# ========== 3) Merge with the GB profile ==========
gb2 = gb.rename(columns={gb_name_col:"pitcherName"}).copy()
# Same reasoning as for pitcherBox: unnamed rows must not become "nan".
gb2 = gb2[gb2["pitcherName"].notna()].copy()
gb2["pitcherName"] = gb2["pitcherName"].astype(str).str.strip()
gb2[gb_pct_col] = pd.to_numeric(gb2[gb_pct_col], errors="coerce")

df = gb2.merge(pb_sum, on="pitcherName", how="left")

# Filter out tiny samples (K% is too noisy when BF is small). Pitchers with
# no pitcherBox match have BF = NaN after the left merge and are dropped too.
MIN_BF = 50
df = df[df["BF"].fillna(0) >= MIN_BF].copy()

# ========== 4) Quadrant cut points (medians) ==========
x = df[gb_pct_col].astype(float)
y = df["K_pct"].astype(float)

# nanmedian ignores missing values when locating the cut points.
x_med = float(np.nanmedian(x))
y_med = float(np.nanmedian(y))

def quadrant(gbv, kv, gb_cut=None, k_cut=None):
    """Classify one pitcher into a GB% / K% quadrant label.

    Args:
        gbv: The pitcher's ground-ball value.
        kv: The pitcher's strikeout-rate value.
        gb_cut: Cut point on the GB axis. Defaults to the notebook-level
            ``x_med`` when omitted.
        k_cut: Cut point on the K axis. Defaults to the notebook-level
            ``y_med`` when omitted.

    Returns:
        ``"UNK"`` if either value is NaN, otherwise one of the four quadrant
        labels. A value equal to its cut point counts as "high".
    """
    if np.isnan(gbv) or np.isnan(kv):
        return "UNK"
    # The globals are only looked up when the caller did not pass explicit
    # cuts, so the function can be used without the notebook state.
    if gb_cut is None:
        gb_cut = x_med
    if k_cut is None:
        k_cut = y_med
    if gbv >= gb_cut and kv >= k_cut:
        return "Q1 高GB高K"
    if gbv >= gb_cut and kv < k_cut:
        return "Q2 高GB低K(佈陣最有利)"
    if gbv < gb_cut and kv >= k_cut:
        return "Q3 低GB高K"
    return "Q4 低GB低K(風險高)"

# Label every pitcher with a quadrant and export the table.
df["Quadrant"] = [quadrant(a,b) for a,b in zip(x,y)]
df.to_csv(OUT_CSV, index=False, encoding="utf-8-sig")
print("Saved:", OUT_CSV, "| rows:", len(df))
print("Median GB%:", x_med, "Median K%:", y_med)

# ========== 5) Plot ==========
plt.figure(figsize=(10,7))
plt.scatter(x, y, alpha=0.6)
# The two median lines split the plane into the four quadrants.
plt.axvline(x_med)
plt.axhline(y_med)
plt.xlabel(gb_pct_col)
plt.ylabel("K% (SO / BF)")
plt.title("Pitcher Quadrant: GB% vs K% (SO/BF)")

# Annotate a few representative pitchers (top 12 by GB%, ties broken by K%)
# so the labels do not crowd the plot.
# Rows with a missing coordinate are removed first: otherwise, when fewer
# than 12 rows have a valid GB%, NaN positions would be passed to plt.text.
# NOTE(review): pitcher names may contain CJK characters, which need a
# CJK-capable font to render - confirm the labels display correctly.
top_anno = (df.dropna(subset=[gb_pct_col, "K_pct"])
              .sort_values([gb_pct_col, "K_pct"], ascending=False)
              .head(12))
for _, r in top_anno.iterrows():
    plt.text(float(r[gb_pct_col]), float(r["K_pct"]), r["pitcherName"], fontsize=8)

plt.tight_layout()
# Save before show().
plt.savefig(OUT_PNG, dpi=200)
print("Saved:", OUT_PNG)
plt.show()

## 2.2 打者情境卡（輸入打者名字 → 弱點球種/GB落點摘要）
提示：先跑完 Notebook 01，確保 `batter_ctx_fear_pitch_gb_topzones_full.csv` 存在。

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

DATA_DIR = "/content/drive/MyDrive/CPBL_csv_tables_UTF8_BOM"  # <- edit this
FEAR_PATH = os.path.join(DATA_DIR, "batter_ctx_fear_pitch_gb_topzones_full.csv")

fear = pd.read_csv(FEAR_PATH, encoding="utf-8-sig")

# Trim whitespace around names but keep missing names missing. A plain
# astype(str) would turn NaN into the literal "nan", which then shows up as
# a batter and makes the later .dropna() calls no-ops.
_raw_names = fear["batterName"]
fear["batterName"] = _raw_names.where(
    _raw_names.isna(), _raw_names.astype(str).str.strip()
)

# === CJK font (needed to render the Chinese card text) ===
font_path = "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc"
if os.path.exists(font_path):
    font_prop = fm.FontProperties(fname=font_path)
else:
    # Say so now rather than letting card rendering fail later on the
    # missing file; fall back to matplotlib's default font so the cell still
    # runs (Chinese glyphs may then not render).
    print(
        "WARNING: CJK font not found at", font_path,
        "- falling back to the default font "
        "(on Debian/Ubuntu, e.g.: apt-get install fonts-noto-cjk)."
    )
    font_prop = fm.FontProperties()
plt.rcParams["axes.unicode_minus"] = False

# Column holding the whiff rate shown on the card.
rate_col = "fear_whiff_rate"
if rate_col not in fear.columns:
    raise KeyError("找不到 fear_whiff_rate 欄位")

# Situation order used on the card (reads better in the paper):
# (bases_group, count_group) pairs.
ctx_order = [
    ("Empty","Even"), ("Empty","PitcherAdv"), ("Empty","BatterAdv"),
    ("OnBase","Even"), ("OnBase","PitcherAdv"), ("OnBase","BatterAdv"),
    ("RISP","Even"), ("RISP","PitcherAdv"), ("RISP","BatterAdv"),
]

def defense_rule_from_topzones(z):
    """Map a comma-separated list of top ground-ball zones to fielding advice.

    Args:
        z: Zone codes such as ``"6,56"``. May also be ``None``, a float NaN
            (what pandas yields for an empty CSV cell) or a bare number.

    Returns:
        A Chinese advice sentence. ``None`` yields the "no data" message;
        an empty string or NaN yields the "insufficient sample" message;
        otherwise the first rule whose zone codes intersect the input wins,
        with a generic sentence when no rule matches.
    """
    import math  # function-scope import; only needed for the NaN check

    if z is None:
        return "資料不足。"
    if isinstance(z, float):
        # An empty CSV cell is read back as NaN. Treat it like an empty
        # string instead of letting str() turn it into the token "nan".
        if math.isnan(z):
            return "該情境下滾地球樣本不足，暫不建議調整。"
        # A single numeric zone may arrive as 6.0; compare it as "6".
        if z.is_integer():
            z = int(z)
    s = str(z).strip()
    if not s:
        return "該情境下滾地球樣本不足，暫不建議調整。"

    zones = {token.strip() for token in s.split(",") if token.strip()}

    # Rules are checked in order and the first hit wins.
    # NOTE(review): "4" is listed in both the second and third rule, so the
    # third rule can never fire on "4" - confirm which side zone 4 belongs to.
    rules = (
        (("6", "56", "7"), "建議二游整體往二壘側靠，強化二壘附近佈防。"),
        (("5", "45", "4"), "建議三壘側加重，三壘手靠近邊線、游擊偏三壘。"),
        (("3", "34", "4"), "建議一、二壘側加重，二壘手靠近一壘洞。"),
    )
    for codes, advice in rules:
        if not zones.isdisjoint(codes):
            return advice
    return "建議依主要落點區域微調內野站位。"

def zh_context(bg, cg):
    """Build the Chinese display label for one (bases, count) situation.

    Args:
        bg: Bases-group code ("Empty", "OnBase" or "RISP").
        cg: Count-group code ("PitcherAdv", "Even", "BatterAdv" or "Other").

    Returns:
        The two translated labels joined by a full-width bar. A code with
        no translation is passed through unchanged.
    """
    bases_labels = dict(Empty="壘上無人", OnBase="壘上有人", RISP="得點圈")
    count_labels = dict(
        PitcherAdv="投手優勢球數",
        Even="球數均勢",
        BatterAdv="打者優勢球數",
        Other="其他",
    )
    bases_text = bases_labels[bg] if bg in bases_labels else bg
    count_text = count_labels[cg] if cg in count_labels else cg
    return f"{bases_text}｜{count_text}"

def find_batters(query, topn=20):
    """Return batter names that contain ``query`` as a substring.

    This is a plain substring match over the unique non-null values of
    ``fear["batterName"]``, not a similarity ranking.

    Args:
        query: Text to look for; it is stringified and stripped first.
        topn: The result list is sliced to ``[:topn]``.

    Returns:
        A list of matching names, or an empty list when the stripped query
        is empty.
    """
    needle = str(query).strip()
    if not needle:
        return []
    known_names = fear["batterName"].dropna().unique().tolist()
    matches = list(filter(lambda name: needle in name, known_names))
    return matches[:topn]

def make_batter_card(batter_name, out_dir=DATA_DIR):
    """Write a per-situation weakness card for one batter as .txt and .png.

    For each (bases_group, count_group) pair in ``ctx_order`` that has data
    for the batter, the card lists the first matching row's pitch type,
    whiff rate, ground-ball sample size, top ground-ball zones and the
    fielding advice derived from those zones.

    Args:
        batter_name: Exact batter name (compared after stripping).
        out_dir: Output folder; it is created if it does not exist.

    Returns:
        ``(card_png, card_txt)``: the paths of the two files written.

    Raises:
        ValueError: No row of ``fear`` matches ``batter_name``.
    """
    batter_name = str(batter_name).strip()
    sub = fear[fear["batterName"] == batter_name].copy()
    if sub.empty:
        raise ValueError(f"找不到打者：{batter_name}。可先用 find_batters('關鍵字') 查詢。")

    # Mean rate over all of the batter's rows, shown as the card summary.
    mean_rate = sub[rate_col].mean()

    lines = [
        f"打者：{batter_name}",
        f"整體弱點強度（平均揮空率）：{mean_rate:.3f}",
        "（每格為：該情境下『最弱球種』與其揮空率，並附滾地球主要落點區）",
        "",
    ]

    for bg, cg in ctx_order:
        one = sub[(sub["bases_group"] == bg) & (sub["count_group"] == cg)]
        if one.empty:
            continue
        r = one.iloc[0]

        # Empty CSV cells come back as NaN. Normalise them here so the card
        # shows a blank zone list (not "nan") and int() does not raise on a
        # missing sample count.
        zones = r.get("gb_topzones", "")
        if pd.isna(zones):
            zones = ""
        gb_n = r.get("gb_inplay_n", 0)
        gb_n = 0 if pd.isna(gb_n) else int(gb_n)

        advice = defense_rule_from_topzones(zones)

        lines.append(f"■ 情境：{zh_context(bg,cg)}")
        lines.append(f"  弱點球種：{r['fear_pitchType']}　|　揮空率：{float(r[rate_col]):.3f}　|　GB樣本數：{gb_n}")
        lines.append(f"  滾地球落點Top區：{zones}")
        lines.append(f"  守備建議：{advice}")
        lines.append("")

    card_text = "\n".join(lines)

    # Save. Path separators in the name would otherwise be treated as
    # sub-directories.
    safe_name = batter_name.replace("/", "_").replace("\\", "_")
    os.makedirs(out_dir, exist_ok=True)
    card_txt = os.path.join(out_dir, f"card_{safe_name}.txt")
    card_png = os.path.join(out_dir, f"card_{safe_name}.png")

    with open(card_txt, "w", encoding="utf-8") as f:
        f.write(card_text)

    fig = plt.figure(figsize=(11, 8))
    plt.axis("off")
    plt.text(
        0.01, 0.99,
        card_text,
        va="top",
        fontproperties=font_prop,   # CJK font so the Chinese text renders
        fontsize=12
    )
    plt.tight_layout()
    plt.savefig(card_png, dpi=200)
    plt.show()
    # Release the figure so generating many cards does not pile up open
    # figures.
    plt.close(fig)

    print("Saved:", card_png)
    print("Saved:", card_txt)
    return card_png, card_txt


# ========= Usage example =========
# 1) Look the name up first (guards against typos).
print(find_batters("陳"))

# 2) Build the card.
make_batter_card("陳傑憲")

# Quick summary of the loaded table: row count, number of distinct batters
# and the first 20 distinct names.
for label, value in (
    ("fear rows:", len(fear)),
    ("unique batters:", fear["batterName"].nunique()),
    ("sample batters:", fear["batterName"].dropna().unique()[:20]),
):
    print(label, value)