<a href="https://colab.research.google.com/github/nanafish/ORS/blob/main/%E5%9B%9B%E5%8D%8A%E6%9C%9F%E8%A1%A8%E3%81%AE%E3%81%BE%E3%81%BE%E8%AA%AD%E3%82%80%E3%82%B9%E3%82%B1%E3%82%B8%E3%83%A5%E3%83%BC%E3%83%AB%E8%AA%BF%E6%95%B4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# ============================================
# 43期 スケジュール自動提案 MVP（A/B/C案）
# ★重要修正：last_pos方式を廃止し、確定(未来含む)＋提案の「最短距離(min distance)」でスパン判定
#   → 豊中(6-4w確定)があるなら、6-2wに豊中/吹田グループは入らない
#   → 木更津(市原連動)も同様に「前後の確定」を見て弾く
# ============================================

!pip -q install fugashi unidic-lite jaconv

import os, re, math, random, difflib, bisect
import pandas as pd
import openpyxl
import jaconv
from fugashi import Tagger
tagger = Tagger()

# ====== Inputs ======
# The quarterly workbook: use the uploaded file at UP when that path exists,
# otherwise fall back to BASE (resolved relative to the working directory).
UP = "/mnt/data/SA+AJ+共有用_四半期表20240303_43期提案ABC_fix_minDist_groupSpan (1).xlsx"
BASE = "SA+AJ+共有用_四半期表20240303.xlsx"
QUARTER_XLSX = UP if os.path.exists(UP) else BASE

# Auxiliary workbooks read further down in this script.
REGION_XLSX      = "43期地域別会場回数.xlsx"
STATS_XLSX       = "市区町村_統計量_全国 (1).xlsx"
SNOW_EXCEPT_XLSX = "豪雪例外リスト.xlsx"

# Name of the output workbook (the write itself is not in the visible part of the script).
OUT_QUARTER_ABC = "SA+AJ+共有用_四半期表20240303_43期提案ABC_fix_minDist_groupSpan_OUT.xlsx"

# ====== Columns (0-based) ======
COL_WEEK = 0
COL_AREA_OR_KIND = 1     # header row = 東/西/九州 (east/west/Kyushu); next row = AJ/合同/SA
COL_CITY = 2
COL_VENUE = 3
COL_PREF = 5
# NOTE(review): 72 is Excel column BT when counted 1-based (openpyxl style) but BU
# when counted 0-based like the other COL_* values — confirm at the use site.
COL_REASON_BT = 72

VENUE_PLACEHOLDER = "会場要検討"
# Week ids look like "6-4w": a 1-2 digit month, a dash, one digit, then "w".
week_id_pat = re.compile(r"^\d{1,2}-\dw$")

# Central tuning knobs for matching, spacing rules and per-variant scoring.
CONFIG = {
    # Clamp range (in weeks) applied by gap_weeks_from_count.
    "GAP_WEEKS_MIN": 6,
    "GAP_WEEKS_MAX": 30,

    # Same-week penalties; presumably applied in make_plan scoring (not visible here).
    "SAME_WEEK_PREF_PENALTY": 80.0,
    "SAME_WEEK_CITY_PENALTY": 60.0,   # same week + same group is penalised heavily (a hard ban would leave no solution)

    # Minimum similarity ratios: FUZZY_CUTOFF for match_city_key,
    # CANON_CUTOFF for canonize_city_key.
    "FUZZY_CUTOFF": 0.86,
    "CANON_CUTOFF": 0.93,

    # Snow blackout: months that are fully blocked, plus week numbers within March.
    "SNOW_BLACKOUT_MONTHS": {12, 1, 2},
    "SNOW_BLACKOUT_MARCH_W": {1},
    # Prefecture codes (two-digit number + name) subject to the snow blackout.
    "SNOW_PREF_CODES": {
        "01北海道","02青森","03岩手","04宮城","05秋田","06山形","07福島",
        "15新潟","16富山","17石川","18福井","19山梨",
        "20長野","21岐阜","31鳥取","32島根"
    },

    # Overflow ("exception extra slot") pool: required class-column value and
    # minimum population. OVR_OVERFLOW_PENALTY is not read in the visible code.
    "OVR_CLASS_J_VALUE": 1,
    "OVR_MIN_POP": 180000,
    "OVR_OVERFLOW_PENALTY": 15.0,

    # Per-variant weight sets and RNG seeds, looked up by variant in make_plan.
    "WEIGHTS": {
        "A": {"pref_slack": 3.0, "city_slack": 2.0, "unmet_bonus": 1.2,
              "low_attr_city": 2.0, "relax_penalty": 25.0, "noise": 0.8},
        "B": {"pref_slack": 4.5, "city_slack": 3.2, "unmet_bonus": 1.0,
              "low_attr_city": 0.6, "relax_penalty": 30.0, "noise": 0.8},
        "C": {"pref_slack": 2.0, "city_slack": 1.2, "unmet_bonus": 1.2,
              "low_attr_city": 3.2, "relax_penalty": 25.0, "noise": 0.8},
    },
    "SEEDS": {"A": 4301, "B": 4302, "C": 4303},
}

# ======================================================
# 0) alias辞書（CSV）
# ======================================================
ALIAS_CSV = "市区分_alias.csv"

def ensure_alias_template(path=ALIAS_CSV):
    """Create a starter alias CSV at *path* unless a file already exists there.

    The template has two columns, ``alias`` and ``canonical``, seeded with two
    example rows, and is written as UTF-8 with BOM.
    """
    if not os.path.exists(path):
        seed_rows = [
            {"alias":"なんば","canonical":"難波"},
            {"alias":"薩摩河内","canonical":"薩摩川内"},
        ]
        pd.DataFrame(seed_rows).to_csv(path, index=False, encoding="utf-8-sig")
        print(f"✅ aliasテンプレ作成: {path}")

def load_alias_map(path=ALIAS_CSV):
    """Load the alias CSV into an ``{alias: canonical}`` dict.

    The template is created first if the file is missing. Rows where either
    field is empty after stripping are skipped. Any failure while reading is
    reported on stdout and an empty dict is returned (best-effort load).
    """
    ensure_alias_template(path)
    try:
        table = pd.read_csv(path, dtype=str).fillna("")
        for col in ("alias", "canonical"):
            table[col] = table[col].astype(str).str.strip()
        mapping = {}
        for alias, canonical in zip(table["alias"], table["canonical"]):
            if alias and canonical:
                mapping[alias] = canonical
        return mapping
    except Exception as e:
        print("⚠️ alias読み込み失敗。aliasなしで続行:", e)
        return {}

ALIAS_MAP = load_alias_map(ALIAS_CSV)

def apply_alias(s: str) -> str:
    """Return the canonical spelling for *s* from ALIAS_MAP, or *s* stripped.

    ``None`` and blank input yield an empty string.
    """
    text = "" if s is None else str(s).strip()
    return ALIAS_MAP.get(text, text) if text else ""

# ======================================================
# ★連動グループ定義（CSV）
# ======================================================
GROUP_CSV = "市区分_group.csv"

def ensure_group_template(path=GROUP_CSV):
    """Create a starter linked-group CSV at *path* unless a file already exists.

    Columns are ``city_key``, ``group_key`` and ``cap``; the template is seeded
    with two example groups and written as UTF-8 with BOM.
    """
    if not os.path.exists(path):
        seed_rows = [
            {"city_key":"木更津","group_key":"木更津_市原","cap":"2"},
            {"city_key":"市原","group_key":"木更津_市原","cap":"2"},
            {"city_key":"豊中","group_key":"豊中_吹田","cap":"2"},
            {"city_key":"吹田","group_key":"豊中_吹田","cap":"2"},
        ]
        pd.DataFrame(seed_rows).to_csv(path, index=False, encoding="utf-8-sig")
        print(f"✅ groupテンプレ作成: {path}")

def load_group_map(path=GROUP_CSV):
    """Load the linked-group CSV.

    Returns:
        A pair ``(city_to_group, group_cap)``:
        ``city_to_group`` maps each city key to its group key, and
        ``group_cap`` maps a group key to its integer cap when the ``cap``
        cell parses as a number. When a group appears on several rows the
        last parsable cap wins.

    The template is created first if the file is missing. A cap that cannot
    be parsed is reported and ignored (only ValueError/OverflowError are
    caught, so interrupts still propagate). Any other failure while reading
    the file is reported and ``({}, {})`` is returned (best-effort load).
    """
    ensure_group_template(path)
    try:
        df = pd.read_csv(path, dtype=str).fillna("")
        for col in ("city_key", "group_key", "cap"):
            df[col] = df[col].astype(str).str.strip()

        city_to_group = {}
        group_cap = {}
        for ck, gk, cap in zip(df["city_key"], df["group_key"], df["cap"]):
            if not (ck and gk):
                continue
            city_to_group[ck] = gk
            if not cap:
                continue
            try:
                # int(float(...)) accepts "2" as well as "2.0".
                group_cap[gk] = int(float(cap))
            except (ValueError, OverflowError):
                print(f"⚠️ cap値を数値に変換できません（無視）: group={gk} cap={cap!r}")
        return city_to_group, group_cap
    except Exception as e:
        print("⚠️ group読み込み失敗。groupなしで続行:", e)
        return {}, {}

CITY_TO_GROUP, GROUP_CAP = load_group_map(GROUP_CSV)

def group_of_citykey(ck: str) -> str:
    """Return the linked-group key for city key *ck*.

    A city without a group entry is its own group; a falsy key yields "".
    """
    return CITY_TO_GROUP.get(ck, ck) if ck else ""

# ======================================================
# 1) 正規化・読みキー・誤字ゆれ生成
# ======================================================
def norm(s):
    """Normalise a label for comparison.

    ``None`` and float NaN become "". Otherwise the value is stringified,
    ideographic spaces are turned into ASCII spaces, the ends are stripped,
    and whitespace, dashes (including the long-vowel mark), slashes, commas,
    periods and brackets are removed.
    """
    is_nan = isinstance(s, float) and math.isnan(s)
    if s is None or is_nan:
        return ""
    text = str(s).replace("　"," ").strip()
    return re.sub(r"[ \t\n\r\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", text)

def muni_base(name: str) -> str:
    """Strip administrative suffixes from a municipality name.

    At most one trailing prefecture suffix (都/道/府/県) is removed first,
    then at most one trailing municipality suffix (市/区/町/村).
    ``None`` yields "".
    """
    if name is None:
        return ""
    text = str(name).strip()
    # Two passes, in this order, each dropping a single trailing character.
    for suffixes in (("都", "道", "府", "県"), ("市", "区", "町", "村")):
        if text.endswith(suffixes):
            text = text[:-1]
    return text

def to_katakana_reading(s: str) -> str:
    """Return a katakana reading key for *s*, used to match names by pronunciation.

    ``None`` and blank input yield "". Separator characters are removed first.
    If the text already contains any hiragana/katakana, it is converted to
    katakana directly; otherwise it is tokenised with the module-level
    ``tagger`` and the per-token readings are concatenated. In both paths the
    result is filtered down to characters in ``ァ-ン``, ``0-9`` and ``A-Z``.
    """
    if s is None:
        return ""
    s = str(s).strip()
    if not s:
        return ""
    # Same separator set that norm() removes (note it includes the long-vowel mark).
    s2 = re.sub(r"[ \t\r\n\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", s)

    # Kana present: skip the tagger. Any non-kana characters (e.g. kanji mixed
    # into the text) are dropped by the final filter rather than being read.
    if re.search(r"[ぁ-んァ-ン]", s2):
        s2 = jaconv.normalize(jaconv.hira2kata(s2))
        s2 = re.sub(r"[^ァ-ン0-9A-Z]", "", s2)
        return s2

    yomi_parts = []
    for w in tagger(s2):
        feat = w.feature
        reading = None
        # Take the first of these attributes that exists on the feature object.
        # NOTE(review): which attribute names exist depends on the installed
        # dictionary — confirm against unidic-lite.
        for k in ["reading", "kana", "pron"]:
            if hasattr(feat, k):
                reading = getattr(feat, k)
                break
        # No usable reading: fall back to the surface form.
        if not reading or reading == "*":
            reading = w.surface
        yomi_parts.append(reading)

    yomi = "".join(yomi_parts)
    yomi = jaconv.normalize(jaconv.hira2kata(yomi))
    yomi = re.sub(r"[^ァ-ン0-9A-Z]", "", yomi)
    return yomi

# Characters that are commonly confused or have variant forms in place names.
# Each key maps to the characters it may be swapped for.
CONFUSION = {
    "川": ["河"], "河": ["川"],
    "崎": ["﨑"], "﨑": ["崎"],
    "ヶ": ["ケ"], "ケ": ["ヶ"],
    "斉": ["齋", "斎"], "齋": ["斉", "斎"], "斎": ["斉", "齋"],
    "邊": ["辺", "邉"], "邉": ["辺", "邊"], "辺": ["邊", "邉"],
}

def gen_variants(s: str, limit=12):
    """Return spelling variants of *s* built from the CONFUSION table.

    The result is deterministic: the original spelling always comes first,
    followed by variants in the order they are generated (CONFUSION key
    order). At most *limit* entries are returned. ``None`` yields ``[""]``.

    The previous implementation collected variants in a ``set`` and sliced
    ``list(set)``, so the order — and, when truncated, the membership —
    depended on string hashing and could change between runs.
    """
    if s is None:
        return [""]
    s = str(s)
    variants = [s]
    seen = {s}
    for src, replacements in CONFUSION.items():
        if src in s:
            # Iterate over a snapshot so variants added in this pass are not
            # re-expanded for the same source character.
            for current in list(variants):
                for repl in replacements:
                    candidate = current.replace(src, repl)
                    if candidate not in seen:
                        seen.add(candidate)
                        variants.append(candidate)
        if len(variants) >= limit:
            break
    return variants[:limit]

def best_ratio(a, b):
    """Return the difflib similarity ratio (0.0-1.0) between sequences *a* and *b*."""
    matcher = difflib.SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

# ======================================================
# ★束ね分解 + D列ヒント（ただし実会場のみ）
# ======================================================
DELIMS = r"[・／/、,＋+＆&\s　]+"

def strip_annotations(s: str) -> str:
    """Cut trailing annotations off a city cell.

    Everything from the first opening parenthesis (full- or half-width) is
    dropped, then everything from the first status word
    (会場 / 要検討 / 検討 / 確定). The remainder is stripped.
    Falsy input yields "".
    """
    if not s:
        return ""
    text = str(s)
    paren = re.search(r"[（(]", text)
    if paren is not None:
        text = text[:paren.start()]
    marker = re.search(r"(会場|要検討|検討|確定)", text)
    if marker is not None:
        text = text[:marker.start()]
    return text.strip()

def split_city_tokens(raw: str):
    """Split a (possibly bundled) city cell into unique base city names.

    Annotations are stripped, the text is split on DELIMS, and each piece is
    alias-resolved and reduced with muni_base. Empty results are dropped and
    duplicates removed while keeping first-seen order.
    """
    cleaned = strip_annotations(raw)
    if not cleaned:
        return []
    bases = []
    for piece in re.split(DELIMS, cleaned):
        piece = piece.strip()
        if not piece:
            continue
        base = muni_base(apply_alias(piece))
        if base:
            bases.append(base)
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(bases))

def find_city_tokens_in_text(text: str, candidates_city_keys):
    """Return the candidate city keys whose normalised form occurs in *text*.

    Both sides are compared after norm(). The result keeps the candidates'
    iteration order and contains no duplicates. Empty text yields [].
    """
    if not text:
        return []
    haystack = norm(text)
    if not haystack:
        return []
    matched = {}
    for key in candidates_city_keys:
        needle = norm(key)
        if needle and needle in haystack:
            matched.setdefault(key, None)
    return list(matched)

def is_placeholder_venue(v):
    """Return True when the venue cell is empty or marked as still under review.

    ``None``, blank text, and any text containing "要検討" (which also covers
    "会場要検討") count as placeholders.
    """
    if v is None:
        return True
    text = str(v).strip()
    return text == "" or "要検討" in text

EXCLUDE_AJ_BASE_VENUES = ("AJ日本橋", "AJ秋葉原")
EXCLUDE_AJ_COUNT_KEYWORDS = ("萌え", "イラスト", "JIF")

def is_excluded_aj_base_venue(venue_raw):
    """Return True when the venue is a plain AJ base-store event.

    Whitespace is removed before comparison. A venue starting with one of
    EXCLUDE_AJ_BASE_VENUES is excluded unless the text after the store name
    contains one of EXCLUDE_AJ_COUNT_KEYWORDS. Only the first matching base
    name decides the outcome.
    """
    text = "" if venue_raw is None else str(venue_raw)
    text = re.sub(r"[\s　]+", "", text)
    for base in EXCLUDE_AJ_BASE_VENUES:
        if not text.startswith(base):
            continue
        tail = text[len(base):]
        # An empty tail (exact match) contains no keyword, so it is excluded.
        return not any(keyword in tail for keyword in EXCLUDE_AJ_COUNT_KEYWORDS)
    return False

def is_fixed_row(city_raw, venue_raw):
    """Return True when the row counts as a confirmed event for the history.

    Only rows with a real venue count: placeholder venues and plain
    AJ日本橋 / AJ秋葉原 base-store events are excluded. *city_raw* is accepted
    for interface compatibility but not inspected.
    """
    return not (is_placeholder_venue(venue_raw) or is_excluded_aj_base_venue(venue_raw))

# ======================================================
# 2) 東/西/九州 の都道府県範囲（＋北=東）
# ======================================================
# Prefecture numbers belonging to each sales area.
# NOTE(review): 47 is not a member of any of the three sets — confirm that is intended.
EAST_PREF_NUMS = {*range(1, 17), *range(19, 25)}
WEST_PREF_NUMS = {17, 18, *range(25, 34), *range(36, 40)}
KYUSHU_PREF_NUMS = {34, 35, *range(40, 47)}

def norm_area_label(x):
    """Return the stripped area label, treating "北" (north) as "東" (east).

    ``None`` yields "".
    """
    label = "" if x is None else str(x).strip()
    return "東" if label == "北" else label

def pref_num(pref_code_str: str):
    """Return the leading two-digit prefecture number of a code like "12千葉".

    Returns ``None`` for falsy input or when the stripped text does not start
    with two digits.
    """
    if not pref_code_str:
        return None
    leading = re.match(r"^(\d{2})", str(pref_code_str).strip())
    if leading is None:
        return None
    return int(leading.group(1))

def area_allowed(area_label: str, pref_code_str: str) -> bool:
    """Return whether a prefecture may be used for a slot in the given area.

    The check is permissive: an unparsable prefecture code or an area label
    other than 東 / 西 / 九州 (after norm_area_label) is always allowed.
    """
    number = pref_num(pref_code_str)
    if number is None:
        return True
    allowed_by_area = {
        "東": EAST_PREF_NUMS,
        "西": WEST_PREF_NUMS,
        "九州": KYUSHU_PREF_NUMS,
    }
    allowed = allowed_by_area.get(norm_area_label(area_label))
    return True if allowed is None else number in allowed

# ======================================================
# 3) 補助関数
# ======================================================
def find_sheet_name(xlsx_path, keywords):
    """Return the first sheet name containing every string in *keywords*.

    Falls back to the workbook's first sheet when nothing matches.

    The workbook is opened in read-only mode only to list its sheet names and
    is closed again before returning; read-only workbooks keep the file
    handle open until ``close()`` is called.
    """
    wb = openpyxl.load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        names = list(wb.sheetnames)
    finally:
        wb.close()
    for name in names:
        if all(k in name for k in keywords):
            return name
    return names[0]

def parse_week_id(week_id):
    """Split a week id such as "6-4w" into ``(month, week)`` integers.

    Returns ``(None, None)`` when the stringified value does not match the
    ``<1-2 digits>-<digit>w`` shape.
    """
    found = re.match(r"^(\d{1,2})-(\d)w$", str(week_id))
    if found is None:
        return None, None
    month, week = map(int, found.groups())
    return month, week

def gap_weeks_from_count(cnt):
    """Convert a yearly event count into a required spacing in weeks.

    The spacing is ``ceil(52 / cnt)`` clamped to
    ``[GAP_WEEKS_MIN, GAP_WEEKS_MAX]``; a non-positive count yields
    ``GAP_WEEKS_MIN``.
    """
    lower = CONFIG["GAP_WEEKS_MIN"]
    if cnt <= 0:
        return lower
    upper = CONFIG["GAP_WEEKS_MAX"]
    spacing = int(math.ceil(52 / cnt))
    return max(lower, min(upper, spacing))

def ensure_snow_except_template(path):
    """Create an empty snow-exception workbook at *path* unless one exists.

    The workbook has a single sheet titled "例外" whose first row holds the
    headers ``pref_code``, ``city_key`` and ``memo``.
    """
    if os.path.exists(path):
        return
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = "例外"
    for cell_ref, header in (("A1", "pref_code"), ("B1", "city_key"), ("C1", "memo")):
        sheet[cell_ref] = header
    workbook.save(path)

def load_snow_excepts(path):
    """Load the snow-blackout exception lists from the workbook at *path*.

    The template is created first if the file is missing. Returns a pair of
    sets ``(pref_codes, city_keys)`` built from the ``pref_code`` and
    ``city_key`` columns of the first sheet (stripped, blanks dropped). A
    missing column yields an empty set.
    """
    ensure_snow_except_template(path)
    df = pd.read_excel(path, sheet_name=0, dtype=str)

    def column_values(col):
        series = df.get(col, pd.Series([], dtype=str))
        cleaned = series.dropna().astype(str).str.strip()
        return {value for value in cleaned if value}

    pref_codes = column_values("pref_code")
    city_keys = column_values("city_key")
    return pref_codes, city_keys

SNOW_EXCEPT_PREF_CODES, SNOW_EXCEPT_CITY_KEYS = load_snow_excepts(SNOW_EXCEPT_XLSX)

def is_snow_blackout(pref_code, group_key, week_id):
    """Return True when a snow-region prefecture must not be scheduled in *week_id*.

    Returns False when:
      * *pref_code* is empty, or listed in SNOW_EXCEPT_PREF_CODES;
      * *group_key* is listed in SNOW_EXCEPT_CITY_KEYS;
      * *pref_code* is not in ``CONFIG["SNOW_PREF_CODES"]``;
      * *week_id* cannot be parsed.

    Otherwise the week is blacked out when its month is in
    ``CONFIG["SNOW_BLACKOUT_MONTHS"]``, or when it is March and its week
    number is in ``CONFIG["SNOW_BLACKOUT_MARCH_W"]`` (previously hard-coded
    as ``{1}``, which ignored the configured value).
    """
    if not pref_code:
        return False
    if pref_code in SNOW_EXCEPT_PREF_CODES:
        return False
    if group_key and group_key in SNOW_EXCEPT_CITY_KEYS:
        return False
    if pref_code not in CONFIG["SNOW_PREF_CODES"]:
        return False
    month, w = parse_week_id(week_id)
    if month is None:
        return False
    if month in CONFIG["SNOW_BLACKOUT_MONTHS"]:
        return True
    return month == 3 and w in CONFIG["SNOW_BLACKOUT_MARCH_W"]

def build_week_order(qsheet):
    """Return the ordered list of distinct week ids in the sheet and their index map.

    The week column is read top to bottom; each value is stringified and
    stripped before being tested against ``week_id_pat``. Stripping matches
    what extract_blocks does to the same cells, so an id with surrounding
    whitespace gets a position here instead of being dropped later for
    lacking one.

    Returns:
        ``(week_order, week_index)`` where ``week_index[w]`` is the position
        of ``w`` in ``week_order``.
    """
    cells = qsheet[COL_WEEK].astype(str).str.strip()
    # dict.fromkeys keeps first-seen order while removing repeats.
    week_order = list(dict.fromkeys(v for v in cells if week_id_pat.match(v)))
    return week_order, {w: i for i, w in enumerate(week_order)}

def kind_norm(x):
    """Normalise the event-kind cell so AJ / SA are always full-width.

    ``None`` and float NaN become "". All ASCII and ideographic spaces are
    removed (so "A J" reads as "AJ"), then half-width "AJ"/"SA" and
    full-width lowercase "ａｊ"/"ｓａ" are mapped to "ＡＪ"/"ＳＡ".
    """
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return ""
    text = str(x).strip()
    replacements = (
        ("　", " "),
        (" ", ""),
        ("AJ", "ＡＪ"),
        ("ａｊ", "ＡＪ"),
        ("SA", "ＳＡ"),
        ("ｓａ", "ＳＡ"),
    )
    # Applied sequentially, in this order.
    for old, new in replacements:
        text = text.replace(old, new)
    return text

def extract_blocks(qsheet, week_index_map, which):
    """Extract AJ / joint event blocks from a quarterly sheet.

    A block is a row whose week cell matches ``week_id_pat`` (the header row)
    together with the row directly below it (the detail row). Area, city,
    venue and prefecture are read from the header row; the event kind is read
    from the detail row.

    Args:
        qsheet: DataFrame read with ``header=None``.
        week_index_map: week id -> position, as built by build_week_order.
        which: fiscal-period label stored in the ``fy`` column.

    Returns:
        DataFrame of blocks whose kind is "ＡＪ" or "合同" and whose week id has
        a position. The columns are always present, so a sheet with no
        matching rows yields an empty frame instead of raising KeyError.
    """
    columns = [
        "fy", "week_id", "week_pos", "row_header", "row_detail",
        "area_label", "kind", "city_raw", "venue_raw", "pref_code",
    ]

    def cell_text(row, col):
        # Blank cells arrive as NaN; represent them as "".
        value = row[col]
        return "" if pd.isna(value) else str(value).strip()

    blocks = []
    # Stop one row early: every header row needs a detail row below it.
    for i in range(len(qsheet) - 1):
        w = qsheet.iat[i, COL_WEEK]
        if pd.isna(w):
            continue
        w = str(w).strip()
        if not week_id_pat.match(w):
            continue
        header = qsheet.iloc[i]
        detail = qsheet.iloc[i + 1]

        blocks.append({
            "fy": which, "week_id": w, "week_pos": week_index_map.get(w, None),
            "row_header": i, "row_detail": i + 1,
            "area_label": cell_text(header, COL_AREA_OR_KIND),
            "kind": kind_norm(detail[COL_AREA_OR_KIND]),
            "city_raw": cell_text(header, COL_CITY),
            "venue_raw": cell_text(header, COL_VENUE),
            "pref_code": cell_text(header, COL_PREF),
        })

    # Passing columns= keeps the schema stable even when no block was found.
    df = pd.DataFrame(blocks, columns=columns).dropna(subset=["week_pos"]).copy()
    df["week_pos"] = df["week_pos"].astype(int)
    return df[df["kind"].isin(["ＡＪ", "合同"])].copy()

# ======================================================
# 4) 統計量（例外増枠用 + 正規化基準）
# ======================================================
# Load the municipality statistics sheet and normalise its column names.
stats = pd.read_excel(STATS_XLSX, sheet_name=0)
stats.columns = [str(c).strip() for c in stats.columns]
# Municipality-name column: "市区町村" when present, otherwise the first column.
muni_col = "市区町村" if "市区町村" in stats.columns else stats.columns[0]

# Unique municipality base names (administrative suffixes removed by muni_base).
# NOTE(review): astype(str) turns blank cells into the text "nan", which then
# survives the != "" filter and the dropna() — confirm the sheet has no blanks.
stats_base_list = stats[muni_col].astype(str).map(muni_base).map(str.strip)
stats_base_list = stats_base_list[stats_base_list != ""].dropna().unique().tolist()

# Lookup from normalised name to base name; on a norm() collision the later name wins.
stats_norm_to_base = {norm(x): x for x in stats_base_list if x}
stats_norms = list(stats_norm_to_base.keys())

def canonize_city_key(city_key_raw: str):
    """Map a raw city name onto a base name from the statistics sheet.

    Resolution order:
      1. exact match on the normalised base name -> score 1.0;
      2. exact match on a CONFUSION spelling variant -> score 0.995;
      3. best fuzzy match over all statistics names, accepted when the ratio
         reaches ``CONFIG["CANON_CUTOFF"]``.
    When nothing is accepted, the alias-resolved base name itself is returned
    together with the best ratio seen.

    Returns:
        ``(name, score)``; ``("", 0.0)`` for falsy input.
    """
    if not city_key_raw:
        return "", 0.0
    base = muni_base(apply_alias(city_key_raw))
    base_norm = norm(base)

    if base_norm and base_norm in stats_norm_to_base:
        return stats_norm_to_base[base_norm], 1.0

    for variant in gen_variants(base):
        key = norm(variant)
        if key and key in stats_norm_to_base:
            return stats_norm_to_base[key], 0.995

    top_name, top_ratio = None, 0.0
    for variant in [base] + gen_variants(base):
        key = norm(variant)
        for stat_key in stats_norms:
            ratio = best_ratio(key, stat_key)
            # Strictly greater: the earliest best candidate is kept on ties.
            if ratio > top_ratio:
                top_name, top_ratio = stats_norm_to_base[stat_key], ratio

    if top_name and top_ratio >= CONFIG["CANON_CUTOFF"]:
        return top_name, top_ratio
    return base, top_ratio

# ======================================================
# 5) 四半期表読み込み
# ======================================================
# Locate the period-42 and period-43 master sheets by name keywords
# (find_sheet_name falls back to the first sheet when none matches).
sheet42 = find_sheet_name(QUARTER_XLSX, ["42期", "マスタ"])
sheet43 = find_sheet_name(QUARTER_XLSX, ["43期", "マスタ"])

# Read both sheets without a header row so columns are addressed by integer index.
q42 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet42, header=None)
q43 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet43, header=None)

# Ordered week ids and week id -> position maps, one per period.
week_order_42, week_index_42 = build_week_order(q42)
week_order_43, week_index_43 = build_week_order(q43)

# AJ / joint event blocks for each period.
b42 = extract_blocks(q42, week_index_42, "42")
b43 = extract_blocks(q43, week_index_43, "43")

# ======================================================
# 6) 地域別回数読み込み → ★束ね表記を分解して候補キーを作る（重要修正）
#    - "木更津・市原" を ["木更津","市原"] に展開して両方を候補に入れる
#    - plan_count は二重計上しない（先頭だけ count、残りは 0）
# ======================================================
# Region plan sheet, read as strings with no header row:
# column 0 = prefecture label, column 1 = city cell, column 2 = planned count.
r = pd.read_excel(REGION_XLSX, sheet_name=0, header=None, dtype=str)

# Keep rows that have both a count (column 2) and a city (column 1).
plan_rows = r[~r[2].isna()].copy()
plan_rows = plan_rows[plan_rows[1].notna()].copy()
# NOTE(review): the forward-fill runs after the filtering above, so a prefecture
# label sitting on a row without a count/city would already be gone — confirm
# the sheet layout puts the label on a data row.
plan_rows["pref_parent"] = plan_rows[0].ffill()

plan_rows["city_key_raw"] = plan_rows[1].astype(str).str.strip()
# Non-numeric counts become 0.
plan_rows["plan_count_raw"] = pd.to_numeric(plan_rows[2], errors="coerce").fillna(0).astype(int)

expanded = []
for _, rr in plan_rows.iterrows():
    pref_parent = rr["pref_parent"]
    raw_city = rr["city_key_raw"]
    cnt = int(rr["plan_count_raw"])

    # Split bundled city cells (e.g. "木更津・市原") with split_city_tokens;
    # fall back to the raw cell when nothing is left after splitting.
    toks = split_city_tokens(raw_city)
    if not toks:
        toks = [raw_city.strip()]

    # Canonicalise each token against the statistics sheet.
    canon_list = []
    for t in toks:
        ck, _ = canonize_city_key(t)
        ck = (ck or "").strip()
        if ck:
            canon_list.append(ck)

    if not canon_list:
        continue

    # Avoid double counting: only the first city of a bundle carries cnt, the rest get 0.
    for i, ck in enumerate(canon_list):
        expanded.append({
            "pref_parent": pref_parent,
            "city_key": ck,
            "plan_count": cnt if i == 0 else 0,
            "city_key_raw": raw_city
        })

region_master = pd.DataFrame(expanded)
# Aggregate rows that ended up with the same (pref_parent, city_key).
region_master = region_master.groupby(["pref_parent", "city_key"], as_index=False)["plan_count"].sum()

# Candidate keys (also needed to match confirmed slots against the plan).
PLAN_CITY_KEYS = sorted(region_master["city_key"].dropna().astype(str).str.strip().unique().tolist())
# NOTE(review): if the same city_key appears under two pref_parent values, the
# later row overwrites the earlier one in this dict.
plan_count_by_city = region_master.set_index("city_key")["plan_count"].to_dict()

# ======================================================
# ★連動グループで plan_count を組み直す（cap指定があればそれ優先）
# ======================================================
# Group key -> list of member city keys (a city with no group entry is its own group).
group_members = {}
for ck in PLAN_CITY_KEYS:
    gk = group_of_citykey(ck)
    group_members.setdefault(gk, []).append(ck)

# Planned count per group: a positive cap from the group CSV takes precedence,
# otherwise the members' planned counts are summed.
plan_count_by_group = {}
for gk, members in group_members.items():
    if gk in GROUP_CAP and GROUP_CAP[gk] > 0:
        plan_count_by_group[gk] = GROUP_CAP[gk]
    else:
        plan_count_by_group[gk] = int(sum(plan_count_by_city.get(m, 0) for m in members))

# ======================================================
# 8) 都道府県コード推定に使う pref_by_city / pref_by_group
# （ここも region_master を使って作り直し）
# ======================================================
# Distinct prefecture-code strings found in the prefecture column of the period-43 sheet.
pref_code_set = set([str(x).strip() for x in q43[COL_PREF].dropna().astype(str).unique().tolist()])
# Map the text after the two leading digits to the full code string,
# e.g. a cell of the form "<2 digits><name>" yields name -> cell.
pref_name_to_code = {}
for pc in pref_code_set:
    m = re.match(r"^(\d{2})(.+)$", pc)
    if m:
        pref_name_to_code[m.group(2)] = pc

def parent_to_pref_code(pref_parent):
    """Translate a prefecture label from the region sheet into a prefecture code.

    The leading run of non-digit characters of the stripped label is looked
    up in ``pref_name_to_code``; when the label starts with a digit the whole
    label is used as the lookup key. Returns "" for ``None``, NaN, or an
    unknown name.
    """
    missing = pref_parent is None or (
        isinstance(pref_parent, float) and math.isnan(pref_parent)
    )
    if missing:
        return ""
    label = str(pref_parent).strip()
    leading = re.match(r"^([^\d]+)", label)
    lookup_key = leading.group(1).strip() if leading else label
    return pref_name_to_code.get(lookup_key, "")

# Guess a prefecture code for every plan row from its prefecture label.
region_master["pref_code_guess"] = region_master["pref_parent"].apply(parent_to_pref_code)
pref_by_city = region_master.set_index("city_key")["pref_code_guess"].to_dict()

# A group takes the prefecture code of its first member that has one ("" if none does).
pref_by_group = {}
for gk, members in group_members.items():
    pc = ""
    for mck in members:
        pc = pref_by_city.get(mck, "")
        if pc:
            break
    pref_by_group[gk] = pc

# Required spacing (weeks) per prefecture and per group, derived from planned counts.
# Rows with an empty prefecture guess are left out of pref_gap.
pref_plan_count = region_master.groupby("pref_code_guess")["plan_count"].sum().to_dict()
pref_gap = {pc: gap_weeks_from_count(int(cnt)) for pc, cnt in pref_plan_count.items() if pc}
group_gap = {gk: gap_weeks_from_count(int(cnt)) for gk, cnt in plan_count_by_group.items()}


# ======================================================
# 7) 候補側の索引（PLAN_CITY_KEYSでマッチ）
# ======================================================
# Index the plan city keys by normalised spelling ...
city_norm_map = {ck: norm(ck) for ck in PLAN_CITY_KEYS}
norm_to_city = {}
for ck, nk in city_norm_map.items():
    if nk:
        norm_to_city.setdefault(nk, []).append(ck)

# ... and by katakana reading. Several keys may share one reading.
reading_to_city = {}
for ck in PLAN_CITY_KEYS:
    rd = to_katakana_reading(ck)
    if rd:
        reading_to_city.setdefault(rd, []).append(ck)

def choose_best_by_fuzzy(query_base, cands):
    """Pick the candidate whose normalised spelling is most similar to the query.

    Returns ``(candidate, ratio)``. The first candidate wins on ties, and
    ``(None, 0.0)`` is returned when no candidate scores above zero.
    """
    target = norm(query_base)
    winner, top_ratio = None, 0.0
    for candidate in cands:
        ratio = best_ratio(target, norm(candidate))
        if ratio > top_ratio:
            winner, top_ratio = candidate, ratio
    return winner, top_ratio

def match_city_key(city_name: str):
    """Match a city name against PLAN_CITY_KEYS.

    Fallback chain, first hit wins:
      1. exact match on the normalised base name;
      2. exact match on a CONFUSION spelling variant;
      3. match on the katakana reading;
      4. best fuzzy match over all plan keys, accepted at
         ``CONFIG["FUZZY_CUTOFF"]`` or above.

    Returns:
        ``(city_key, score)``. ``city_key`` is ``None`` when nothing is
        accepted; in that case ``score`` is the best fuzzy ratio seen
        (0.0 for empty input).
    """
    if not city_name:
        return None, 0.0
    x = apply_alias(city_name)
    b = muni_base(x)
    bn = norm(b)

    # 1) Exact normalised match. With several keys sharing the normalised form,
    #    pick the closest one and report a score of at least 0.97.
    if bn in norm_to_city and bn:
        cands = norm_to_city[bn]
        if len(cands) == 1:
            return cands[0], 1.0
        ck, rr = choose_best_by_fuzzy(b, cands)
        return ck, max(0.97, rr)

    # 2) Exact match on a spelling variant (score floor 0.95 when ambiguous).
    for v in gen_variants(b):
        vn = norm(v)
        if vn in norm_to_city and vn:
            cands = norm_to_city[vn]
            if len(cands) == 1:
                return cands[0], 0.995
            ck, rr = choose_best_by_fuzzy(v, cands)
            return ck, max(0.95, rr)

    # 3) Same katakana reading (score floor 0.93 when ambiguous).
    rd = to_katakana_reading(b)
    cands = reading_to_city.get(rd, [])
    if len(cands) == 1:
        return cands[0], 0.99
    elif len(cands) >= 2:
        ck, rr = choose_best_by_fuzzy(b, cands)
        if ck:
            return ck, max(0.93, rr)

    # 4) Fuzzy match over every plan key; the earliest best candidate is kept on ties.
    best_ck, best_r = None, 0.0
    for ck in PLAN_CITY_KEYS:
        r = best_ratio(bn, city_norm_map.get(ck, ""))
        if r > best_r:
            best_r = r
            best_ck = ck
    if best_ck and best_r >= CONFIG["FUZZY_CUTOFF"]:
        return best_ck, best_r
    return None, best_r

def match_city_keys_multi(city_raw: str, venue_raw: str = ""):
    """Return every plan city key matched by a (possibly bundled) city cell.

    Each token of the city cell is matched with match_city_key. Only when no
    token matched is the venue text used as a hint, and only if the venue is
    a real venue (not a placeholder): anything from "（B:" / "(B:" onwards is
    cut off and the remainder is scanned for plan city keys.

    The result keeps match order and contains no duplicates.
    """
    matched = []
    for token in split_city_tokens(city_raw):
        key, _score = match_city_key(token)
        if key:
            matched.append(key)

    # Venue hint applies only to real venues, and only as a fallback.
    if not matched and venue_raw and not is_placeholder_venue(venue_raw):
        venue_text = str(venue_raw).split("（B:", 1)[0].split("(B:", 1)[0]
        matched.extend(find_city_tokens_in_text(venue_text, PLAN_CITY_KEYS))

    return [key for key in dict.fromkeys(matched) if key]

def add_city_key(df):
    """Return a copy of *df* with city-matching columns added.

    Added columns:
      * ``city_key``    - first matched plan city key, or None;
      * ``city_keys``   - all matched plan city keys;
      * ``group_keys``  - distinct linked-group keys of those cities, in order;
      * ``match_score`` - score of matching the first city token (or the raw
        city cell when it yields no token); 0.0 when nothing matched.
    """
    primary_keys = []
    all_keys = []
    all_groups = []
    match_scores = []

    pairs = zip(df["city_raw"].tolist(), df["venue_raw"].tolist())
    for city_raw, venue_raw in pairs:
        matched = match_city_keys_multi(city_raw, venue_raw)
        first_key = matched[0] if matched else None

        groups = []
        for key in matched:
            grp = group_of_citykey(key)
            if grp and grp not in groups:
                groups.append(grp)

        score = 0.0
        if first_key:
            tokens = split_city_tokens(city_raw)
            _, score = match_city_key(tokens[0] if tokens else city_raw)

        primary_keys.append(first_key)
        all_keys.append(matched)
        all_groups.append(groups)
        match_scores.append(score)

    result = df.copy()
    result["city_key"] = primary_keys
    result["city_keys"] = all_keys
    result["group_keys"] = all_groups
    result["match_score"] = match_scores
    return result

# Attach matched city / group keys to every block of both periods.
scheduled42_all = add_city_key(b42.copy())
scheduled43_all = add_city_key(b43.copy())

# Only rows with a real venue go into the history (see is_fixed_row).
scheduled42 = scheduled42_all[scheduled42_all.apply(lambda r: is_fixed_row(r["city_raw"], r["venue_raw"]), axis=1)].copy()
scheduled43 = scheduled43_all[scheduled43_all.apply(lambda r: is_fixed_row(r["city_raw"], r["venue_raw"]), axis=1)].copy()

# Open AJ slots: kind is AJ, city is empty, and venue is empty or a placeholder.
# Sorted by week position, then by sheet row.
open43_AJ = b43[
    (b43["kind"]=="ＡＪ") &
    (b43["city_raw"]=="") &
    (b43["venue_raw"].apply(is_placeholder_venue))
].copy().sort_values(["week_pos","row_header"]).reset_index(drop=True)

# ======================================================
# 8) Prefecture-code inference
# ======================================================
# pref_code_set, pref_name_to_code, parent_to_pref_code, pref_by_city,
# pref_by_group, pref_plan_count, pref_gap and group_gap are all built once,
# earlier in this script, right after the linked-group plan counts.
# This spot used to hold a verbatim second copy of that code. None of its
# inputs (q43, region_master, group_members, plan_count_by_group) change in
# between, so the copy recomputed identical values and has been removed.

# ======================================================
# 9) ★最短距離(min distance)判定のための「確定開催位置リスト」
# ======================================================
# Period-43 positions are shifted by the number of period-42 weeks so both
# periods share one absolute week axis.
OFFSET_43 = len(week_order_42)

scheduled42["abs_pos"] = scheduled42["week_pos"]
scheduled43["abs_pos"] = scheduled43["week_pos"] + OFFSET_43
open43_AJ["abs_pos"]   = open43_AJ["week_pos"] + OFFSET_43

scheduled_all_fixed = pd.concat([scheduled42, scheduled43], ignore_index=True)

# For every prefecture / group, keep ALL confirmed positions (abs_pos),
# including ones later than the slot being filled.
fixed_pos_pref = {}
fixed_pos_group = {}

for _, e in scheduled_all_fixed.dropna(subset=["abs_pos"]).iterrows():
    ap = int(e["abs_pos"])
    pc = str(e.get("pref_code","") or "").strip()
    if pc:
        fixed_pos_pref.setdefault(pc, []).append(ap)
    glist = e.get("group_keys", [])
    if isinstance(glist, list):
        for gk in glist:
            if gk:
                fixed_pos_group.setdefault(gk, []).append(ap)

# Sort and de-duplicate so min_dist can bisect the lists.
for k in list(fixed_pos_pref.keys()):
    fixed_pos_pref[k] = sorted(set(fixed_pos_pref[k]))
for k in list(fixed_pos_group.keys()):
    fixed_pos_group[k] = sorted(set(fixed_pos_group[k]))

def min_dist(apos: int, sorted_positions: list):
    """Return the distance from *apos* to the nearest entry of *sorted_positions*.

    Both the nearest earlier and the nearest later position are considered.
    The list must be sorted ascending. An empty list yields 999.
    """
    if not sorted_positions:
        return 999
    idx = bisect.bisect_left(sorted_positions, apos)
    distances = []
    if idx < len(sorted_positions):
        # Nearest position at or after apos.
        distances.append(abs(sorted_positions[idx] - apos))
    if idx > 0:
        # Nearest position before apos.
        distances.append(abs(sorted_positions[idx - 1] - apos))
    nearest = min(distances, default=10**9)
    return 999 if nearest == 10**9 else nearest

def insort_unique(lst: list, x: int):
    """Insert *x* into the sorted list *lst* in place, unless it is already there."""
    idx = bisect.bisect_left(lst, x)
    already_present = idx < len(lst) and lst[idx] == x
    if not already_present:
        lst.insert(idx, x)

# 43期の「確定分」だけで消化数（グループ）
# Number of confirmed period-43 events per group. A row that lists several
# groups counts once for each of them.
scheduled_counts_43_group = {}
for _, e in scheduled43.iterrows():
    glist = e.get("group_keys", [])
    if isinstance(glist, list):
        for gk in glist:
            if gk:
                scheduled_counts_43_group[gk] = scheduled_counts_43_group.get(gk, 0) + 1

# ======================================================
# 10) 例外増枠プール（従来どおり）
# ======================================================
# Work on a copy of the statistics sheet with stripped municipality names.
stats2 = stats.copy()
stats2[muni_col] = stats2[muni_col].astype(str).str.strip()

# Class column: the 10th column when the sheet has at least 10 columns,
# otherwise the last column. Values are coerced to numbers.
class_col = stats2.columns[9] if len(stats2.columns) >= 10 else stats2.columns[-1]
stats2[class_col] = pd.to_numeric(stats2[class_col], errors="coerce")

# Population column: the first column whose name contains "人口". Without one,
# fall back to the numeric column with the largest maximum value (or the last
# column when no column is numeric).
pop_col = next((c for c in stats2.columns if "人口" in str(c)), None)
if pop_col is None:
    num_cols=[]
    for c in stats2.columns:
        s = pd.to_numeric(stats2[c], errors="coerce")
        if s.notna().sum() > 0:
            num_cols.append((c, float(s.max(skipna=True))))
    pop_col = sorted(num_cols, key=lambda x: x[1], reverse=True)[0][0] if num_cols else stats2.columns[-1]
stats2[pop_col] = pd.to_numeric(stats2[pop_col], errors="coerce")

# Overflow candidates: class equals OVR_CLASS_J_VALUE and population >= OVR_MIN_POP.
ovr = stats2[(stats2[class_col] == CONFIG["OVR_CLASS_J_VALUE"]) & (stats2[pop_col] >= CONFIG["OVR_MIN_POP"])].copy()

# Keep, per matched plan city key, the statistics row with the best match score.
ovr_candidates = {}
for name in ovr[muni_col].tolist():
    ck, sc = match_city_key(name)
    if ck:
        # Population of the first row carrying this name.
        pop_val = float(ovr.loc[ovr[muni_col] == name, pop_col].iloc[0])
        if ck not in ovr_candidates or sc > ovr_candidates[ck]["match_score"]:
            ovr_candidates[ck] = {"name_raw": name, "match_score": sc, "pop": pop_val}

OVR_POOL = set(ovr_candidates.keys())
# NOTE(review): the message hard-codes "==1" and ">=18万" instead of formatting
# the CONFIG values used in the filter above.
print(f"✅ 例外増枠プール: {len(OVR_POOL)}（J列={class_col}==1 & 人口列={pop_col}>=18万）")

# ======================================================
# 11) 集客率（あれば）
# ======================================================
# Attendance-rate column, if the statistics sheet has one: the first column
# whose name contains "集客率", "来場率" or "動員率".
rate_col = next((c for c in stats2.columns if ("集客率" in str(c) or "来場率" in str(c) or "動員率" in str(c))), None)
city_rate = {}
if rate_col is not None:
    tmp = stats2[[muni_col, rate_col]].copy()
    tmp["rate"] = pd.to_numeric(tmp[rate_col], errors="coerce")
    tmp = tmp.dropna(subset=["rate"])
    # Normalised municipality base name -> rate (a later row wins on collisions).
    muni_rate_norm = {norm(muni_base(row[muni_col])): float(row["rate"]) for _, row in tmp.iterrows()}
    # Only plan cities whose normalised key matches exactly get a rate.
    for ck in PLAN_CITY_KEYS:
        cn = norm(ck)
        if cn in muni_rate_norm:
            city_rate[ck] = muni_rate_norm[cn]

def percentile_need(values_dict):
    """Rank the values of a dict as percentiles.

    Entries whose value is ``None`` or float NaN are ignored.

    Returns:
        ``(pct_map, need_map)``: ``pct_map[k]`` is the percentile rank of
        ``values_dict[k]`` (ties share the average rank) and ``need_map[k]``
        is ``1 - pct_map[k]``. Both are empty when no usable value exists.
    """
    usable = {
        key: value
        for key, value in values_dict.items()
        if value is not None and not (isinstance(value, float) and math.isnan(value))
    }
    if not usable:
        return {}, {}
    ranks = pd.Series(list(usable.values())).rank(pct=True, method="average").tolist()
    pct_map = dict(zip(usable, ranks))
    need_map = {key: float(1 - pct) for key, pct in pct_map.items()}
    return pct_map, need_map

city_pct, city_need = percentile_need(city_rate)

def fmt_rate(v):
    """Format an attendance rate for display.

    ``None`` / NaN give "不明". Values up to 1.0 are shown as a percentage
    with two decimals; larger values are shown with four significant digits.
    """
    if v is None:
        return "不明"
    if isinstance(v, float) and math.isnan(v):
        return "不明"
    if v <= 1.0:
        return f"{v*100:.2f}%"
    return f"{v:.4g}"

def fmt_pct(p):
    """Format a percentile rank as "下位N%" (whole percent); ``None`` gives "不明"."""
    return "不明" if p is None else f"下位{p*100:.0f}%"

# ======================================================
# 理由（min distance表示）
# ======================================================
def build_reason(variant, week_id, area_label, ck, gk, pc,
                 need_gap_p, dp, need_gap_g, dg,
                 remaining_plan_g, overflow_used, overflow_meta,
                 same_week_pref_hit, same_week_group_hit, relax_mode, score):
    """Build the human-readable explanation for one proposed assignment.

    Args:
        variant: plan variant label shown in the text.
        week_id, area_label: the slot being filled.
        ck, gk, pc: chosen city key, its group key, and the prefecture code.
        need_gap_p, dp: required and actual minimum distance (weeks) for the prefecture.
        need_gap_g, dg: required and actual minimum distance (weeks) for the group.
        remaining_plan_g: remaining planned count of the group (shown when no overflow is used).
        overflow_used: whether the overflow pool was used.
        overflow_meta: dict read for the keys "pop" and "name_raw" when overflow is used.
        same_week_pref_hit, same_week_group_hit: whether same-week penalties applied.
        relax_mode: relaxation label; appended to the spacing line when truthy.
        score: final score, shown with two decimals.

    Returns:
        The reason lines joined with " / ", truncated to 32000 characters.
    """
    lines=[]
    lines.append(f"【案{variant}】{week_id}／{area_label} の空きAJ枠に対して選定。")
    lines.append(f"0) 地域フィルタ：{area_label} の範囲内（県コード={pc}）のみ。")
    # The group examples in this line are fixed text, not derived from the group CSV.
    lines.append(f"   連動グループ：{gk}（例：木更津+市原=合計年2回、豊中+吹田=合計年2回）")

    if not overflow_used:
        lines.append(f"1) 計画回数（グループ残）：{gk} 残り {remaining_plan_g} 回 → 計画内で採用（候補={ck}）")
    else:
        pop = overflow_meta.get("pop", None)
        # Population is printed with thousands separators when it is a real number.
        pop_txt = f"{int(pop):,}人" if isinstance(pop, (int,float)) and not math.isnan(pop) else "不明"
        nm = overflow_meta.get("name_raw","") or ck
        lines.append("1) 計画内で埋まらず、例外増枠を使用。")
        # The threshold wording in this line is fixed text, not read from CONFIG.
        lines.append(f"   例外：J列=1 & 人口>=18万 → {nm}（人口={pop_txt}）")

    lines.append(
        f"2) スパン(min距離)：県=必要{need_gap_p}週/最短{dp}週、グループ=必要{need_gap_g}週/最短{dg}週"
        + (f"（緩和={relax_mode}）" if relax_mode else "")
    )

    # Attendance rate and its percentile, when known for this city.
    if ck in city_rate:
        lines.append(f"3) 集客率：{fmt_rate(city_rate.get(ck))}（{fmt_pct(city_pct.get(ck))}）")
    else:
        lines.append("3) 集客率：データ無し → 回数/スパン優先")

    lines.append("4) 同週回避："
                 + ("同県ペナあり" if same_week_pref_hit else "同県OK")
                 + (" / 同グループペナあり" if same_week_group_hit else ""))

    lines.append(f"【採用】score={score:.2f}")
    # NOTE(review): the 32000 cap presumably keeps the text within a spreadsheet
    # cell's length limit — confirm.
    return " / ".join(lines)[:32000]

# ======================================================
# 週×都道府県 / 週×グループ（43期確定分のみ）
# ======================================================
# Per week id: the prefecture codes and group keys already used by confirmed
# period-43 events. make_plan copies these as its starting state.
week_used_pref_base = {}
week_used_group_base = {}
for _, e in scheduled43.iterrows():
    w = e["week_id"]
    pc = str(e.get("pref_code","") or "").strip()
    if w and pc:
        week_used_pref_base.setdefault(w, set()).add(pc)
    glist = e.get("group_keys", [])
    if w and isinstance(glist, list):
        # The week gets an entry even when the row lists no group.
        s = week_used_group_base.setdefault(w, set())
        for gk in glist:
            if gk:
                s.add(gk)

# ======================================================
# 12) プラン生成（A/B/C）…★min distance方式
# ======================================================
def make_plan(variant):
    """Greedily assign one city to every open AJ slot for plan ``variant``.

    Slots are read from ``open43_AJ`` in row order. For each slot, every key
    in ``PLAN_CITY_KEYS`` is scored by the nested ``score_city`` and the
    highest score wins. Candidates are searched in two passes (first without
    overflow, then with overflow allowed), and within each pass the relax
    modes ``None``, ``"Aのみ"``, ``"B+A"`` are tried in that order; the search
    stops at the first mode that yields any candidate.

    Each accepted assignment is recorded in the per-prefecture and per-group
    position lists and in the per-week occupancy sets, so later slots see it
    when distances and same-week penalties are computed.

    Args:
        variant: key into ``CONFIG["SEEDS"]`` and ``CONFIG["WEIGHTS"]``
            (the module calls this with "A", "B" and "C").

    Returns:
        A pandas DataFrame with one row per slot and the columns ``variant``,
        ``row_header``, ``assign_city_key``, ``pref_code_guess``, ``score``
        and ``reason_BT``. Slots with no candidate get empty keys and a score
        of ``-inf``.
    """
    rng = random.Random(CONFIG["SEEDS"][variant])
    W = CONFIG["WEIGHTS"][variant]

    # Number of slots this plan has already given to each group.
    used_in_plan_group = {gk: 0 for gk in plan_count_by_group.keys()}

    # Start from the confirmed positions (including future ones); copied so
    # that each plan mutates its own lists.
    pos_pref  = {pc: list(lst) for pc, lst in fixed_pos_pref.items()}
    pos_group = {gk: list(lst) for gk, lst in fixed_pos_group.items()}

    # Per-plan copies of the confirmed same-week occupancy sets.
    week_used_pref = {w:set(s) for w,s in week_used_pref_base.items()}
    week_used_group = {w:set(s) for w,s in week_used_group_base.items()}

    assigns = []

    def score_city(ck, apos, week_id, area_label, relax_mode, allow_overflow):
        """Score city key ``ck`` for the slot at ``apos``; return None if rejected.

        A candidate is rejected when its prefecture is outside the slot's
        area, when its group has no remaining planned count (unless overflow
        is allowed and ``ck`` is in ``OVR_POOL``), when the gap rules for the
        given ``relax_mode`` fail, or when the snow blackout applies.
        Otherwise a dict with ``ck``, ``gk``, ``pc``, ``score`` and ``reason``
        is returned.
        """
        gk = group_of_citykey(ck)
        pc = pref_by_group.get(gk, "") or pref_by_city.get(ck, "")

        if not area_allowed(area_label, pc):
            return None

        # Remaining planned count = plan - confirmed in term 43 - used by this plan.
        plan_cnt_g = plan_count_by_group.get(gk, 0)
        already_g  = scheduled_counts_43_group.get(gk, 0)
        remaining_g = plan_cnt_g - already_g - used_in_plan_group.get(gk, 0)

        overflow_used = False
        overflow_meta = {}

        if remaining_g <= 0:
            if not allow_overflow:
                return None
            if ck not in OVR_POOL:
                return None
            overflow_used = True
            meta = ovr_candidates.get(ck, {})
            overflow_meta = {"name_raw": meta.get("name_raw",""), "pop": meta.get("pop", float("nan"))}

        need_gap_p = pref_gap.get(pc, CONFIG["GAP_WEEKS_MIN"]) if pc else CONFIG["GAP_WEEKS_MIN"]
        need_gap_g = group_gap.get(gk, CONFIG["GAP_WEEKS_MIN"])

        # Distance from this slot to the nearest known position; 999 is used
        # when there is no prefecture / group key to look up.
        dp = min_dist(apos, pos_pref.get(pc, [])) if pc else 999
        dg = min_dist(apos, pos_group.get(gk, [])) if gk else 999

        ok_p = (dp >= need_gap_p)
        ok_g = (dg >= need_gap_g)

        # relax_mode None   -> both prefecture and group gaps must hold
        # relax_mode "Aのみ" -> only the prefecture gap must hold
        # relax_mode "B+A"  -> no gap check at all
        if relax_mode is None:
            if not (ok_p and ok_g):
                return None
        elif relax_mode == "Aのみ":
            if not ok_p:
                return None
        elif relax_mode == "B+A":
            pass

        if pc and is_snow_blackout(pc, gk, week_id):
            return None

        same_week_pref_hit  = (pc and pc in week_used_pref.get(week_id, set()))
        same_week_group_hit = (gk and gk in week_used_group.get(week_id, set()))

        same_week_pen = (CONFIG["SAME_WEEK_PREF_PENALTY"] if same_week_pref_hit else 0.0) + \
                        (CONFIG["SAME_WEEK_CITY_PENALTY"] if same_week_group_hit else 0.0)

        overflow_pen = CONFIG["OVR_OVERFLOW_PENALTY"] if overflow_used else 0.0

        relax_pen = 0.0
        if relax_mode == "Aのみ":
            relax_pen = W["relax_penalty"] * 0.6
        elif relax_mode == "B+A":
            relax_pen = W["relax_penalty"] * 1.0

        # Slack can be negative when a relaxed mode let a too-close candidate through.
        slack_p = dp - need_gap_p
        slack_g = dg - need_gap_g
        # Cities without an entry in city_need get the neutral value 0.5.
        need_city = city_need.get(ck, 0.5)

        score = (
            slack_p * W["pref_slack"] +
            slack_g * W["city_slack"] +
            max(remaining_g, 0) * W["unmet_bonus"] * 5.0 +
            (need_city * 10) * W["low_attr_city"] -
            relax_pen - same_week_pen - overflow_pen +
            rng.uniform(-0.5, 0.5) * W["noise"]
        )

        reason = build_reason(
            variant, week_id, area_label, ck, gk, pc,
            need_gap_p, dp, need_gap_g, dg,
            remaining_g,
            overflow_used, overflow_meta,
            same_week_pref_hit, same_week_group_hit, relax_mode, score
        )
        return {"ck": ck, "gk": gk, "pc": pc, "score": float(score), "reason": reason}

    for _, slot in open43_AJ.iterrows():
        apos = int(slot["abs_pos"])
        week_id = slot["week_id"]
        area_label = slot.get("area_label", "")

        best = None

        # Pass 1: within the planned counts.
        for relax_mode in [None, "Aのみ", "B+A"]:
            for ck in PLAN_CITY_KEYS:
                cand = score_city(ck, apos, week_id, area_label, relax_mode, allow_overflow=False)
                if cand and (best is None or cand["score"] > best["score"]):
                    best = cand
            if best is not None:
                break

        # Pass 2: exception overflow slots, only when pass 1 found nothing.
        if best is None:
            for relax_mode in [None, "Aのみ", "B+A"]:
                for ck in PLAN_CITY_KEYS:
                    cand = score_city(ck, apos, week_id, area_label, relax_mode, allow_overflow=True)
                    if cand and (best is None or cand["score"] > best["score"]):
                        best = cand
                if best is not None:
                    break

        if best is None:
            # No candidate at all: record an empty assignment with an explanatory reason.
            assigns.append({
                "variant": variant, "row_header": int(slot["row_header"]),
                "assign_city_key": "", "pref_code_guess": "", "score": float("-inf"),
                "reason_BT": f"案{variant}: 条件により候補なし（地域={area_label}）"
            })
            continue

        ck = best["ck"]; gk = best["gk"]; pc = best["pc"]

        used_in_plan_group[gk] = used_in_plan_group.get(gk, 0) + 1

        # Important: add apos to the confirmed (incl. future) + proposed position
        # sets so that the min-distance check sees it for the following slots.
        if gk:
            pos_group.setdefault(gk, [])
            insort_unique(pos_group[gk], apos)
            week_used_group.setdefault(week_id, set()).add(gk)

        if pc:
            pos_pref.setdefault(pc, [])
            insort_unique(pos_pref[pc], apos)
            week_used_pref.setdefault(week_id, set()).add(pc)

        assigns.append({
            "variant": variant, "row_header": int(slot["row_header"]),
            "assign_city_key": ck, "pref_code_guess": pc, "score": best["score"],
            "reason_BT": best["reason"]
        })

    return pd.DataFrame(assigns)

# Build the three proposals; each variant uses its own seed and weights from CONFIG.
planA = make_plan("A")
planB = make_plan("B")
planC = make_plan("C")

def to_map(df, col):
    """Return a dict mapping each row's integer ``row_header`` to its ``col`` value.

    Rows are visited in frame order, so a repeated ``row_header`` keeps the
    value of the last row. An empty frame gives an empty dict.
    """
    mapping = {}
    for _, row in df.iterrows():
        mapping[int(row["row_header"])] = row[col]
    return mapping

# row_header -> assigned city key / guessed prefecture code, per plan.
A_city = to_map(planA, "assign_city_key")
B_city = to_map(planB, "assign_city_key")
C_city = to_map(planC, "assign_city_key")
A_pref = to_map(planA, "pref_code_guess")
B_pref = to_map(planB, "pref_code_guess")
C_pref = to_map(planC, "pref_code_guess")
# Only plan A's reason text is mapped; B and C reasons are not kept here.
A_reason = to_map(planA, "reason_BT")

# ======================================================
# 13) 書き戻し（43期の空きAJ枠だけ）
# ======================================================
# Re-open the input workbook with openpyxl so the proposals can be written into it.
wb = openpyxl.load_workbook(QUARTER_XLSX)

# Keep only the term-42 and term-43 sheets in the output workbook.
for name in list(wb.sheetnames):
    if name not in {sheet42, sheet43}:
        wb.remove(wb[name])

ws43 = wb[sheet43]

written = 0
for row0 in open43_AJ["row_header"].tolist():
    # row_header and the COL_* constants are 0-based; openpyxl cells are 1-based.
    r = int(row0) + 1

    c_val = ws43.cell(r, COL_CITY+1).value
    d_val = ws43.cell(r, COL_VENUE+1).value

    # Write only into rows that are still open: empty city and a placeholder/empty venue.
    if not ((c_val is None or str(c_val).strip()=="") and is_placeholder_venue(d_val)):
        continue

    a = str(A_city.get(int(row0), "") or "")
    b = str(B_city.get(int(row0), "") or "")
    c = str(C_city.get(int(row0), "") or "")

    # Plan A goes into the city column; plans B and C are appended to the venue placeholder.
    ws43.cell(r, COL_CITY+1).value = a
    ws43.cell(r, COL_VENUE+1).value = f"{VENUE_PLACEHOLDER}（B:{b if b else '-'} / C:{c if c else '-'}）"

    # The prefecture cell is filled only when it is currently empty; A is preferred, then B, then C.
    pref_candidate = A_pref.get(int(row0), "") or B_pref.get(int(row0), "") or C_pref.get(int(row0), "")
    if (ws43.cell(r, COL_PREF+1).value is None) or (str(ws43.cell(r, COL_PREF+1).value).strip()==""):
        if pref_candidate:
            ws43.cell(r, COL_PREF+1).value = str(pref_candidate)

    # The reason column always carries plan A's explanation.
    ws43.cell(r, COL_REASON_BT+1).value = str(A_reason.get(int(row0), "案A: 理由生成なし"))
    written += 1

wb.save(OUT_QUARTER_ABC)

# Run summary.
print("\n✅ 入力:", QUARTER_XLSX)
print("✅ 出力:", OUT_QUARTER_ABC)
print("✅ 43期 空きAJ枠（city空＆会場要検討）:", len(open43_AJ))
print("✅ 書込数:", written)
print("✅ minDist方式：確定(未来含む)＋提案の最短距離でスパン判定 → 6月に寄る問題を止める")
print("✅ グループcap例:", {k:GROUP_CAP.get(k) for k in sorted(GROUP_CAP)[:20]})


✅ aliasテンプレ作成: 市区分_alias.csv
✅ groupテンプレ作成: 市区分_group.csv
✅ 例外増枠プール: 4（J列=Zの4分位（1=赤,2=黄,3=青,4=灰）==1 & 人口列=人口>=18万）

✅ 入力: SA+AJ+共有用_四半期表20240303.xlsx
✅ 出力: SA+AJ+共有用_四半期表20240303_43期提案ABC_fix_minDist_groupSpan_OUT.xlsx
✅ 43期 空きAJ枠（city空＆会場要検討）: 131
✅ 書込数: 131
✅ minDist方式：確定(未来含む)＋提案の最短距離でスパン判定 → 6月に寄る問題を止める
✅ グループcap例: {'木更津_市原': 2, '豊中_吹田': 2}


In [None]:
# ============================================
# 43期 スケジュール自動提案 MVP（A/B/C案）
# ★東/西/九州の都道府県範囲フィルタ
# ★alias / 読み / 誤字ゆれ吸収（会場マスタ不使用）
# ★束ね対応（木更津・市原など）＝構成市すべて同一エリア扱い（A仕様：両方1消化）
# ★連動グループ対応（木更津+市原=合計年2回、豊中+吹田=合計年2回 等）
#   → 回数とスパンは group_key 単位で管理（年2回→gap=約26週）
# ★重要修正：
#   1) 「空きAJ枠」判定：city空 + (venue空 or 会場要検討) を空き扱い
#   2) 「開催済み履歴」判定：会場要検討は履歴に入れない（提案は履歴汚染しない）
#   3) D列ヒントは「実会場の時だけ」使う（会場要検討(B/C)から拾わない）
# ============================================

!pip -q install fugashi unidic-lite jaconv

import os, re, math, random, difflib
import pandas as pd
import openpyxl
import jaconv
from fugashi import Tagger
tagger = Tagger()

# ====== 入力 ======
# Candidate input workbooks: previously generated proposal files, then the base workbook.
UP4 = "/mnt/data/SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT (4).xlsx"
UP3 = "/mnt/data/SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT (2).xlsx"
UP2 = "/mnt/data/SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT (1).xlsx"
BASE = "SA+AJ+共有用_四半期表20240303.xlsx"

# Any of these may be used as input (by design, proposals are not counted as history).
# The first existing file in the order UP4, UP3, UP2 is used; otherwise BASE.
QUARTER_XLSX = UP4 if os.path.exists(UP4) else (UP3 if os.path.exists(UP3) else (UP2 if os.path.exists(UP2) else BASE))

# Supporting workbooks: planned counts per region, municipality statistics, snow exceptions.
REGION_XLSX      = "43期地域別会場回数.xlsx"
STATS_XLSX       = "市区町村_統計量_全国 (1).xlsx"
SNOW_EXCEPT_XLSX = "豪雪例外リスト.xlsx"

# Output workbook path.
OUT_QUARTER_ABC = "SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT.xlsx"

# ====== Column indices (0-based) ======
COL_WEEK = 0
COL_AREA_OR_KIND = 1     # header row = 東/西/九州 (area), next row = AJ/合同/SA (kind)
COL_CITY = 2
COL_VENUE = 3
COL_PREF = 5
COL_REASON_BT = 72       # 0-based index 72 is spreadsheet column "BT"

# Text that marks a venue as still undecided.
VENUE_PLACEHOLDER = "会場要検討"
# Week identifiers look like "6-4w": 1-2 digits, "-", one digit, "w".
week_id_pat = re.compile(r"^\d{1,2}-\dw$")

# Tunable settings for the scheduler.
CONFIG = {
    # Lower / upper clamp for the required gap in weeks (see gap_weeks_from_count).
    "GAP_WEEKS_MIN": 6,
    "GAP_WEEKS_MAX": 30,
    # Same-week penalties; presumably subtracted from a candidate's score — confirm in the scoring code.
    "SAME_WEEK_PREF_PENALTY": 80.0,
    "SAME_WEEK_CITY_PENALTY": 60.0,   # penalty for the same "linked group" in the same week

    # Minimum similarity ratio accepted by match_city_key / canonize_city_key.
    "FUZZY_CUTOFF": 0.86,
    "CANON_CUTOFF": 0.93,

    # Snow blackout: months fully blocked, March week numbers, and affected prefecture codes.
    "SNOW_BLACKOUT_MONTHS": {12, 1, 2},
    "SNOW_BLACKOUT_MARCH_W": {1},
    "SNOW_PREF_CODES": {
        "01北海道","02青森","03岩手","04宮城","05秋田","06山形","07福島",
        "15新潟","16富山","17石川","18福井","19山梨",
        "20長野","21岐阜","31鳥取","32島根"
    },

    # Exception overflow pool: required class value, minimum population, and a penalty
    # (presumably applied to the score when an overflow slot is used — confirm).
    "OVR_CLASS_J_VALUE": 1,
    "OVR_MIN_POP": 180000,
    "OVR_OVERFLOW_PENALTY": 15.0,

    # Per-variant scoring weights and random seeds, keyed by plan name.
    "WEIGHTS": {
        "A": {"pref_slack": 3.0, "city_slack": 2.0, "unmet_bonus": 1.2,
              "low_attr_city": 2.0, "relax_penalty": 25.0, "noise": 0.8},
        "B": {"pref_slack": 4.5, "city_slack": 3.2, "unmet_bonus": 1.0,
              "low_attr_city": 0.6, "relax_penalty": 30.0, "noise": 0.8},
        "C": {"pref_slack": 2.0, "city_slack": 1.2, "unmet_bonus": 1.2,
              "low_attr_city": 3.2, "relax_penalty": 25.0, "noise": 0.8},
    },
    "SEEDS": {"A": 4301, "B": 4302, "C": 4303},
}

# ======================================================
# 0) alias辞書（CSV）
# ======================================================
ALIAS_CSV = "市区分_alias.csv"

def ensure_alias_template(path=ALIAS_CSV):
    """Create a starter alias CSV at ``path`` unless a file already exists there.

    The template has the columns ``alias`` and ``canonical`` with two sample
    rows and is written as UTF-8 with BOM.
    """
    if not os.path.exists(path):
        sample_rows = [
            {"alias":"なんば","canonical":"難波"},
            {"alias":"薩摩河内","canonical":"薩摩川内"},
        ]
        pd.DataFrame(sample_rows).to_csv(path, index=False, encoding="utf-8-sig")
        print(f"✅ aliasテンプレ作成: {path}")

def load_alias_map(path=ALIAS_CSV):
    """Load the alias -> canonical name mapping from the CSV at ``path``.

    The template is created first if the file is missing. Both columns are
    stripped, and rows where either side is empty are dropped. Any failure
    while reading is reported and an empty mapping is returned, so the run
    continues without aliases.
    """
    ensure_alias_template(path)
    try:
        table = pd.read_csv(path, dtype=str).fillna("")
        for column in ("alias", "canonical"):
            table[column] = table[column].astype(str).str.strip()
        mapping = {}
        for alias, canonical in zip(table["alias"], table["canonical"]):
            if alias and canonical:
                mapping[alias] = canonical
        return mapping
    except Exception as e:
        print("⚠️ alias読み込み失敗。aliasなしで続行:", e)
        return {}

ALIAS_MAP = load_alias_map(ALIAS_CSV)

def apply_alias(s: str) -> str:
    """Return the canonical name for ``s`` from ``ALIAS_MAP``.

    The input is stringified and stripped; ``None`` and blank input give "".
    Names without an alias entry are returned stripped but otherwise unchanged.
    """
    key = "" if s is None else str(s).strip()
    return ALIAS_MAP.get(key, key) if key else ""

# ======================================================
# ★連動グループ定義（CSV）
# ======================================================
GROUP_CSV = "市区分_group.csv"

def ensure_group_template(path=GROUP_CSV):
    """Create a starter linked-group CSV at ``path`` unless a file already exists.

    The template has the columns ``city_key``, ``group_key`` and ``cap`` and
    holds two sample groups of two cities each, every row with cap "2".
    """
    if not os.path.exists(path):
        sample_members = [
            ("木更津", "木更津_市原"),
            ("市原", "木更津_市原"),
            ("豊中", "豊中_吹田"),
            ("吹田", "豊中_吹田"),
        ]
        rows = [
            {"city_key": city, "group_key": group, "cap": "2"}
            for city, group in sample_members
        ]
        pd.DataFrame(rows).to_csv(path, index=False, encoding="utf-8-sig")
        print(f"✅ groupテンプレ作成: {path}")

def load_group_map(path=GROUP_CSV):
    """Load the linked-group definitions from the CSV at ``path``.

    The template is created first if the file is missing. All three columns
    are stripped; rows without both ``city_key`` and ``group_key`` are skipped.

    Returns:
        ``(city_to_group, group_cap)`` where ``city_to_group`` maps a city key
        to its group key and ``group_cap`` maps a group key to its integer cap.
        A cap that cannot be parsed as a number is ignored. If the file cannot
        be read at all, the failure is reported and ``({}, {})`` is returned.
    """
    ensure_group_template(path)
    try:
        df = pd.read_csv(path, dtype=str).fillna("")
        for col in ("city_key", "group_key", "cap"):
            df[col] = df[col].astype(str).str.strip()
        city_to_group = {}
        group_cap = {}
        for ck, gk, cap in zip(df["city_key"], df["group_key"], df["cap"]):
            if not (ck and gk):
                continue
            city_to_group[ck] = gk
            if cap:
                try:
                    group_cap[gk] = int(float(cap))
                except (ValueError, OverflowError):
                    # Non-numeric, NaN or infinite cap: leave the group without a cap.
                    # Catching only these keeps Ctrl-C / SystemExit from being swallowed.
                    pass
        return city_to_group, group_cap
    except Exception as e:
        # Deliberate best-effort: continue without groups when the CSV is unusable.
        print("⚠️ group読み込み失敗。groupなしで続行:", e)
        return {}, {}

CITY_TO_GROUP, GROUP_CAP = load_group_map(GROUP_CSV)

def group_of_citykey(ck: str) -> str:
    """Return the linked-group key for city key ``ck``.

    A city without an entry in ``CITY_TO_GROUP`` forms its own group, so its
    own key is returned. Empty input gives "".
    """
    return CITY_TO_GROUP.get(ck, ck) if ck else ""

# ======================================================
# 1) 正規化・読みキー・誤字ゆれ生成
# ======================================================
def norm(s):
    """Normalize a name for comparison.

    ``None`` and float NaN give "". Otherwise the value is stringified,
    full-width spaces become ASCII spaces, the ends are stripped, and all
    whitespace, dash-like characters (including the long-vowel mark "ー"),
    slashes, middle dots, commas, periods and brackets are removed.
    """
    is_nan = isinstance(s, float) and math.isnan(s)
    if s is None or is_nan:
        return ""
    text = str(s).replace("　"," ").strip()
    return re.sub(r"[ \t\n\r\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", text)

def muni_base(name: str) -> str:
    """Strip administrative suffixes from a place name.

    After trimming, one trailing 都/道/府/県 is removed, and then one trailing
    市/区/町/村 is removed from the result. ``None`` gives "".
    """
    if name is None:
        return ""
    base = str(name).strip()
    # Applied in this order: prefecture-level suffix first, then municipality-level.
    for suffix_pattern in (r"(都|道|府|県)$", r"(市|区|町|村)$"):
        base = re.sub(suffix_pattern, "", base)
    return base

def to_katakana_reading(s: str) -> str:
    """Return a katakana reading key for ``s``.

    ``None`` and blank input give "". Whitespace, dash-like characters,
    slashes, punctuation and brackets are removed first. If the remaining text
    contains any kana it is converted to katakana directly; otherwise it is
    tokenized with ``tagger`` and the per-token reading is used (falling back
    to the surface form when no reading is available). The final key keeps
    only characters in ``ァ-ン``, digits and ``A-Z``.
    """
    if s is None:
        return ""
    raw = str(s).strip()
    if not raw:
        return ""
    stripped = re.sub(r"[ \t\r\n\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", raw)

    def _as_katakana_key(text):
        # Convert hiragana to katakana, normalize, then drop everything else.
        kata = jaconv.normalize(jaconv.hira2kata(text))
        return re.sub(r"[^ァ-ン0-9A-Z]", "", kata)

    if re.search(r"[ぁ-んァ-ン]", stripped):
        return _as_katakana_key(stripped)

    pieces = []
    for token in tagger(stripped):
        feature = token.feature
        # Use the first of these attributes that the feature object exposes.
        reading = next(
            (getattr(feature, attr) for attr in ("reading", "kana", "pron") if hasattr(feature, attr)),
            None,
        )
        if not reading or reading == "*":
            reading = token.surface
        pieces.append(reading)

    return _as_katakana_key("".join(pieces))

# Characters that are commonly written interchangeably, mapped to their alternatives.
CONFUSION = {
    "川": ["河"], "河": ["川"],
    "崎": ["﨑"], "﨑": ["崎"],
    "ヶ": ["ケ"], "ケ": ["ヶ"],
    "斉": ["齋", "斎"], "齋": ["斉", "斎"], "斎": ["斉", "齋"],
    "邊": ["辺", "邉"], "邉": ["辺", "邊"], "辺": ["邊", "邉"],
}

def gen_variants(s: str, limit=12):
    """Return spelling variants of ``s`` built from the ``CONFUSION`` table.

    For every confusable character present in the original string, each
    variant collected so far is also emitted with that character replaced by
    each of its alternatives.

    The result is ordered deterministically: the original string comes first
    and the others follow in the order they were generated. Building the
    result from a set instead would make the order depend on string hashing,
    and truncating to ``limit`` could then drop the original spelling.

    Args:
        s: source string; ``None`` gives ``[""]``.
        limit: maximum number of variants returned; generation stops once this
            many have been collected.

    Returns:
        A list of at most ``limit`` distinct strings.
    """
    if s is None:
        return [""]
    s = str(s)
    variants = [s]
    seen = {s}
    for a, bs in CONFUSION.items():
        if a in s:
            # Iterate over a snapshot so variants added in this round are not re-expanded.
            for v in list(variants):
                for b in bs:
                    cand = v.replace(a, b)
                    if cand not in seen:
                        seen.add(cand)
                        variants.append(cand)
        if len(variants) >= limit:
            break
    return variants[:limit]

def best_ratio(a, b):
    """Return the ``difflib.SequenceMatcher`` similarity ratio of ``a`` and ``b``."""
    matcher = difflib.SequenceMatcher(None, a, b)
    return matcher.ratio()

# ======================================================
# ★束ね分解 + D列ヒント（ただし実会場のみ）
# ======================================================
DELIMS = r"[・／/、,＋+＆&\s　]+"

def strip_annotations(s: str) -> str:
    """Cut trailing annotations off a city cell.

    Everything from the first opening parenthesis (full- or half-width) is
    dropped, then everything from the first of 会場 / 要検討 / 検討 / 確定.
    The remainder is stripped. Falsy input gives "".
    """
    if not s:
        return ""
    text = str(s)
    for marker in (r"[（(]", r"(会場|要検討|検討|確定)"):
        hit = re.search(marker, text)
        if hit:
            text = text[:hit.start()]
    return text.strip()

def split_city_tokens(raw: str):
    """Split a bundled city cell into distinct base city names.

    Annotations are stripped, the text is split on ``DELIMS``, and each piece
    is passed through ``apply_alias`` and ``muni_base``. Empty results are
    dropped and duplicates removed, keeping first-seen order.
    """
    cleaned = strip_annotations(raw)
    if not cleaned:
        return []
    bases = []
    for piece in re.split(DELIMS, cleaned):
        piece = piece.strip() if piece else ""
        if not piece:
            continue
        base = muni_base(apply_alias(piece))
        if base:
            bases.append(base)
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(bases))

def find_city_tokens_in_text(text: str, candidates_city_keys):
    """Return the candidate city keys whose normalized form occurs in ``text``.

    Both sides are compared after ``norm``. Candidates that normalize to an
    empty string never match. The result keeps candidate order, without
    duplicates; empty text gives an empty list.
    """
    haystack = norm(text) if text else ""
    if not haystack:
        return []
    matched = {}
    for ck in candidates_city_keys:
        needle = norm(ck)
        if needle and needle in haystack:
            matched.setdefault(ck, None)
    return list(matched)

def is_placeholder_venue(v):
    """Return True when the venue cell counts as undecided.

    ``None``, blank text, and any text containing the "to be decided" marker
    (including the form with plans B/C appended) are treated as undecided.
    """
    text = "" if v is None else str(v).strip()
    if not text:
        return True
    return any(marker in text for marker in ("会場要検討", "要検討"))

# Venue names that are excluded from the history when they appear on their own.
EXCLUDE_AJ_BASE_VENUES = ("AJ日本橋", "AJ秋葉原")
# A suffix containing any of these keywords keeps the venue in the history.
EXCLUDE_AJ_COUNT_KEYWORDS = ("萌え", "イラスト", "JIF")

def is_excluded_aj_base_venue(venue_raw):
    """Return True when the venue is a plain AJ base venue that must not be counted.

    Whitespace (including full-width spaces) is removed before comparing.
    A venue that starts with one of ``EXCLUDE_AJ_BASE_VENUES`` is excluded
    unless the text after the base name contains one of
    ``EXCLUDE_AJ_COUNT_KEYWORDS``. Any other venue, and ``None``, gives False.
    """
    compact = re.sub(r"[\s　]+", "", "" if venue_raw is None else str(venue_raw))
    for base in EXCLUDE_AJ_BASE_VENUES:
        if not compact.startswith(base):
            continue
        suffix = compact[len(base):]
        # An exact match has an empty suffix and is therefore excluded.
        has_keyword = bool(suffix) and any(k in suffix for k in EXCLUDE_AJ_COUNT_KEYWORDS)
        return not has_keyword
    return False

def is_fixed_row(city_raw, venue_raw):
    """Return True when the row counts as held history.

    Only rows with a real venue count: placeholder/empty venues and plain
    AJ base venues are rejected. ``city_raw`` is accepted but not used.
    """
    undecided = is_placeholder_venue(venue_raw)
    return not undecided and not is_excluded_aj_base_venue(venue_raw)

# ======================================================
# 2) 東/西/九州 の都道府県範囲（＋北=東に寄せ）
# ======================================================
# Prefecture numbers (leading two digits of the prefecture code) per area.
# East: 1-16 plus 19-24.
EAST_PREF_NUMS  = set(list(range(1, 17)) + [19,20,21,22,23,24])
# West: 17, 18, 25-33, 36-39 (31 and 32 are listed again but already fall in 25-33).
WEST_PREF_NUMS  = set([17,18] + list(range(25, 34)) + [31,32] + [36,37,38,39])
# Kyushu: 34, 35 and 40-46.
# NOTE(review): 47 is in none of the three sets, so area_allowed rejects it for
# every one of 東/西/九州 — confirm this is intended.
KYUSHU_PREF_NUMS= set([34,35] + list(range(40, 47)))

def norm_area_label(x):
    """Return the stripped area label, folding "北" (north) into "東" (east).

    ``None`` gives "".
    """
    label = "" if x is None else str(x).strip()
    return "東" if label == "北" else label

def pref_num(pref_code_str: str):
    """Return the leading two-digit prefecture number as an int.

    Returns ``None`` for empty input or when the stripped text does not start
    with two digits.
    """
    if pref_code_str:
        leading = re.match(r"^(\d{2})", str(pref_code_str).strip())
        if leading:
            return int(leading.group(1))
    return None

def area_allowed(area_label: str, pref_code_str: str) -> bool:
    """Return True when the prefecture may be used for a slot of this area.

    A prefecture code without a leading number is always allowed, and so is
    any area label other than 東 / 西 / 九州 (after "北" is folded into "東").
    """
    area = norm_area_label(area_label)
    number = pref_num(pref_code_str)
    if number is None:
        return True
    allowed_by_area = {
        "東": EAST_PREF_NUMS,
        "西": WEST_PREF_NUMS,
        "九州": KYUSHU_PREF_NUMS,
    }
    allowed = allowed_by_area.get(area)
    # Labels outside east / west / Kyushu are never filtered out.
    return True if allowed is None else number in allowed

# ======================================================
# 3) 補助関数
# ======================================================
def find_sheet_name(xlsx_path, keywords):
    """Return the first sheet name that contains every keyword.

    Falls back to the workbook's first sheet when no name matches.

    Args:
        xlsx_path: path of the workbook to inspect.
        keywords: iterable of substrings that must all occur in the sheet name.
    """
    wb = openpyxl.load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        names = list(wb.sheetnames)
    finally:
        # Read-only workbooks keep the file open until closed explicitly.
        wb.close()
    for name in names:
        if all(k in name for k in keywords):
            return name
    return names[0]

def parse_week_id(week_id):
    """Parse a week id such as "6-4w" into ``(month, week_number)``.

    Returns ``(None, None)`` when the text does not have that form.
    """
    parsed = re.match(r"^(\d{1,2})-(\d)w$", str(week_id))
    if parsed:
        month, week = (int(part) for part in parsed.groups())
        return month, week
    return None, None

def gap_weeks_from_count(cnt):
    """Return the required gap in weeks for ``cnt`` planned events per year.

    The gap is ``ceil(52 / cnt)`` clamped to the range
    ``CONFIG["GAP_WEEKS_MIN"]`` .. ``CONFIG["GAP_WEEKS_MAX"]``. A count of
    zero or less gives the minimum gap.
    """
    floor = CONFIG["GAP_WEEKS_MIN"]
    if cnt <= 0:
        return floor
    spacing = int(math.ceil(52 / cnt))
    capped = min(CONFIG["GAP_WEEKS_MAX"], spacing)
    return max(floor, capped)

def ensure_snow_except_template(path):
    """Create an empty snow-exception workbook at ``path`` unless one exists.

    The workbook has a single sheet with the header cells ``pref_code``,
    ``city_key`` and ``memo`` in A1..C1.
    """
    if os.path.exists(path):
        return
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "例外"
    for cell_ref, heading in (("A1", "pref_code"), ("B1", "city_key"), ("C1", "memo")):
        ws[cell_ref] = heading
    wb.save(path)

def load_snow_excepts(path):
    """Load the snow-blackout exceptions from the workbook at ``path``.

    The template is created first if the file is missing.

    Returns:
        ``(pref_codes, city_keys)``: two sets with the stripped, non-empty
        values of the ``pref_code`` and ``city_key`` columns. A missing
        column gives an empty set.
    """
    ensure_snow_except_template(path)
    df = pd.read_excel(path, sheet_name=0, dtype=str)

    def _column_values(column):
        series = df.get(column, pd.Series([], dtype=str))
        return {value for value in series.dropna().astype(str).str.strip() if value}

    return _column_values("pref_code"), _column_values("city_key")

SNOW_EXCEPT_PREF_CODES, SNOW_EXCEPT_CITY_KEYS = load_snow_excepts(SNOW_EXCEPT_XLSX)

def is_snow_blackout(pref_code, group_key, week_id):
    """Return True when the slot falls into the snow blackout for this prefecture.

    No blackout applies when the prefecture code is empty, when the prefecture
    or the group key is listed as an exception, when the prefecture is not in
    ``CONFIG["SNOW_PREF_CODES"]``, or when ``week_id`` cannot be parsed.
    Otherwise the slot is blacked out in the months listed in
    ``CONFIG["SNOW_BLACKOUT_MONTHS"]`` and in the March weeks listed in
    ``CONFIG["SNOW_BLACKOUT_MARCH_W"]``.
    """
    if not pref_code:
        return False
    if pref_code in SNOW_EXCEPT_PREF_CODES:
        return False
    if group_key and group_key in SNOW_EXCEPT_CITY_KEYS:
        return False
    if pref_code not in CONFIG["SNOW_PREF_CODES"]:
        return False
    month, w = parse_week_id(week_id)
    if month is None:
        return False
    if month in CONFIG["SNOW_BLACKOUT_MONTHS"]:
        return True
    # Read the March weeks from CONFIG so the setting actually takes effect;
    # the default {1} matches the previously hard-coded value.
    return month == 3 and w in CONFIG.get("SNOW_BLACKOUT_MARCH_W", {1})

def build_week_order(qsheet):
    """Collect the distinct week ids of a sheet in order of first appearance.

    Values of the week column are stringified and kept when they match
    ``week_id_pat``.

    Returns:
        ``(week_order, week_index)``: the list of week ids and a dict mapping
        each week id to its position in that list.
    """
    week_index = {}
    for value in qsheet[COL_WEEK].astype(str).tolist():
        if week_id_pat.match(value) and value not in week_index:
            week_index[value] = len(week_index)
    return list(week_index), week_index

def kind_norm(x):
    """Normalize the slot kind label.

    ``None`` and float NaN give "". All spaces (half- and full-width) are
    removed so that spellings like "A J" are absorbed, and the half-width
    upper-case and full-width lower-case spellings of AJ / SA are mapped to
    their full-width upper-case form.
    """
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return ""
    text = str(x).strip()
    replacements = (
        ("　", " "),
        (" ", ""),
        ("AJ", "ＡＪ"),
        ("ａｊ", "ＡＪ"),
        ("SA", "ＳＡ"),
        ("ｓａ", "ＳＡ"),
    )
    for old, new in replacements:
        text = text.replace(old, new)
    return text

def extract_blocks(qsheet, week_index_map, which):
    """Extract the AJ / joint event blocks from a quarter sheet.

    A block is a pair of consecutive rows: a header row whose week column
    holds a week id, and the detail row right below it. Area, city, venue and
    prefecture are read from the header row; the kind is read from the detail
    row.

    Args:
        qsheet: sheet loaded without a header row, so columns are addressed by
            the 0-based ``COL_*`` indices.
        week_index_map: week id -> position; blocks whose week id is not in
            the map are dropped.
        which: label stored in the ``fy`` column.

    Returns:
        A DataFrame holding only the blocks whose kind is ＡＪ or 合同. The
        columns are always present, also when no block was found.
    """
    columns = [
        "fy", "week_id", "week_pos",
        "row_header", "row_detail",
        "area_label",
        "kind", "city_raw", "venue_raw", "pref_code",
    ]

    def _cell_text(value):
        # Missing cells become "", everything else a stripped string.
        return "" if pd.isna(value) else str(value).strip()

    blocks = []
    for i in range(len(qsheet)-1):
        w = qsheet.iat[i, COL_WEEK]
        if pd.isna(w):
            continue
        w = str(w).strip()
        if not week_id_pat.match(w):
            continue
        header = qsheet.iloc[i]
        detail = qsheet.iloc[i+1]

        blocks.append({
            "fy": which, "week_id": w, "week_pos": week_index_map.get(w, None),
            "row_header": i, "row_detail": i+1,
            "area_label": _cell_text(header[COL_AREA_OR_KIND]),
            "kind": kind_norm(detail[COL_AREA_OR_KIND]),
            "city_raw": _cell_text(header[COL_CITY]),
            "venue_raw": _cell_text(header[COL_VENUE]),
            "pref_code": _cell_text(header[COL_PREF]),
        })
    # Explicit columns keep dropna(subset=...) from raising KeyError on an empty sheet.
    df = pd.DataFrame(blocks, columns=columns).dropna(subset=["week_pos"]).copy()
    df["week_pos"] = df["week_pos"].astype(int)
    return df[df["kind"].isin(["ＡＪ","合同"])].copy()

# ======================================================
# 4) 統計量（例外増枠用 + 正規化基準）
# ======================================================
# Load the municipality statistics (first sheet) and strip the column names.
stats = pd.read_excel(STATS_XLSX, sheet_name=0)
stats.columns = [str(c).strip() for c in stats.columns]
# Municipality name column: "市区町村" when present, otherwise the first column.
muni_col = "市区町村" if "市区町村" in stats.columns else stats.columns[0]

# Distinct, non-empty base names (administrative suffixes removed).
stats_base_list = stats[muni_col].astype(str).map(muni_base).map(str.strip)
stats_base_list = stats_base_list[stats_base_list != ""].dropna().unique().tolist()

# Normalized name -> base name; when two base names normalize alike, the later one wins.
stats_norm_to_base = {norm(x): x for x in stats_base_list if x}
stats_norms = list(stats_norm_to_base.keys())

def canonize_city_key(city_key_raw: str):
    """Map a raw city name onto the base name used in the statistics table.

    Matching is attempted in three stages:
      1. exact match of the normalized base name (score 1.0);
      2. exact match of any spelling variant (score 0.995);
      3. best fuzzy ratio over all statistics names, accepted when it reaches
         ``CONFIG["CANON_CUTOFF"]``.

    Returns:
        ``(name, score)``. When nothing is accepted, the input's own base name
        is returned together with the best ratio seen. Empty input gives
        ``("", 0.0)``.
    """
    if not city_key_raw:
        return "", 0.0
    base = muni_base(apply_alias(city_key_raw))

    # Stage 1: exact match on the normalized base name.
    base_norm = norm(base)
    if base_norm and base_norm in stats_norm_to_base:
        return stats_norm_to_base[base_norm], 1.0

    # Stage 2: exact match on a spelling variant.
    variants = gen_variants(base)
    for variant_norm in map(norm, variants):
        if variant_norm and variant_norm in stats_norm_to_base:
            return stats_norm_to_base[variant_norm], 0.995

    # Stage 3: fuzzy search; the first candidate with the highest ratio wins.
    top_base, top_ratio = None, 0.0
    for probe in map(norm, [base] + variants):
        for stat_norm in stats_norms:
            ratio = best_ratio(probe, stat_norm)
            if ratio > top_ratio:
                top_base, top_ratio = stats_norm_to_base[stat_norm], ratio

    if top_base and top_ratio >= CONFIG["CANON_CUTOFF"]:
        return top_base, top_ratio
    return base, top_ratio

# ======================================================
# 5) 四半期表読み込み
# ======================================================
# Locate the term-42 and term-43 master sheets by name keywords
# (find_sheet_name falls back to the first sheet when nothing matches).
sheet42 = find_sheet_name(QUARTER_XLSX, ["42期", "マスタ"])
sheet43 = find_sheet_name(QUARTER_XLSX, ["43期", "マスタ"])

# Read both sheets without a header row so columns are addressed by 0-based index.
q42 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet42, header=None)
q43 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet43, header=None)

# Week ids in order of appearance, plus week id -> position, per term.
week_order_42, week_index_42 = build_week_order(q42)
week_order_43, week_index_43 = build_week_order(q43)

# AJ / joint blocks of each term.
b42 = extract_blocks(q42, week_index_42, "42")
b43 = extract_blocks(q43, week_index_43, "43")

# ======================================================
# 6) 地域別回数読み込み → 市区分キー正規化して集約
# ======================================================
# Planned counts per region: column 0 = prefecture label, 1 = city, 2 = count.
r = pd.read_excel(REGION_XLSX, sheet_name=0, header=None, dtype=str)
# Keep rows that have both a count (column 2) and a city (column 1).
plan_rows = r[~r[2].isna()].copy()
plan_rows = plan_rows[plan_rows[1].notna()].copy()
# NOTE(review): the forward-fill runs after the filtering above, so a prefecture
# label that sits only on a filtered-out row is not carried down — confirm
# against the sheet layout.
plan_rows["pref_parent"] = plan_rows[0].ffill()
plan_rows["city_key_raw"] = plan_rows[1].astype(str).str.strip()
# Non-numeric counts become 0.
plan_rows["plan_count"] = pd.to_numeric(plan_rows[2], errors="coerce").fillna(0).astype(int)

# Canonicalize city names, then sum the counts per (prefecture, city).
plan_rows["city_key"] = [canonize_city_key(x)[0] for x in plan_rows["city_key_raw"].tolist()]
region_master = plan_rows.groupby(["pref_parent","city_key"], as_index=False)["plan_count"].sum()

city_keys = region_master["city_key"].tolist()
# If one city key occurs under several prefectures, the last row's count is kept here.
plan_count_by_city = region_master.set_index("city_key")["plan_count"].to_dict()

# ===== Rebuild plan_count per linked group (an explicit cap takes priority) =====
group_members = {}
for ck in city_keys:
    gk = group_of_citykey(ck)
    group_members.setdefault(gk, []).append(ck)

plan_count_by_group = {}
for gk, members in group_members.items():
    if gk in GROUP_CAP and GROUP_CAP[gk] > 0:
        plan_count_by_group[gk] = GROUP_CAP[gk]
    else:
        # Without a positive cap, the group count is the sum of its members' counts.
        plan_count_by_group[gk] = int(sum(plan_count_by_city.get(m, 0) for m in members))

# ======================================================
# 7) 候補側の索引（表記norm + 読み）
# ======================================================
# Lookup indexes over the planned city keys.
# city key -> normalized spelling
city_norm_map = {ck: norm(ck) for ck in city_keys}
# normalized spelling -> list of city keys sharing it
norm_to_city = {}
for ck, nk in city_norm_map.items():
    if nk:
        norm_to_city.setdefault(nk, []).append(ck)

# katakana reading -> list of city keys sharing it (keys without a reading are skipped)
reading_to_city = {}
for ck in city_keys:
    rd = to_katakana_reading(ck)
    if rd:
        reading_to_city.setdefault(rd, []).append(ck)

def choose_best_by_fuzzy(query_base, cands):
    """Pick the candidate whose normalized form is most similar to the query.

    Returns:
        ``(candidate, ratio)``; on ties the earliest candidate wins. When no
        candidate scores above 0, ``(None, 0.0)`` is returned.
    """
    target = norm(query_base)
    winner, top = None, 0.0
    scored = ((cand, best_ratio(target, norm(cand))) for cand in cands)
    for cand, ratio in scored:
        if ratio > top:
            winner, top = cand, ratio
    return winner, top

def match_city_key(city_name: str):
    """Match a free-text city name to one of the planned city keys.

    Stages are tried in order and the first hit is returned:
      1. exact match of the normalized base name;
      2. exact match of a spelling variant;
      3. match by katakana reading;
      4. best fuzzy ratio over all city keys, accepted at
         ``CONFIG["FUZZY_CUTOFF"]`` or above.

    Returns:
        ``(city_key, score)``, or ``(None, best_ratio_seen)`` when nothing is
        accepted. Empty input gives ``(None, 0.0)``.
    """
    if not city_name:
        return None, 0.0
    x = apply_alias(city_name)
    b = muni_base(x)
    bn = norm(b)

    # Stage 1: normalized spelling. Several keys may share a spelling; the
    # most similar one is chosen and the score is floored at 0.97.
    if bn in norm_to_city and bn:
        cands = norm_to_city[bn]
        if len(cands) == 1:
            return cands[0], 1.0
        ck, rr = choose_best_by_fuzzy(b, cands)
        return ck, max(0.97, rr)

    # Stage 2: spelling variants (score 0.995, or floored at 0.95 when ambiguous).
    for v in gen_variants(b):
        vn = norm(v)
        if vn in norm_to_city and vn:
            cands = norm_to_city[vn]
            if len(cands) == 1:
                return cands[0], 0.995
            ck, rr = choose_best_by_fuzzy(v, cands)
            return ck, max(0.95, rr)

    # Stage 3: katakana reading (score 0.99, or floored at 0.93 when ambiguous).
    rd = to_katakana_reading(b)
    cands = reading_to_city.get(rd, [])
    if len(cands) == 1:
        return cands[0], 0.99
    elif len(cands) >= 2:
        ck, rr = choose_best_by_fuzzy(b, cands)
        if ck:
            return ck, max(0.93, rr)

    # Stage 4: fuzzy search over every city key.
    best_ck, best_r = None, 0.0
    for ck in city_keys:
        r = best_ratio(bn, city_norm_map.get(ck, ""))
        if r > best_r:
            best_r = r
            best_ck = ck
    if best_ck and best_r >= CONFIG["FUZZY_CUTOFF"]:
        return best_ck, best_r
    return None, best_r

def match_city_keys_multi(city_raw: str, venue_raw: str = ""):
    """Return every planned city key referenced by a row.

    The city cell is split into tokens and each token is matched. Only when
    that yields nothing is the venue text searched for city names, and only
    if the venue is a real venue: picking names out of an undecided
    placeholder (which may carry B/C proposals) would corrupt the history.

    Returns:
        A list of distinct city keys in first-seen order.
    """
    found = []
    for token in split_city_tokens(city_raw):
        key, _ = match_city_key(token)
        if key:
            found.append(key)

    if not found and venue_raw and not is_placeholder_venue(venue_raw):
        # Guard against appended B/C text: drop everything from "（B:" / "(B:" on.
        venue_text = str(venue_raw).split("（B:", 1)[0].split("(B:", 1)[0]
        found.extend(find_city_tokens_in_text(venue_text, city_keys))

    return [key for key in dict.fromkeys(found) if key]

def add_city_key(df):
    """Return a copy of ``df`` with matched city / group columns added.

    Added columns:
      * ``city_key``    - first matched city key, or None;
      * ``city_keys``   - all matched city keys;
      * ``group_keys``  - distinct linked-group keys of those cities;
      * ``match_score`` - match score of the first city token (or of the raw
        city text when it has no tokens); 0.0 when nothing matched.
    """
    first_keys, all_keys, all_groups, match_scores = [], [], [], []

    for city_raw, venue_raw in zip(df["city_raw"].tolist(), df["venue_raw"].tolist()):
        matched = match_city_keys_multi(city_raw, venue_raw)
        primary = matched[0] if matched else None

        # Distinct non-empty group keys, in first-seen order.
        groups = [g for g in dict.fromkeys(group_of_citykey(ck) for ck in matched) if g]

        score = 0.0
        if primary:
            tokens = split_city_tokens(city_raw)
            _, score = match_city_key(tokens[0] if tokens else city_raw)

        all_keys.append(matched)
        first_keys.append(primary)
        all_groups.append(groups)
        match_scores.append(score)

    out = df.copy()
    out["city_key"] = first_keys
    out["city_keys"] = all_keys
    out["group_keys"] = all_groups
    out["match_score"] = match_scores
    return out

# Attach matched city / group keys to every block of both terms.
scheduled42_all = add_city_key(b42.copy())
scheduled43_all = add_city_key(b43.copy())

# Only rows with a real venue go into the history (undecided venues are not history).
scheduled42 = scheduled42_all[scheduled42_all.apply(lambda r: is_fixed_row(r["city_raw"], r["venue_raw"]), axis=1)].copy()
scheduled43 = scheduled43_all[scheduled43_all.apply(lambda r: is_fixed_row(r["city_raw"], r["venue_raw"]), axis=1)].copy()

# Open AJ slots: kind is AJ, city is empty, and the venue is empty or undecided.
# Sorted by week position, then sheet row.
open43_AJ = b43[
    (b43["kind"]=="ＡＪ") &
    (b43["city_raw"]=="") &
    (b43["venue_raw"].apply(is_placeholder_venue))
].copy().sort_values(["week_pos","row_header"]).reset_index(drop=True)

# ======================================================
# 8) 都道府県コード推定
# ======================================================
# Distinct values found in the prefecture column of the term-43 sheet.
pref_code_set = set([str(x).strip() for x in q43[COL_PREF].dropna().astype(str).unique().tolist()])
# Values shaped "<two digits><name>" give a name -> full code lookup; others are ignored.
pref_name_to_code = {}
for pc in pref_code_set:
    m = re.match(r"^(\d{2})(.+)$", pc)
    if m:
        pref_name_to_code[m.group(2)] = pc

def parent_to_pref_code(pref_parent):
    """Translate a prefecture label from the region sheet into a prefecture code.

    Only the leading run of non-digit characters of the label is used as the
    name. Returns "" for ``None`` / NaN or when the name is not known in
    ``pref_name_to_code``.
    """
    missing = pref_parent is None or (isinstance(pref_parent,float) and math.isnan(pref_parent))
    if missing:
        return ""
    label = str(pref_parent).strip()
    leading = re.match(r"^([^\d]+)", label)
    if leading:
        label = leading.group(1).strip()
    return pref_name_to_code.get(label, "")

# Guess a prefecture code for every planned city from its prefecture label.
region_master["pref_code_guess"] = region_master["pref_parent"].apply(parent_to_pref_code)
pref_by_city = region_master.set_index("city_key")["pref_code_guess"].to_dict()

# A group takes the prefecture code of its first member that has one ("" if none has).
pref_by_group = {}
for gk, members in group_members.items():
    pc = ""
    for mck in members:
        pc = pref_by_city.get(mck, "")
        if pc:
            break
    pref_by_group[gk] = pc

# Required gaps in weeks, derived from planned counts per prefecture and per group.
pref_plan_count = region_master.groupby("pref_code_guess")["plan_count"].sum().to_dict()
pref_gap = {pc: gap_weeks_from_count(int(cnt)) for pc, cnt in pref_plan_count.items() if pc}
group_gap = {gk: gap_weeks_from_count(int(cnt)) for gk, cnt in plan_count_by_group.items()}

# ======================================================
# 9) スパン履歴（42→43連結）★グループ単位
# ======================================================
# Term-43 week positions are shifted by the number of term-42 weeks so that
# both terms share one continuous position axis.
OFFSET_43 = len(week_order_42)

scheduled42["abs_pos"] = scheduled42["week_pos"]
scheduled43["abs_pos"] = scheduled43["week_pos"] + OFFSET_43
open43_AJ["abs_pos"]   = open43_AJ["week_pos"] + OFFSET_43

scheduled_all = pd.concat([scheduled42, scheduled43], ignore_index=True)

# Latest (largest) absolute position seen per prefecture code and per group key.
last_pos_pref, last_pos_group = {}, {}
for _, e in scheduled_all.dropna(subset=["abs_pos"]).iterrows():
    ap = int(e["abs_pos"])
    pc = e["pref_code"]
    if pc:
        last_pos_pref[pc] = max(last_pos_pref.get(pc, -999), ap)
    glist = e.get("group_keys", [])
    if isinstance(glist, list):
        for gk in glist:
            if gk:
                last_pos_group[gk] = max(last_pos_group.get(gk, -999), ap)

# Consumed count per group, counting only the confirmed term-43 rows.
scheduled_counts_43_group = {}
for _, e in scheduled43.iterrows():
    glist = e.get("group_keys", [])
    if isinstance(glist, list):
        for gk in glist:
            if gk:
                scheduled_counts_43_group[gk] = scheduled_counts_43_group.get(gk, 0) + 1

# ======================================================
# 10) 例外増枠プール
# ======================================================
# Work on a copy of the statistics with stripped municipality names.
stats2 = stats.copy()
stats2[muni_col] = stats2[muni_col].astype(str).str.strip()

# Class column: the 10th column (index 9) when the sheet has at least ten
# columns, otherwise the last column. Values are coerced to numbers.
class_col = stats2.columns[9] if len(stats2.columns) >= 10 else stats2.columns[-1]
stats2[class_col] = pd.to_numeric(stats2[class_col], errors="coerce")

# Population column: the first column whose name contains "人口"; failing that,
# the numeric column with the largest maximum; failing that, the last column.
pop_col = next((c for c in stats2.columns if "人口" in str(c)), None)
if pop_col is None:
    num_cols=[]
    for c in stats2.columns:
        s = pd.to_numeric(stats2[c], errors="coerce")
        if s.notna().sum() > 0:
            num_cols.append((c, float(s.max(skipna=True))))
    pop_col = sorted(num_cols, key=lambda x: x[1], reverse=True)[0][0] if num_cols else stats2.columns[-1]
stats2[pop_col] = pd.to_numeric(stats2[pop_col], errors="coerce")

# Overflow candidates: required class value and at least the minimum population.
ovr = stats2[(stats2[class_col] == CONFIG["OVR_CLASS_J_VALUE"]) & (stats2[pop_col] >= CONFIG["OVR_MIN_POP"])].copy()

# Map each candidate municipality to a planned city key; when several names
# map to the same key, the one with the highest match score is kept.
ovr_candidates = {}
for name in ovr[muni_col].tolist():
    ck, sc = match_city_key(name)
    if ck:
        # If the name occurs more than once, the first row's population is used.
        pop_val = float(ovr.loc[ovr[muni_col] == name, pop_col].iloc[0])
        if ck not in ovr_candidates or sc > ovr_candidates[ck]["match_score"]:
            ovr_candidates[ck] = {"name_raw": name, "match_score": sc, "pop": pop_val}

OVR_POOL = set(ovr_candidates.keys())
print(f"✅ 例外増枠プール: {len(OVR_POOL)}（J列={class_col}==1 & 人口列={pop_col}>=18万）")

# ======================================================
# 11) 集客率（あれば）
# ======================================================
# Attendance-rate column: the first column whose name contains one of the three
# rate keywords; None when the statistics have no such column.
rate_col = next((c for c in stats2.columns if ("集客率" in str(c) or "来場率" in str(c) or "動員率" in str(c))), None)
# city key -> rate; stays empty when no rate column exists.
city_rate = {}
if rate_col is not None:
    tmp = stats2[[muni_col, rate_col]].copy()
    tmp["rate"] = pd.to_numeric(tmp[rate_col], errors="coerce")
    tmp = tmp.dropna(subset=["rate"])
    # Keyed by normalized base name; when names collide the later row wins.
    muni_rate_norm = {norm(muni_base(row[muni_col])): float(row["rate"]) for _, row in tmp.iterrows()}
    for ck in city_keys:
        cn = norm(ck)
        if cn in muni_rate_norm:
            city_rate[ck] = muni_rate_norm[cn]

def percentile_need(values_dict):
    """Rank the values of *values_dict* as percentiles and derive a "need" weight.

    Entries whose value is None or a float NaN are ignored.

    Returns:
        (pct_map, need_map): pct_map holds each key's percentile rank
        (pandas ``rank(pct=True, method="average")``), need_map holds
        ``1 - percentile``. Both are empty dicts when no usable value remains.
    """
    kept_keys, kept_vals = [], []
    for key, val in values_dict.items():
        if val is None:
            continue
        if isinstance(val, float) and math.isnan(val):
            continue
        kept_keys.append(key)
        kept_vals.append(val)

    if not kept_keys:
        return {}, {}

    ranks = pd.Series(kept_vals).rank(pct=True, method="average").tolist()
    pct_map = dict(zip(kept_keys, ranks))
    need_map = {key: float(1 - rank) for key, rank in pct_map.items()}
    return pct_map, need_map

# Turn the per-city attendance rates into percentile ranks (city_pct) and the
# complementary weights (city_need = 1 - percentile); both are empty when
# city_rate is empty.
city_pct, city_need = percentile_need(city_rate)

def fmt_rate(v):
    """Format an attendance rate for the reason text.

    None / NaN gives "不明". Values up to 1.0 are shown as a percentage with
    two decimals; larger values are shown as-is with four significant digits.
    """
    missing = v is None or (isinstance(v, float) and math.isnan(v))
    if missing:
        return "不明"
    if v <= 1.0:
        return f"{v*100:.2f}%"
    return f"{v:.4g}"

def fmt_pct(p):
    """Format a percentile rank (0..1) as "下位NN%"; None gives "不明"."""
    return "不明" if p is None else f"下位{p*100:.0f}%"

# ======================================================
# 理由
# ======================================================
def build_reason(variant, week_id, area_label, ck, gk, pc,
                 need_gap_p, gp, need_gap_g, gg,
                 remaining_plan_g, overflow_used, overflow_meta,
                 same_week_pref_hit, same_week_group_hit, relax_mode, score):
    """Compose the explanation text stored with a proposed assignment.

    The text lists, in order: the slot header, the area filter, the linked
    group, the quota (or overflow) justification, the required/actual gaps,
    the attendance rate, the same-week checks and the final score. Sections
    are joined with " / " and the result is cut to 32000 characters.
    """
    # Quota section: either "within plan" or the exceptional-overflow wording.
    if overflow_used:
        pop = overflow_meta.get("pop", None)
        if isinstance(pop, (int, float)) and not math.isnan(pop):
            pop_txt = f"{int(pop):,}人"
        else:
            pop_txt = "不明"
        nm = overflow_meta.get("name_raw", "") or ck
        quota_section = [
            "1) 計画内で埋まらず、例外増枠を使用。",
            f"   例外：J列=1 & 人口>=18万 → {nm}（人口={pop_txt}）",
        ]
    else:
        quota_section = [
            f"1) 計画回数（グループ残）：{gk} 残り {remaining_plan_g} 回 → 計画内で採用（候補={ck}）",
        ]

    relax_note = f"（緩和={relax_mode}）" if relax_mode else ""
    span_line = (f"2) スパン：県=必要{need_gap_p}週/実績{gp}週、グループ=必要{need_gap_g}週/実績{gg}週"
                 + relax_note)

    if ck in city_rate:
        rate_line = f"3) 集客率：{fmt_rate(city_rate.get(ck))}（{fmt_pct(city_pct.get(ck))}）"
    else:
        rate_line = "3) 集客率：データ無し → 回数/スパン優先"

    pref_note = "同県ペナあり" if same_week_pref_hit else "同県OK"
    group_note = " / 同グループペナあり" if same_week_group_hit else ""
    same_week_line = "4) 同週回避：" + pref_note + group_note

    sections = [
        f"【案{variant}】{week_id}／{area_label} の空きAJ枠に対して選定。",
        f"0) 地域フィルタ：{area_label} の範囲内（県コード={pc}）のみ。",
        f"   連動グループ：{gk}（例：木更津+市原=合計年2回、豊中+吹田=合計年2回）",
        *quota_section,
        span_line,
        rate_line,
        same_week_line,
        f"【採用】score={score:.2f}",
    ]
    return " / ".join(sections)[:32000]

# ======================================================
# 週×都道府県 / 週×グループ（43期確定分のみ）
# ======================================================
# week_id -> prefecture codes / group keys already taken by confirmed
# period-43 entries. make_plan copies these as its starting state.
week_used_pref_base = {}
week_used_group_base = {}
for _, entry in scheduled43.iterrows():
    week = entry["week_id"]
    pref = entry["pref_code"]
    groups = entry.get("group_keys", [])

    if week and pref:
        week_used_pref_base.setdefault(week, set()).add(pref)

    # The per-week set is created even when the row carries no usable group key.
    if week and isinstance(groups, list):
        week_used_group_base.setdefault(week, set()).update(g for g in groups if g)

# ======================================================
# 12) プラン生成（A/B/C）
# ======================================================
def make_plan(variant):
    """Build one proposal (variant "A", "B" or "C") for the open AJ slots.

    Slots are taken from ``open43_AJ`` in row order; for each slot the best
    scoring candidate city is chosen greedily and the plan-local state (quota
    used, last position per prefecture/group, per-week usage) is updated before
    the next slot is evaluated.

    Args:
        variant: key into CONFIG["SEEDS"] and CONFIG["WEIGHTS"].

    Returns:
        pandas.DataFrame with one row per slot and the columns ``variant``,
        ``row_header``, ``assign_city_key``, ``pref_code_guess``, ``score`` and
        ``reason_BT``. Slots without any candidate get empty keys and a score
        of ``-inf``.
    """
    # Seeded per variant so that a given variant is reproducible.
    rng = random.Random(CONFIG["SEEDS"][variant])
    W = CONFIG["WEIGHTS"][variant]

    # Plan-local state: everything below is a copy, so the three variants do
    # not influence each other through the module-level base dictionaries.
    used_in_plan_group = {gk: 0 for gk in plan_count_by_group.keys()}
    lp_p = dict(last_pos_pref)
    lp_g = dict(last_pos_group)
    week_used_pref = {w:set(s) for w,s in week_used_pref_base.items()}
    week_used_group = {w:set(s) for w,s in week_used_group_base.items()}

    assigns = []

    def score_city(ck, apos, week_id, area_label, relax_mode, allow_overflow):
        """Score candidate *ck* for the slot at absolute position *apos*.

        Returns None when the candidate is filtered out, otherwise a dict with
        the keys ``ck``, ``gk``, ``pc``, ``score`` and ``reason``.
        """
        gk = group_of_citykey(ck)
        # Prefecture of the group first, falling back to the city's own guess.
        pc = pref_by_group.get(gk, "") or pref_by_city.get(ck, "")

        if not area_allowed(area_label, pc):
            return None

        # Remaining quota = planned count - already confirmed - proposed so far in this plan.
        plan_cnt_g = plan_count_by_group.get(gk, 0)
        already_g  = scheduled_counts_43_group.get(gk, 0)
        remaining_g = plan_cnt_g - already_g - used_in_plan_group.get(gk, 0)

        overflow_used = False
        overflow_meta = {}

        # Quota exhausted: only members of the overflow pool may still be used,
        # and only in the pass that allows overflow.
        if remaining_g <= 0:
            if not allow_overflow:
                return None
            if ck not in OVR_POOL:
                return None
            overflow_used = True
            meta = ovr_candidates.get(ck, {})
            overflow_meta = {"name_raw": meta.get("name_raw",""), "pop": meta.get("pop", float("nan"))}

        # Required gaps (weeks) default to the configured minimum.
        need_gap_p = pref_gap.get(pc, CONFIG["GAP_WEEKS_MIN"]) if pc else CONFIG["GAP_WEEKS_MIN"]
        need_gap_g = group_gap.get(gk, CONFIG["GAP_WEEKS_MIN"])

        last_p = lp_p.get(pc, None) if pc else None
        last_g = lp_g.get(gk, None)

        # 999 is a sentinel for "no previous event"; it also inflates the slack
        # terms of the score below.
        # NOTE(review): if a stored last position is later than apos the gap is
        # negative - confirm how last_pos_pref / last_pos_group are built.
        gp = 999 if last_p is None else (apos - last_p)
        gg = 999 if last_g is None else (apos - last_g)

        ok_p = (last_p is None) or (gp >= need_gap_p)
        ok_g = (last_g is None) or (gg >= need_gap_g)

        # relax_mode None: both gaps must hold. "Aのみ": only the prefecture gap
        # is enforced. "B+A": neither gap is enforced.
        if relax_mode is None:
            if not (ok_p and ok_g):
                return None
        elif relax_mode == "Aのみ":
            if not ok_p:
                return None
        elif relax_mode == "B+A":
            pass

        if pc and is_snow_blackout(pc, gk, week_id):
            return None

        # Same-week collisions are penalised rather than forbidden.
        same_week_pref_hit  = (pc and pc in week_used_pref.get(week_id, set()))
        same_week_group_hit = (gk and gk in week_used_group.get(week_id, set()))

        same_week_pen = (CONFIG["SAME_WEEK_PREF_PENALTY"] if same_week_pref_hit else 0.0) + \
                        (CONFIG["SAME_WEEK_CITY_PENALTY"] if same_week_group_hit else 0.0)

        overflow_pen = CONFIG["OVR_OVERFLOW_PENALTY"] if overflow_used else 0.0

        relax_pen = 0.0
        if relax_mode == "Aのみ":
            relax_pen = W["relax_penalty"] * 0.6
        elif relax_mode == "B+A":
            relax_pen = W["relax_penalty"] * 1.0

        slack_p = gp - need_gap_p
        slack_g = gg - need_gap_g
        # Cities without a rate get the neutral need value 0.5.
        need_city = city_need.get(ck, 0.5)

        # One random draw is consumed per candidate that reaches this point, so
        # the random stream depends on how many candidates pass the filters above.
        score = (
            slack_p * W["pref_slack"] +
            slack_g * W["city_slack"] +
            max(remaining_g, 0) * W["unmet_bonus"] * 5.0 +
            (need_city * 10) * W["low_attr_city"] -
            relax_pen - same_week_pen - overflow_pen +
            rng.uniform(-0.5, 0.5) * W["noise"]
        )

        reason = build_reason(
            variant, week_id, area_label, ck, gk, pc,
            need_gap_p, gp, need_gap_g, gg,
            remaining_g,
            overflow_used, overflow_meta,
            same_week_pref_hit, same_week_group_hit, relax_mode, score
        )
        return {"ck": ck, "gk": gk, "pc": pc, "score": float(score), "reason": reason}

    for _, slot in open43_AJ.iterrows():
        apos = int(slot["abs_pos"])
        week_id = slot["week_id"]
        area_label = slot.get("area_label", "")

        best = None

        # Pass 1: stay inside the planned quota, relaxing the gap rules step by
        # step and stopping at the first level that yields any candidate.
        for relax_mode in [None, "Aのみ", "B+A"]:
            for ck in city_keys:
                cand = score_city(ck, apos, week_id, area_label, relax_mode, allow_overflow=False)
                if cand and (best is None or cand["score"] > best["score"]):
                    best = cand
            if best is not None:
                break

        # Pass 2: same ladder again, now allowing overflow-pool cities.
        if best is None:
            for relax_mode in [None, "Aのみ", "B+A"]:
                for ck in city_keys:
                    cand = score_city(ck, apos, week_id, area_label, relax_mode, allow_overflow=True)
                    if cand and (best is None or cand["score"] > best["score"]):
                        best = cand
                if best is not None:
                    break

        # Nothing fits: record an empty assignment so the slot still has a row.
        if best is None:
            assigns.append({
                "variant": variant, "row_header": int(slot["row_header"]),
                "assign_city_key": "", "pref_code_guess": "", "score": float("-inf"),
                "reason_BT": f"案{variant}: 条件により候補なし（地域={area_label}）"
            })
            continue

        ck = best["ck"]; gk = best["gk"]; pc = best["pc"]

        # Commit the choice to the plan-local state before scoring the next slot.
        used_in_plan_group[gk] = used_in_plan_group.get(gk, 0) + 1
        lp_g[gk] = apos
        week_used_group.setdefault(week_id, set()).add(gk)

        if pc:
            lp_p[pc] = apos
            week_used_pref.setdefault(week_id, set()).add(pc)

        assigns.append({
            "variant": variant, "row_header": int(slot["row_header"]),
            "assign_city_key": ck, "pref_code_guess": pc, "score": best["score"],
            "reason_BT": best["reason"]
        })

    return pd.DataFrame(assigns)

# Generate the three alternative proposals, in the order A, B, C.
planA, planB, planC = [make_plan(label) for label in ("A", "B", "C")]

def to_map(df, col):
    """Return ``{int(row_header): value of *col*}`` for every row of *df*.

    A frame without rows (even one without columns) yields an empty dict.
    """
    mapping = {}
    for _, record in df.iterrows():
        mapping[int(record["row_header"])] = record[col]
    return mapping

# Per-variant lookups keyed by the slot's header row index.
A_city, B_city, C_city = [to_map(plan, "assign_city_key") for plan in (planA, planB, planC)]
A_pref, B_pref, C_pref = [to_map(plan, "pref_code_guess") for plan in (planA, planB, planC)]
# Only the reason text of variant A is written back to the sheet.
A_reason = to_map(planA, "reason_BT")

# ======================================================
# 13) 書き戻し（43期の空きAJ枠だけに書く）
# ======================================================
wb = openpyxl.load_workbook(QUARTER_XLSX)

# The output workbook keeps only the period-42 and period-43 master sheets.
keep_sheets = {sheet42, sheet43}
for name in list(wb.sheetnames):
    if name in keep_sheets:
        continue
    wb.remove(wb[name])

ws43 = wb[sheet43]

written = 0
for row0 in open43_AJ["row_header"].tolist():
    slot_key = int(row0)
    r = slot_key + 1  # DataFrame row index is 0-based, worksheet rows are 1-based

    city_cell = ws43.cell(r, COL_CITY+1)
    venue_cell = ws43.cell(r, COL_VENUE+1)
    c_val = city_cell.value
    d_val = venue_cell.value

    # Only overwrite a slot that is still open: empty city AND a venue that is
    # empty or still the placeholder.
    city_is_blank = c_val is None or str(c_val).strip() == ""
    if not city_is_blank or not is_placeholder_venue(d_val):
        continue

    a = str(A_city.get(slot_key, "") or "")
    b = str(B_city.get(slot_key, "") or "")
    c = str(C_city.get(slot_key, "") or "")

    # Variant A goes into the city column; B and C are appended to the venue placeholder.
    city_cell.value = a
    venue_cell.value = f"{VENUE_PLACEHOLDER}（B:{b or '-'} / C:{c or '-'}）"

    # Fill the prefecture only when the cell is blank and some variant supplies one.
    pref_candidate = A_pref.get(slot_key, "") or B_pref.get(slot_key, "") or C_pref.get(slot_key, "")
    pref_cell = ws43.cell(r, COL_PREF+1)
    current_pref = pref_cell.value
    if (current_pref is None or str(current_pref).strip() == "") and pref_candidate:
        pref_cell.value = str(pref_candidate)

    ws43.cell(r, COL_REASON_BT+1).value = str(A_reason.get(slot_key, "案A: 理由生成なし"))
    written += 1

wb.save(OUT_QUARTER_ABC)

print("\n✅ 入力:", QUARTER_XLSX)
print("✅ 出力:", OUT_QUARTER_ABC)
print("✅ 43期 空きAJ枠（city空＆会場要検討）:", len(open43_AJ))
print("✅ 書込数:", written)
print("✅ グループcap例:", {k:GROUP_CAP.get(k) for k in sorted(GROUP_CAP)[:20]})


✅ 例外増枠プール: 1（J列=Zの4分位（1=赤,2=黄,3=青,4=灰）==1 & 人口列=人口>=18万）

✅ 入力: SA+AJ+共有用_四半期表20240303.xlsx
✅ 出力: SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT.xlsx
✅ 43期 空きAJ枠（city空＆会場要検討）: 137
✅ 書込数: 137
✅ グループcap例: {'木更津_市原': 2, '豊中_吹田': 2}


In [None]:
# ============================================
# 43期 スケジュール自動提案 MVP（A/B/C案）
# ★東/西/九州の都道府県範囲フィルタ
# ★alias / 読み / 誤字ゆれ吸収（会場マスタ不使用）
# ★束ね対応（木更津・市原など）＝構成市すべて同一エリア扱い（A仕様：両方1消化）
# ★さらに追加：連動グループ対応（木更津+市原=合計年2回、豊中+吹田=合計年2回 等）
#   → 回数とスパンは group_key 単位で管理（年2回→gap=約26週）
# ============================================

!pip -q install fugashi unidic-lite jaconv

import os, re, math, random, difflib
import pandas as pd
import openpyxl
import jaconv
from fugashi import Tagger
tagger = Tagger()

# ====== Inputs ======
# Use a previously uploaded proposal workbook when present (copy "(2)" before
# copy "(1)"); otherwise start from the base workbook.
UP2 = "/mnt/data/SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT (2).xlsx"
UP1 = "/mnt/data/SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT (1).xlsx"
BASE = "SA+AJ+共有用_四半期表20240303.xlsx"
QUARTER_XLSX = next((path for path in (UP2, UP1) if os.path.exists(path)), BASE)

REGION_XLSX      = "43期地域別会場回数.xlsx"
STATS_XLSX       = "市区町村_統計量_全国 (1).xlsx"
SNOW_EXCEPT_XLSX = "豪雪例外リスト.xlsx"

OUT_QUARTER_ABC = "SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT.xlsx"

# ====== Column positions (0-based) ======
COL_WEEK = 0
COL_AREA_OR_KIND = 1     # header row: 東/西/九州; following row: AJ/合同/SA
COL_CITY = 2
COL_VENUE = 3
COL_PREF = 5
COL_REASON_BT = 72

VENUE_PLACEHOLDER = "会場要検討"
# Week ids look like "6-4w": one or two digits, a dash, one digit and "w".
week_id_pat = re.compile(r"^\d{1,2}-\dw$")

CONFIG = dict(
    # Bounds (in weeks) for the gap derived from the planned yearly count.
    GAP_WEEKS_MIN=6,
    GAP_WEEKS_MAX=30,
    SAME_WEEK_PREF_PENALTY=80.0,
    # Same week + same linked group is penalised rather than forbidden,
    # because forbidding it leaves slots with no candidate.
    SAME_WEEK_CITY_PENALTY=60.0,

    FUZZY_CUTOFF=0.86,
    CANON_CUTOFF=0.93,

    SNOW_BLACKOUT_MONTHS={12, 1, 2},
    SNOW_BLACKOUT_MARCH_W={1},
    SNOW_PREF_CODES={
        "01北海道", "02青森", "03岩手", "04宮城", "05秋田", "06山形", "07福島",
        "15新潟", "16富山", "17石川", "18福井", "19山梨",
        "20長野", "21岐阜", "31鳥取", "32島根",
    },

    OVR_CLASS_J_VALUE=1,
    OVR_MIN_POP=180000,
    OVR_OVERFLOW_PENALTY=15.0,

    # Score weights per proposal variant.
    WEIGHTS={
        "A": dict(pref_slack=3.0, city_slack=2.0, unmet_bonus=1.2,
                  low_attr_city=2.0, relax_penalty=25.0, noise=0.8),
        "B": dict(pref_slack=4.5, city_slack=3.2, unmet_bonus=1.0,
                  low_attr_city=0.6, relax_penalty=30.0, noise=0.8),
        "C": dict(pref_slack=2.0, city_slack=1.2, unmet_bonus=1.2,
                  low_attr_city=3.2, relax_penalty=25.0, noise=0.8),
    },
    # Random seeds per proposal variant.
    SEEDS={"A": 4301, "B": 4302, "C": 4303},
)

# ======================================================
# 0) alias辞書（CSV）…追記して育てる運用
# ======================================================
ALIAS_CSV = "市区分_alias.csv"

def ensure_alias_template(path=ALIAS_CSV):
    """Write a two-row starter alias CSV to *path* unless a file already exists there."""
    if not os.path.exists(path):
        seed_rows = [
            {"alias":"なんば","canonical":"難波"},
            {"alias":"薩摩河内","canonical":"薩摩川内"},
        ]
        pd.DataFrame(seed_rows).to_csv(path, index=False, encoding="utf-8-sig")
        print(f"✅ aliasテンプレ作成: {path}（必要に応じて追記してください）")

def load_alias_map(path=ALIAS_CSV):
    """Load the alias -> canonical spelling map from the CSV at *path*.

    The template is created first when the file is missing. Rows with an empty
    alias or canonical value are skipped. Any failure while reading is reported
    on stdout and an empty map is returned, so the run continues without aliases.
    """
    ensure_alias_template(path)
    try:
        table = pd.read_csv(path, dtype=str).fillna("")
        aliases = table["alias"].astype(str).str.strip()
        canonicals = table["canonical"].astype(str).str.strip()
        mapping = {}
        for alias, canonical in zip(aliases, canonicals):
            if alias and canonical:
                mapping[alias] = canonical
        return mapping
    except Exception as e:
        print("⚠️ alias読み込み失敗。aliasなしで続行:", e)
        return {}

ALIAS_MAP = load_alias_map(ALIAS_CSV)

def apply_alias(s: str) -> str:
    """Return the canonical spelling registered for *s*, or the trimmed *s* itself.

    None and blank input give "".
    """
    text = "" if s is None else str(s).strip()
    return ALIAS_MAP.get(text, text) if text else ""

# ======================================================
# ★追加：連動グループ定義（CSVで育てる）
#   city_key,group_key,cap
#   例：木更津,木更津_市原,2
#       市原,木更津_市原,2
#       豊中,豊中_吹田,2
#       吹田,豊中_吹田,2
# ======================================================
GROUP_CSV = "市区分_group.csv"

def ensure_group_template(path=GROUP_CSV):
    """Write a starter linked-group CSV to *path* unless a file already exists there."""
    if not os.path.exists(path):
        seed_rows = [
            {"city_key":"木更津","group_key":"木更津_市原","cap":"2"},
            {"city_key":"市原","group_key":"木更津_市原","cap":"2"},
            {"city_key":"豊中","group_key":"豊中_吹田","cap":"2"},
            {"city_key":"吹田","group_key":"豊中_吹田","cap":"2"},
        ]
        pd.DataFrame(seed_rows).to_csv(path, index=False, encoding="utf-8-sig")
        print(f"✅ groupテンプレ作成: {path}（必要に応じて追記してください）")

def load_group_map(path=GROUP_CSV):
    """Load the linked-group definitions from the CSV at *path*.

    The template is created first when the file is missing.

    Returns:
        (city_to_group, group_cap): city_to_group maps a city key to its group
        key; group_cap maps a group key to its integer cap. The cap is expected
        to be the same on every row of a group; a later row overwrites an
        earlier one. Rows with an empty cap leave the group without an entry.
        Any failure while reading the file is reported on stdout and
        ``({}, {})`` is returned, so the run continues without groups.
    """
    ensure_group_template(path)
    try:
        df = pd.read_csv(path, dtype=str).fillna("")
        cities = df["city_key"].astype(str).str.strip()
        groups = df["group_key"].astype(str).str.strip()
        caps = df["cap"].astype(str).str.strip()

        city_to_group = {}
        group_cap = {}
        for ck, gk, cap in zip(cities, groups, caps):
            if not (ck and gk):
                continue
            city_to_group[ck] = gk
            if not cap:
                continue
            try:
                group_cap[gk] = int(float(cap))
            except (ValueError, OverflowError):
                # Non-numeric text and "nan" raise ValueError, "inf" raises
                # OverflowError. The cap is skipped as before, but no longer
                # via a bare except that would also hide unrelated errors.
                print(f"⚠️ cap値を数値として解釈できません（無視）: group={gk}, cap={cap!r}")
        return city_to_group, group_cap
    except Exception as e:
        print("⚠️ group読み込み失敗。groupなしで続行:", e)
        return {}, {}

CITY_TO_GROUP, GROUP_CAP = load_group_map(GROUP_CSV)

def group_of_citykey(ck: str) -> str:
    """Return the linked-group key for *ck*; a city without a group is its own group.

    Empty or None input gives "".
    """
    return CITY_TO_GROUP.get(ck, ck) if ck else ""

# ======================================================
# 1) 正規化・読みキー・誤字ゆれ生成
# ======================================================
def norm(s):
    """Normalise a label for comparison.

    None / NaN gives "". Otherwise the value is converted to text, and
    whitespace, dash-like characters (including the long-vowel mark "ー"),
    slashes, punctuation and brackets are removed.
    """
    is_missing = s is None or (isinstance(s, float) and math.isnan(s))
    if is_missing:
        return ""
    text = str(s).replace("　"," ").strip()
    return re.sub(r"[ \t\n\r\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", text)

def muni_base(name: str) -> str:
    """Strip one trailing prefecture suffix, then one trailing municipality suffix.

    The prefecture suffixes (都/道/府/県) are tried first, then 市/区/町/村.
    None gives "".

    NOTE(review): the function is not idempotent - a base name that itself ends
    in one of these characters is shortened again (e.g. "四日市" becomes
    "四日"). Confirm callers never pass an already stripped name.
    """
    if name is None:
        return ""
    base = str(name).strip()
    for suffix_pattern in (r"(都|道|府|県)$", r"(市|区|町|村)$"):
        base = re.sub(suffix_pattern, "", base)
    return base

def to_katakana_reading(s: str) -> str:
    """Return a katakana reading key for *s* (used to match names by sound).

    Separators and brackets are removed first. A string made only of
    hiragana/katakana is converted directly; anything else is tokenised with
    the module-level ``tagger`` and the per-token readings are concatenated.
    The result keeps only katakana, digits and upper-case ASCII letters.
    None and blank input give "".
    """
    if s is None:
        return ""
    s = str(s).strip()
    if not s:
        return ""
    s2 = re.sub(r"[ \t\r\n\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", s)

    # Take the direct route only when the WHOLE string is kana. The previous
    # check fired as soon as any kana was present, so for a mixed name such as
    # "南あわじ" the kanji part was discarded and the key collapsed to "アワジ".
    if re.fullmatch(r"[ぁ-んァ-ン]+", s2):
        s2 = jaconv.normalize(jaconv.hira2kata(s2))
        s2 = re.sub(r"[^ァ-ン0-9A-Z]", "", s2)
        return s2

    yomi_parts = []
    for w in tagger(s2):
        feat = w.feature
        reading = None
        # Use the first reading-like attribute the dictionary feature exposes.
        for k in ["reading", "kana", "pron"]:
            if hasattr(feat, k):
                reading = getattr(feat, k)
                break
        # No reading available: fall back to the surface form.
        if not reading or reading == "*":
            reading = w.surface
        yomi_parts.append(reading)

    yomi = "".join(yomi_parts)
    yomi = jaconv.normalize(jaconv.hira2kata(yomi))
    yomi = re.sub(r"[^ァ-ン0-9A-Z]", "", yomi)
    return yomi

# Characters that are commonly confused or written interchangeably in place names.
CONFUSION = {
    "川": ["河"], "河": ["川"],
    "崎": ["﨑"], "﨑": ["崎"],
    "ヶ": ["ケ"], "ケ": ["ヶ"],
    "斉": ["齋", "斎"], "齋": ["斉", "斎"], "斎": ["斉", "齋"],
    "邊": ["辺", "邉"], "邉": ["辺", "邊"], "辺": ["邊", "邉"],
}

def gen_variants(s: str, limit=12):
    """Return spelling variants of *s* built from the CONFUSION table.

    Only characters present in the original *s* trigger a substitution round;
    each round replaces every occurrence of that character in all variants
    produced so far.

    Args:
        s: text to vary; None gives ``[""]``.
        limit: maximum number of variants returned.

    Returns:
        A list that starts with *s* itself and continues in generation order.
        Unlike a set-based result, the order is stable across runs, and the
        original spelling can no longer be cut off by the limit (for limit >= 1).
    """
    if s is None:
        return [""]
    s = str(s)
    variants = [s]
    seen = {s}
    for src, alternatives in CONFUSION.items():
        if src in s:
            # Iterate over a snapshot: variants created in this round are not
            # expanded again within the same round.
            for current in list(variants):
                for alt in alternatives:
                    candidate = current.replace(src, alt)
                    if candidate not in seen:
                        seen.add(candidate)
                        variants.append(candidate)
        if len(variants) >= limit:
            break
    return variants[:limit]

def best_ratio(a, b):
    """Return the difflib similarity ratio (0.0 - 1.0) between *a* and *b*."""
    matcher = difflib.SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

# ======================================================
# ★束ね分解 + D列ヒント
# ======================================================
# Separators between the cities of a bundled entry (middle dot, slashes,
# commas, plus, ampersand, any whitespace).
DELIMS = r"[・／/、,＋+＆&\s　]+"

def strip_annotations(s: str) -> str:
    """Cut *s* at the first opening bracket or status word and trim the rest.

    Status words are 会場 / 要検討 / 検討 / 確定. Falsy input gives "".
    """
    if not s:
        return ""
    head, *_ = re.split(r"[（(]", str(s), maxsplit=1)
    head, *_ = re.split(r"(会場|要検討|検討|確定)", head, maxsplit=1)
    return head.strip()

def split_city_tokens(raw: str):
    """Split a (possibly bundled) city cell into unique base names, in order.

    Annotations are stripped, the text is split on DELIMS, each piece goes
    through the alias map and ``muni_base``; empty results are dropped.
    """
    cleaned = strip_annotations(raw)
    if not cleaned:
        return []

    bases = []
    for piece in re.split(DELIMS, cleaned):
        piece = piece.strip() if piece else ""
        if not piece:
            continue
        base = muni_base(apply_alias(piece))
        if base:
            bases.append(base)

    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(bases))

def find_city_tokens_in_text(text: str, candidates_city_keys):
    """Return the candidate keys whose normalised form occurs inside *text*.

    Matching is a plain substring test on the normalised strings. The result
    keeps candidate order and contains each key once.
    """
    if not text:
        return []
    haystack = norm(text)
    if not haystack:
        return []

    found = {}
    for ck in candidates_city_keys:
        needle = norm(ck)
        if needle and needle in haystack:
            found.setdefault(ck, None)
    return list(found)

# ======================================================
# 2) 東/西/九州 の都道府県範囲
# ======================================================
# Two-digit prefecture numbers belonging to each area label.
# NOTE(review): 34 and 35 are grouped with Kyushu, and 47 belongs to no set,
# so it is rejected for all three labels - confirm both are intended.
EAST_PREF_NUMS   = set(range(1, 17)) | {19, 20, 21, 22, 23, 24}
WEST_PREF_NUMS   = {17, 18} | set(range(25, 34)) | {36, 37, 38, 39}
KYUSHU_PREF_NUMS = {34, 35} | set(range(40, 47))

def pref_num(pref_code_str: str):
    """Return the leading two-digit number of a prefecture code as int, else None."""
    if not pref_code_str:
        return None
    leading = re.match(r"^(\d{2})", str(pref_code_str).strip())
    if leading is None:
        return None
    return int(leading.group(1))

def area_allowed(area_label: str, pref_code_str: str) -> bool:
    """Tell whether a prefecture may be used for a slot with *area_label*.

    A prefecture whose number cannot be parsed, or a label other than
    東 / 西 / 九州, is always allowed.
    """
    label = str(area_label).strip() if area_label else ""
    number = pref_num(pref_code_str)
    if number is None:
        return True

    area_sets = {"東": EAST_PREF_NUMS, "西": WEST_PREF_NUMS, "九州": KYUSHU_PREF_NUMS}
    members = area_sets.get(label)
    if members is None:
        return True
    return number in members

# ======================================================
# 3) 補助関数
# ======================================================
def find_sheet_name(xlsx_path, keywords):
    """Return the first sheet name containing every keyword, else the first sheet.

    Args:
        xlsx_path: workbook to inspect.
        keywords: substrings that must all occur in the sheet name.
    """
    wb = openpyxl.load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        names = list(wb.sheetnames)
    finally:
        # A read-only workbook keeps the file open until close() is called;
        # without this every call leaked a file handle.
        wb.close()

    for name in names:
        if all(k in name for k in keywords):
            return name
    return names[0]

def parse_week_id(week_id):
    """Split a week id such as "6-4w" into ``(month, week)`` integers.

    Anything that does not match the pattern gives ``(None, None)``.
    """
    matched = re.match(r"^(\d{1,2})-(\d)w$", str(week_id))
    if matched is None:
        return None, None
    month_txt, week_txt = matched.groups()
    return int(month_txt), int(week_txt)

def gap_weeks_from_count(cnt):
    """Derive the required gap (weeks) from a planned yearly count.

    The gap is ``ceil(52 / cnt)`` clamped to CONFIG["GAP_WEEKS_MIN"] ..
    CONFIG["GAP_WEEKS_MAX"]; a count of zero or less gives the minimum.
    """
    shortest = CONFIG["GAP_WEEKS_MIN"]
    if cnt <= 0:
        return shortest
    longest = CONFIG["GAP_WEEKS_MAX"]
    even_spacing = int(math.ceil(52 / cnt))
    return max(shortest, min(longest, even_spacing))

def ensure_snow_except_template(path):
    """Create an empty snow-exception workbook at *path* unless a file already exists.

    The workbook has one sheet named "例外" with the header cells
    pref_code / city_key / memo in A1..C1.
    """
    if os.path.exists(path):
        return
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "例外"
    for cell_ref, header in (("A1", "pref_code"), ("B1", "city_key"), ("C1", "memo")):
        ws[cell_ref] = header
    wb.save(path)

def load_snow_excepts(path):
    """Read the snow-blackout exceptions from the first sheet of *path*.

    The template is created first when the file is missing.

    Returns:
        (pref_codes, city_keys): two sets of trimmed, non-empty strings taken
        from the columns "pref_code" and "city_key"; a missing column gives an
        empty set.
    """
    ensure_snow_except_template(path)
    df = pd.read_excel(path, sheet_name=0, dtype=str)

    def _column_values(column):
        series = df.get(column, pd.Series([], dtype=str))
        trimmed = series.dropna().astype(str).str.strip()
        return {value for value in set(trimmed) if value}

    return _column_values("pref_code"), _column_values("city_key")

SNOW_EXCEPT_PREF_CODES, SNOW_EXCEPT_CITY_KEYS = load_snow_excepts(SNOW_EXCEPT_XLSX)

def is_snow_blackout(pref_code, group_key, week_id):
    """Tell whether a slot falls into the snow blackout for this prefecture/group.

    Returns False when the prefecture code is empty, is listed as an exception,
    is not in CONFIG["SNOW_PREF_CODES"], when the group key is listed as an
    exception, or when *week_id* cannot be parsed. Otherwise the slot is blocked
    in the months of CONFIG["SNOW_BLACKOUT_MONTHS"] and in the March weeks
    listed in CONFIG["SNOW_BLACKOUT_MARCH_W"].
    """
    if not pref_code:
        return False
    if pref_code in SNOW_EXCEPT_PREF_CODES:
        return False
    if group_key and group_key in SNOW_EXCEPT_CITY_KEYS:
        return False
    if pref_code not in CONFIG["SNOW_PREF_CODES"]:
        return False

    month, w = parse_week_id(week_id)
    if month is None:
        return False
    if month in CONFIG["SNOW_BLACKOUT_MONTHS"]:
        return True
    # The March weeks now come from CONFIG instead of a hard-coded {1}, which
    # silently ignored the setting; {1} stays the fallback when the key is absent.
    return month == 3 and w in CONFIG.get("SNOW_BLACKOUT_MARCH_W", {1})

def build_week_order(qsheet):
    """Collect the distinct week ids of the week column in sheet order.

    Cell values are trimmed before matching, the same way ``extract_blocks``
    trims them. Previously a week id with stray whitespace was missing from
    the index here, and its rows were then dropped there for lack of a position.

    Returns:
        (week_order, week_index): the ordered list of week ids and a dict
        mapping each id to its position in that list.
    """
    week_order, seen = [], set()
    for v in qsheet[COL_WEEK].astype(str).str.strip().tolist():
        if week_id_pat.match(v) and v not in seen:
            week_order.append(v)
            seen.add(v)
    return week_order, {w: i for i, w in enumerate(week_order)}

def kind_norm(x):
    """Normalise the slot kind label to full-width "ＡＪ" / "ＳＡ".

    None / NaN gives "". Half-width upper-case and full-width lower-case
    spellings are rewritten; other text is returned trimmed.
    """
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return ""
    label = str(x).strip()
    for src, dst in (("AJ", "ＡＪ"), ("ａｊ", "ＡＪ"), ("SA", "ＳＡ"), ("ｓａ", "ＳＡ")):
        label = label.replace(src, dst)
    return label

def extract_blocks(qsheet, week_index_map, which):
    """Extract the AJ / joint (合同) slots of a quarter sheet as a DataFrame.

    A slot is a row whose week column holds a week id (header row) together
    with the row directly below it (detail row). The area label, city, venue
    and prefecture come from the header row; the kind comes from the detail
    row. The last row of the sheet is never treated as a header because it has
    no detail row.

    Args:
        qsheet: sheet read with ``header=None``.
        week_index_map: week id -> position; slots whose week id is not in the
            map are dropped.
        which: label stored in the ``fy`` column.

    Returns:
        DataFrame with the columns fy, week_id, week_pos, row_header,
        row_detail, area_label, kind, city_raw, venue_raw, pref_code, limited
        to the kinds "ＡＪ" and "合同". A sheet without any slot gives an empty
        frame with these columns (it used to raise KeyError in ``dropna``).
    """
    columns = [
        "fy", "week_id", "week_pos", "row_header", "row_detail",
        "area_label", "kind", "city_raw", "venue_raw", "pref_code",
    ]

    def _text(value):
        # Blank cells become "", everything else trimmed text.
        return "" if pd.isna(value) else str(value).strip()

    blocks = []
    for i in range(len(qsheet) - 1):
        w = qsheet.iat[i, COL_WEEK]
        if pd.isna(w):
            continue
        w = str(w).strip()
        if not week_id_pat.match(w):
            continue
        header = qsheet.iloc[i]
        detail = qsheet.iloc[i + 1]

        blocks.append({
            "fy": which, "week_id": w, "week_pos": week_index_map.get(w, None),
            "row_header": i, "row_detail": i + 1,
            "area_label": _text(header[COL_AREA_OR_KIND]),
            "kind": kind_norm(detail[COL_AREA_OR_KIND]),
            "city_raw": _text(header[COL_CITY]),
            "venue_raw": _text(header[COL_VENUE]),
            "pref_code": _text(header[COL_PREF]),
        })

    # Passing the column list keeps the frame well-formed even without rows.
    df = pd.DataFrame(blocks, columns=columns).dropna(subset=["week_pos"]).copy()
    df["week_pos"] = df["week_pos"].astype(int)
    return df[df["kind"].isin(["ＡＪ","合同"])].copy()

# ======================================================
# 4) 統計量（例外増枠用 + 正規化基準）
# ======================================================
stats = pd.read_excel(STATS_XLSX, sheet_name=0)
stats.columns = [str(c).strip() for c in stats.columns]
# Municipality column: the one named "市区町村", else the first column.
muni_col = "市区町村" if "市区町村" in stats.columns else stats.columns[0]

# Distinct, non-empty base names in order of first appearance.
# NOTE(review): astype(str) presumably turns missing cells into the literal
# text "nan", which then survives as a base name - confirm against the sheet.
stats_base_list = list(dict.fromkeys(
    base
    for base in (muni_base(raw).strip() for raw in stats[muni_col].astype(str))
    if base != ""
))

# Normalised form -> base name; a later base overwrites an earlier one with
# the same normalised form.
stats_norm_to_base = {}
for base_name in stats_base_list:
    if base_name:
        stats_norm_to_base[norm(base_name)] = base_name
stats_norms = list(stats_norm_to_base)

def canonize_city_key(city_key_raw: str):
    """Map a raw city key onto the base-name spelling used by the statistics sheet.

    Steps, first hit wins: exact normalised match (score 1.0), match of a
    confusion-table variant (0.995), best fuzzy match at or above
    CONFIG["CANON_CUTOFF"]. When nothing qualifies, the alias-resolved base
    name itself is returned together with the best fuzzy ratio seen.

    Returns:
        (canonical_key, score); ``("", 0.0)`` for falsy input.
    """
    if not city_key_raw:
        return "", 0.0

    b = muni_base(apply_alias(city_key_raw))
    bn = norm(b)
    if bn and bn in stats_norm_to_base:
        return stats_norm_to_base[bn], 1.0

    variants = gen_variants(b)
    for variant in variants:
        vn = norm(variant)
        if vn and vn in stats_norm_to_base:
            return stats_norm_to_base[vn], 0.995

    # Fuzzy search over every statistics name, for the base and all variants.
    best_base, best_r = None, 0.0
    for probe in [b] + variants:
        probe_norm = norm(probe)
        for stats_norm in stats_norms:
            ratio = best_ratio(probe_norm, stats_norm)
            if ratio > best_r:
                best_base, best_r = stats_norm_to_base[stats_norm], ratio

    if best_base and best_r >= CONFIG["CANON_CUTOFF"]:
        return best_base, best_r
    return b, best_r

# ======================================================
# 5) 四半期表読み込み
# ======================================================
# Locate the period-42 and period-43 master sheets by keyword.
sheet42, sheet43 = [
    find_sheet_name(QUARTER_XLSX, [period, "マスタ"]) for period in ("42期", "43期")
]

# Read both sheets without a header row so that columns are addressed by position.
q42, q43 = [
    pd.read_excel(QUARTER_XLSX, sheet_name=sheet, header=None) for sheet in (sheet42, sheet43)
]

week_order_42, week_index_42 = build_week_order(q42)
week_order_43, week_index_43 = build_week_order(q43)

# AJ / joint slots of each period.
b42, b43 = [
    extract_blocks(sheet_df, week_index, tag)
    for sheet_df, week_index, tag in ((q42, week_index_42, "42"), (q43, week_index_43, "43"))
]

# ======================================================
# 6) 地域別回数読み込み → 市区分キー正規化して集約
# ======================================================
r = pd.read_excel(REGION_XLSX, sheet_name=0, header=None, dtype=str)

# Keep rows that carry both a city (column 1) and a count (column 2).
plan_rows = r[r[2].notna() & r[1].notna()].copy()
# NOTE(review): the forward fill runs after the filter above, so a prefecture
# label that sits on a row without city/count is already gone here - confirm
# the label shares a row with the first city of its prefecture.
plan_rows["pref_parent"] = plan_rows[0].ffill()
plan_rows["city_key_raw"] = plan_rows[1].astype(str).str.strip()
# Non-numeric counts become 0.
plan_rows["plan_count"] = pd.to_numeric(plan_rows[2], errors="coerce").fillna(0).astype(int)

# Canonical city key per row (the match score is not needed here).
canon_list = [canonize_city_key(raw)[0] for raw in plan_rows["city_key_raw"].tolist()]
plan_rows["city_key"] = canon_list

# One row per (prefecture, city key) with the summed planned count.
region_master = plan_rows.groupby(["pref_parent","city_key"], as_index=False)["plan_count"].sum()

city_keys = list(region_master["city_key"])
plan_count_by_city = region_master.set_index("city_key")["plan_count"].to_dict()

# ===== 連動グループで plan_count を組み直す（cap指定があればそれ優先）=====
# Group key -> member city keys, in candidate order.
group_members = {}
for ck in city_keys:
    group_members.setdefault(group_of_citykey(ck), []).append(ck)

# Planned count per group: a positive cap from the group CSV wins; otherwise
# the members' planned counts are summed.
plan_count_by_group = {}
for gk, members in group_members.items():
    cap = GROUP_CAP.get(gk, 0)
    if cap > 0:
        plan_count_by_group[gk] = cap
        continue
    plan_count_by_group[gk] = int(sum(plan_count_by_city.get(m, 0) for m in members))

# ======================================================
# 7) 候補側の索引（表記norm + 読み）
# ======================================================
# City key -> normalised spelling.
city_norm_map = {}
for ck in city_keys:
    city_norm_map[ck] = norm(ck)

# Normalised spelling -> city keys sharing it.
norm_to_city = {}
for ck, nk in city_norm_map.items():
    if not nk:
        continue
    norm_to_city.setdefault(nk, []).append(ck)

# Katakana reading -> city keys sharing it.
reading_to_city = {}
for ck in city_keys:
    rd = to_katakana_reading(ck)
    if not rd:
        continue
    reading_to_city.setdefault(rd, []).append(ck)

def choose_best_by_fuzzy(query_base, cands):
    """Pick the candidate most similar to *query_base* (normalised comparison).

    Returns:
        (candidate, ratio); the first candidate wins a tie. ``(None, 0.0)``
        when there is no candidate or none has a ratio above zero.
    """
    query_norm = norm(query_base)
    scored = [(best_ratio(query_norm, norm(ck)), ck) for ck in cands]
    if not scored:
        return None, 0.0
    # max() keeps the first of equally scored candidates.
    top_ratio, top_ck = max(scored, key=lambda pair: pair[0])
    if top_ratio <= 0.0:
        return None, 0.0
    return top_ck, top_ratio

def match_city_key(city_name: str):
    """Resolve a city name to one of the candidate city keys.

    Stages, first hit wins:
      1. exact normalised spelling (score 1.0, or at least 0.97 if ambiguous),
      2. a confusion-table variant (0.995, or at least 0.95 if ambiguous),
      3. identical katakana reading (0.99, or at least 0.93 if ambiguous),
      4. best fuzzy ratio at or above CONFIG["FUZZY_CUTOFF"].

    Returns:
        (city_key, score); ``(None, best_ratio_seen)`` when nothing qualifies
        and ``(None, 0.0)`` for falsy input.
    """
    if not city_name:
        return None, 0.0

    b = muni_base(apply_alias(city_name))
    bn = norm(b)

    def _pick(cands, query, unique_score, floor):
        # A single candidate is taken as-is; several are ranked by similarity.
        if len(cands) == 1:
            return cands[0], unique_score
        ck, rr = choose_best_by_fuzzy(query, cands)
        return ck, max(floor, rr)

    # 1) exact normalised spelling
    if bn and bn in norm_to_city:
        return _pick(norm_to_city[bn], b, 1.0, 0.97)

    # 2) spelling variants
    for variant in gen_variants(b):
        vn = norm(variant)
        if vn and vn in norm_to_city:
            return _pick(norm_to_city[vn], variant, 0.995, 0.95)

    # 3) same reading
    same_reading = reading_to_city.get(to_katakana_reading(b), [])
    if len(same_reading) == 1:
        return same_reading[0], 0.99
    if len(same_reading) >= 2:
        ck, rr = choose_best_by_fuzzy(b, same_reading)
        if ck:
            return ck, max(0.93, rr)

    # 4) fuzzy match over all candidates
    best_ck, best_r = None, 0.0
    for ck in city_keys:
        ratio = best_ratio(bn, city_norm_map.get(ck, ""))
        if ratio > best_r:
            best_ck, best_r = ck, ratio
    if best_ck and best_r >= CONFIG["FUZZY_CUTOFF"]:
        return best_ck, best_r
    return None, best_r

def match_city_keys_multi(city_raw: str, venue_raw: str = ""):
    """Resolve a (possibly bundled) city cell to a list of unique city keys.

    Every token of the city cell is matched individually. Only when that gives
    no key at all is the venue text scanned for candidate names.
    """
    keys = []
    for token in split_city_tokens(city_raw):
        ck, _score = match_city_key(token)
        if ck:
            keys.append(ck)

    if not keys and venue_raw:
        keys.extend(find_city_tokens_in_text(venue_raw, city_keys))

    # Drop falsy entries and duplicates, keeping first-seen order.
    return [k for k in dict.fromkeys(keys) if k]

def add_city_key(df):
    """Return a copy of *df* with matching results attached.

    Added columns:
        city_key    - first matched city key, or None
        city_keys   - all matched city keys (list)
        group_keys  - linked groups of those keys, without duplicates (list)
        match_score - score of re-matching the first city token (the raw city
                      text when it has no token); 0.0 when nothing matched
    """
    first_keys, key_lists, group_lists, scores = [], [], [], []

    for city_raw, venue_raw in zip(df["city_raw"].tolist(), df["venue_raw"].tolist()):
        klist = match_city_keys_multi(city_raw, venue_raw)
        first = klist[0] if klist else None

        # A bundled entry counts for every linked group of its member cities.
        groups = list(dict.fromkeys(g for g in map(group_of_citykey, klist) if g))

        if first:
            tokens = split_city_tokens(city_raw)
            probe = tokens[0] if tokens else city_raw
            score = match_city_key(probe)[1]
        else:
            score = 0.0

        first_keys.append(first)
        key_lists.append(klist)
        group_lists.append(groups)
        scores.append(score)

    out = df.copy()
    out["city_key"] = first_keys
    out["city_keys"] = key_lists
    out["group_keys"] = group_lists
    out["match_score"] = scores
    return out

# Confirmed entries: slots with any text in the city or venue column.
scheduled42 = add_city_key(b42[b42["city_raw"].ne("") | b42["venue_raw"].ne("")].copy())
scheduled43 = add_city_key(b43[b43["city_raw"].ne("") | b43["venue_raw"].ne("")].copy())

# Open slots: period-43 AJ rows with both city and venue empty, ordered by
# week position and then by sheet row.
open_mask = b43["kind"].eq("ＡＪ") & b43["city_raw"].eq("") & b43["venue_raw"].eq("")
open43_AJ = (
    b43[open_mask]
    .copy()
    .sort_values(["week_pos","row_header"])
    .reset_index(drop=True)
)

# ======================================================
# 8) 都道府県コード推定
# ======================================================
# Distinct trimmed values of the prefecture column in the period-43 sheet.
pref_code_set = set(str(x).strip() for x in q43[COL_PREF].dropna().astype(str).unique().tolist())

# Prefecture name -> full code, for values shaped like two digits + name.
pref_name_to_code = {}
code_pat = re.compile(r"^(\d{2})(.+)$")
for pc in pref_code_set:
    m = code_pat.match(pc)
    if m is None:
        continue
    pref_name_to_code[m.group(2)] = pc

def parent_to_pref_code(pref_parent):
    """Translate a prefecture label into its code via ``pref_name_to_code``.

    Only the leading run of non-digit characters of the label is used as the
    lookup name.  Returns ``""`` for ``None`` / NaN input or when the name is
    not present in ``pref_name_to_code``.
    """
    is_missing = pref_parent is None or (
        isinstance(pref_parent, float) and math.isnan(pref_parent)
    )
    if is_missing:
        return ""

    label = str(pref_parent).strip()
    leading = re.match(r"^([^\d]+)", label)
    if leading:
        name = leading.group(1).strip()
    else:
        name = label
    return pref_name_to_code.get(name, "")

# Guess a prefecture code for every planned city from its parent label.
region_master["pref_code_guess"] = region_master["pref_parent"].apply(parent_to_pref_code)
pref_by_city = region_master.set_index("city_key")["pref_code_guess"].to_dict()

# Group -> prefecture: use the first member city whose prefecture is known.
pref_by_group = {}
for gk, members in group_members.items():
    pc = ""
    for mck in members:
        pc = pref_by_city.get(mck, "")
        if pc:
            break
    pref_by_group[gk] = pc

# Required gap in weeks, per prefecture and per group, derived from plan counts.
pref_plan_count = region_master.groupby("pref_code_guess")["plan_count"].sum().to_dict()
pref_gap = {pc: gap_weeks_from_count(int(cnt)) for pc, cnt in pref_plan_count.items() if pc}
group_gap = {gk: gap_weeks_from_count(int(cnt)) for gk, cnt in plan_count_by_group.items()}

# ======================================================
# 9) Span history (42 and 43 concatenated), updated per group
# ======================================================
# Period-43 week positions are shifted by the number of period-42 weeks so
# both periods share a single absolute week axis.
OFFSET_43 = len(week_order_42)

scheduled42["abs_pos"] = scheduled42["week_pos"]
scheduled43["abs_pos"] = scheduled43["week_pos"] + OFFSET_43
open43_AJ["abs_pos"]   = open43_AJ["week_pos"] + OFFSET_43

scheduled_all = pd.concat([scheduled42, scheduled43], ignore_index=True)

# Latest absolute week position seen per prefecture / per group.
# NOTE(review): this is the maximum over all scheduled rows, including
# period-43 rows that may lie after an open slot; consumers computing
# `slot_pos - last_pos` can therefore see negative gaps.
last_pos_pref, last_pos_group = {}, {}

for _, e in scheduled_all.dropna(subset=["abs_pos"]).iterrows():
    ap = int(e["abs_pos"])
    pc = e["pref_code"]
    if pc:
        last_pos_pref[pc] = max(last_pos_pref.get(pc, -999), ap)

    # Linked groups (bundles included): update every entry of group_keys.
    glist = e.get("group_keys", [])
    if isinstance(glist, list) and glist:
        for gk in glist:
            if gk:
                last_pos_group[gk] = max(last_pos_group.get(gk, -999), ap)

# Spec A: one bundled event consumes one occurrence for every group listed
# in the row's group_keys; counts are accumulated per group.
scheduled_counts_43_group = {}
for _, e in scheduled43.iterrows():
    glist = e.get("group_keys", [])
    if isinstance(glist, list) and glist:
        for gk in glist:
            if gk:
                scheduled_counts_43_group[gk] = scheduled_counts_43_group.get(gk, 0) + 1

# ======================================================
# 10) 例外増枠プール（J列=1 & 人口>=18万）
# ======================================================
# Working copy of the statistics table with trimmed municipality names.
stats2 = stats.copy()
stats2[muni_col] = stats2[muni_col].astype(str).str.strip()

# Class column: the 10th column (spreadsheet column J) when present,
# otherwise the last column.
class_col = stats2.columns[9] if len(stats2.columns) >= 10 else stats2.columns[-1]
stats2[class_col] = pd.to_numeric(stats2[class_col], errors="coerce")

# Population column: first column whose name contains "人口"; failing that,
# the numeric column with the largest maximum; failing that, the last column.
pop_col = next((c for c in stats2.columns if "人口" in str(c)), None)
if pop_col is None:
    num_cols = []
    for c in stats2.columns:
        s = pd.to_numeric(stats2[c], errors="coerce")
        if s.notna().sum() > 0:
            num_cols.append((c, float(s.max(skipna=True))))
    pop_col = sorted(num_cols, key=lambda x: x[1], reverse=True)[0][0] if num_cols else stats2.columns[-1]
stats2[pop_col] = pd.to_numeric(stats2[pop_col], errors="coerce")

# Rows eligible for the exceptional extra slot: configured class value and
# at least the configured minimum population.
ovr = stats2[(stats2[class_col] == CONFIG["OVR_CLASS_J_VALUE"]) & (stats2[pop_col] >= CONFIG["OVR_MIN_POP"])].copy()

# Map each eligible municipality onto a planned city key, keeping the entry
# with the best match score per key.  Name and population are walked together
# so the frame is not re-scanned for every name (the previous per-name
# `.loc[...]` lookup was quadratic); the first row still wins on ties.
ovr_candidates = {}
for name, pop_raw in zip(ovr[muni_col].tolist(), ovr[pop_col].tolist()):
    ck, sc = match_city_key(name)
    if ck:
        pop_val = float(pop_raw)
        if ck not in ovr_candidates or sc > ovr_candidates[ck]["match_score"]:
            ovr_candidates[ck] = {"name_raw": name, "match_score": sc, "pop": pop_val}

OVR_POOL = set(ovr_candidates.keys())
print(f"✅ 例外増枠プール: {len(OVR_POOL)}（J列={class_col}==1 & 人口列={pop_col}>=18万）")

# ======================================================
# 11) Attendance rate (mapped onto city keys when available; 0.5 otherwise)
# ======================================================
# First statistics column whose name contains one of the rate keywords.
rate_col = next((c for c in stats2.columns if ("集客率" in str(c) or "来場率" in str(c) or "動員率" in str(c))), None)
city_rate = {}
if rate_col is not None:
    tmp = stats2[[muni_col, rate_col]].copy()
    tmp["rate"] = pd.to_numeric(tmp[rate_col], errors="coerce")
    tmp = tmp.dropna(subset=["rate"])
    # Key by normalised base municipality name; later rows overwrite earlier
    # ones that share the same normalised name.
    muni_rate_norm = {norm(muni_base(row[muni_col])): float(row["rate"]) for _, row in tmp.iterrows()}
    for ck in city_keys:
        cn = norm(ck)
        if cn in muni_rate_norm:
            city_rate[ck] = muni_rate_norm[cn]

def percentile_need(values_dict):
    """Rank the values of *values_dict* and derive a "need" score.

    Entries whose value is ``None`` or a float NaN are ignored.  Returns
    ``(pct_map, need_map)`` where ``pct_map[k]`` is the percentile rank of the
    value (pandas ``rank(pct=True, method="average")``) and ``need_map[k]`` is
    ``1 - pct_map[k]``.  Both maps are empty when no usable value exists.
    """
    usable = {
        key: value
        for key, value in values_dict.items()
        if value is not None and not (isinstance(value, float) and math.isnan(value))
    }
    if not usable:
        return {}, {}

    ranks = pd.Series(list(usable.values())).rank(pct=True, method="average").tolist()
    pct_map = dict(zip(usable.keys(), ranks))
    need_map = {key: float(1 - rank) for key, rank in pct_map.items()}
    return pct_map, need_map

# Percentile rank and need (1 - percentile) for every city that has a rate.
city_pct, city_need = percentile_need(city_rate)

def fmt_rate(v):
    """Format a rate for display.

    ``None`` / NaN gives "不明"; values up to 1.0 are shown as a percentage
    with two decimals; larger values are shown with four significant digits.
    """
    unknown = v is None or (isinstance(v, float) and math.isnan(v))
    if unknown:
        return "不明"
    if v <= 1.0:
        return f"{v*100:.2f}%"
    return f"{v:.4g}"

def fmt_pct(p):
    """Format a percentile fraction as "下位N%", or "不明" when it is ``None``."""
    return "不明" if p is None else f"下位{p*100:.0f}%"

# ======================================================
# 理由生成（グループ単位のスパン・残回数を表示）
# ======================================================
def build_reason(variant, week_id, area_label, ck, gk, pc,
                 need_gap_p, gp, need_gap_g, gg,
                 remaining_plan_g, overflow_used, overflow_meta,
                 same_week_pref_hit, same_week_group_hit, relax_mode, score):
    """Compose the explanation text stored next to a proposed assignment.

    The text lists, in order: the area filter, the linked group, either the
    remaining planned count or the overflow exception, the prefecture/group
    span figures (with the relaxation mode when one was used), the attendance
    rate when ``ck`` has one in ``city_rate``, the same-week handling, the
    snow rule and a summary with the score.  The segments are joined with
    " / " and the result is cut to 32000 characters.
    """
    if overflow_used:
        pop = overflow_meta.get("pop", None)
        if isinstance(pop, (int,float)) and not math.isnan(pop):
            pop_txt = f"{int(pop):,}人"
        else:
            pop_txt = "不明"
        nm = overflow_meta.get("name_raw","") or ck
        plan_part = [
            "1) 計画内だけでは埋め切れず、例外ルールで増枠。",
            f"   例外条件：統計量のJ列=1 & 人口18万人以上 → {nm}（人口={pop_txt}）",
        ]
    else:
        plan_part = [
            f"1) 計画回数（グループ残）：{gk} は残り {remaining_plan_g} 回あるため計画内で採用。（候補={ck}）",
        ]

    relax_note = f"（制約緩和={relax_mode}）" if relax_mode else ""
    span_part = f"2) スパン：県は必要{need_gap_p}週に対し実績{gp}週／グループは必要{need_gap_g}週に対し実績{gg}週" + relax_note

    if ck in city_rate:
        rate_part = f"3) 集客率：{fmt_rate(city_rate.get(ck))}（{fmt_pct(city_pct.get(ck))}）。改善余地が大きいほど優先度UP。"
    else:
        rate_part = "3) 集客率：データが取れないため、回数とスパンを優先。"

    pref_note = "（同週同県ペナ）" if same_week_pref_hit else "（同週同県は回避）"
    group_note = "（同週同グループペナ）" if same_week_group_hit else ""
    week_part = "4) 同週はカニバりやすいので基本回避。" + pref_note + group_note

    segments = [
        f"【案{variant}】この枠（{week_id}／{area_label}）はAJ枠が空いていたので、次の順で選びました。",
        f"0) 地域フィルタ：この枠は「{area_label}」なので、対象都道府県の範囲内だけから選定。",
        f"   連動グループ：{gk}（例：木更津+市原=合計年2回、豊中+吹田=合計年2回）",
        *plan_part,
        span_part,
        rate_part,
        week_part,
        "5) 豪雪の12〜2月・3月1wは原則除外（例外リストで解除可）。",
        f"【まとめ】グループ回数×グループスパン×改善余地×同週回避を総合。（score={score:.2f}）",
    ]
    return " / ".join(segments)[:32000]

# ======================================================
# 週×都道府県 / 週×グループ（43期）
# ======================================================
# Prefectures and groups already occupying each week among the scheduled
# period-43 rows; copied as the starting state of every plan variant.
week_used_pref_base = {}
week_used_group_base = {}

for _, e in scheduled43.iterrows():
    w = e["week_id"]
    pc = e["pref_code"]
    if w and pc:
        week_used_pref_base.setdefault(w, set()).add(pc)
    glist = e.get("group_keys", [])
    if w and isinstance(glist, list):
        s = week_used_group_base.setdefault(w, set())
        for gk in glist:
            if gk:
                s.add(gk)

# ======================================================
# 12) プラン生成（A/B/C）…★グループ単位で残回数・スパン判定
# ======================================================
def make_plan(variant):
    """Greedily fill every open AJ slot for one plan variant.

    ``variant`` selects the weight set and the RNG seed from ``CONFIG``.
    Slots in ``open43_AJ`` are processed in order; for each slot every city
    key is scored and the best candidate is taken.  Constraints are relaxed
    step by step (no relaxation, "Aのみ", "B+A"), first within the planned
    counts and then, if nothing was found, allowing the overflow pool.

    Returns a DataFrame with one row per slot: ``variant``, ``row_header``,
    ``assign_city_key``, ``pref_code_guess``, ``score`` and ``reason_BT``.
    Slots without any candidate get empty keys and a score of ``-inf``.
    """
    rng = random.Random(CONFIG["SEEDS"][variant])
    W = CONFIG["WEIGHTS"][variant]

    # Per-plan mutable state, seeded from the module-level history so the
    # variants do not influence each other.
    used_in_plan_group = {gk: 0 for gk in plan_count_by_group.keys()}
    lp_p = dict(last_pos_pref)
    lp_g = dict(last_pos_group)
    week_used_pref = {w:set(s) for w,s in week_used_pref_base.items()}
    week_used_group = {w:set(s) for w,s in week_used_group_base.items()}

    assigns = []

    def score_city(ck, apos, week_id, area_label, relax_mode, allow_overflow):
        """Score city key *ck* for the slot at *apos*; ``None`` if rejected."""
        gk = group_of_citykey(ck)
        pc = pref_by_group.get(gk, "") or pref_by_city.get(ck, "")

        # Area filter (prefecture based).
        if not area_allowed(area_label, pc):
            return None

        plan_cnt_g = plan_count_by_group.get(gk, 0)
        already_g  = scheduled_counts_43_group.get(gk, 0)
        remaining_g = plan_cnt_g - already_g - used_in_plan_group.get(gk, 0)

        overflow_used = False
        overflow_meta = {}

        if remaining_g <= 0:
            # No planned occurrences left for the group: fall back to the
            # overflow pool, which is keyed by city key.
            if not allow_overflow:
                return None
            if ck not in OVR_POOL:
                return None
            overflow_used = True
            meta = ovr_candidates.get(ck, {})
            overflow_meta = {"name_raw": meta.get("name_raw",""), "pop": meta.get("pop", float("nan"))}

        need_gap_p = pref_gap.get(pc, CONFIG["GAP_WEEKS_MIN"]) if pc else CONFIG["GAP_WEEKS_MIN"]
        need_gap_g = group_gap.get(gk, CONFIG["GAP_WEEKS_MIN"])

        last_p = lp_p.get(pc, None) if pc else None
        last_g = lp_g.get(gk, None)

        # Gap since the last recorded position; 999 stands for "no history".
        # NOTE(review): the gap is negative when the recorded position lies
        # after this slot.
        gp = 999 if last_p is None else (apos - last_p)
        gg = 999 if last_g is None else (apos - last_g)

        ok_p = (last_p is None) or (gp >= need_gap_p)
        ok_g = (last_g is None) or (gg >= need_gap_g)

        # No relaxation: both gaps must hold.  "Aのみ": only the prefecture
        # gap must hold.  "B+A": neither gap is enforced.
        if relax_mode is None:
            if not (ok_p and ok_g):
                return None
        elif relax_mode == "Aのみ":
            if not ok_p:
                return None
        elif relax_mode == "B+A":
            pass

        # Snow blackout (the group key is passed as the city-key argument).
        if pc and is_snow_blackout(pc, gk, week_id):
            return None

        same_week_pref_hit  = (pc and pc in week_used_pref.get(week_id, set()))
        same_week_group_hit = (gk and gk in week_used_group.get(week_id, set()))

        same_week_pen = (CONFIG["SAME_WEEK_PREF_PENALTY"] if same_week_pref_hit else 0.0) + \
                        (CONFIG["SAME_WEEK_CITY_PENALTY"] if same_week_group_hit else 0.0)

        overflow_pen = CONFIG["OVR_OVERFLOW_PENALTY"] if overflow_used else 0.0

        relax_pen = 0.0
        if relax_mode == "Aのみ":
            relax_pen = W["relax_penalty"] * 0.6
        elif relax_mode == "B+A":
            relax_pen = W["relax_penalty"] * 1.0

        slack_p = gp - need_gap_p
        slack_g = gg - need_gap_g
        need_city = city_need.get(ck, 0.5)

        score = (
            slack_p * W["pref_slack"] +
            slack_g * W["city_slack"] +                      # scored on the group span, not the city span
            max(remaining_g, 0) * W["unmet_bonus"] * 5.0 +   # remaining count is per group as well
            (need_city * 10) * W["low_attr_city"] -
            relax_pen - same_week_pen - overflow_pen +
            rng.uniform(-0.5, 0.5) * W["noise"]
        )

        reason = build_reason(
            variant, week_id, area_label, ck, gk, pc,
            need_gap_p, gp, need_gap_g, gg,
            remaining_g,
            overflow_used, overflow_meta,
            same_week_pref_hit, same_week_group_hit, relax_mode, score
        )

        return {"ck": ck, "gk": gk, "pc": pc, "score": float(score), "reason": reason}

    for _, slot in open43_AJ.iterrows():
        apos = int(slot["abs_pos"])
        week_id = slot["week_id"]
        area_label = slot.get("area_label", "")

        best = None

        # Search within the planned counts, relaxing step by step; stop at
        # the first relaxation level that yields any candidate.
        for relax_mode in [None, "Aのみ", "B+A"]:
            for ck in city_keys:
                cand = score_city(ck, apos, week_id, area_label, relax_mode, allow_overflow=False)
                if cand and (best is None or cand["score"] > best["score"]):
                    best = cand
            if best is not None:
                break

        # Exceptional extra slot (overflow pool), same relaxation ladder.
        if best is None:
            for relax_mode in [None, "Aのみ", "B+A"]:
                for ck in city_keys:
                    cand = score_city(ck, apos, week_id, area_label, relax_mode, allow_overflow=True)
                    if cand and (best is None or cand["score"] > best["score"]):
                        best = cand
                if best is not None:
                    break

        if best is None:
            assigns.append({
                "variant": variant, "row_header": int(slot["row_header"]),
                "assign_city_key": "", "pref_code_guess": "", "score": float("-inf"),
                "reason_BT": f"案{variant}: 条件により候補なし（地域={area_label}）"
            })
            continue

        ck = best["ck"]; gk = best["gk"]; pc = best["pc"]

        # Update consumption and history at group granularity.
        used_in_plan_group[gk] = used_in_plan_group.get(gk, 0) + 1
        lp_g[gk] = apos
        week_used_group.setdefault(week_id, set()).add(gk)

        if pc:
            lp_p[pc] = apos
            week_used_pref.setdefault(week_id, set()).add(pc)

        assigns.append({
            "variant": variant, "row_header": int(slot["row_header"]),
            "assign_city_key": ck, "pref_code_guess": pc, "score": best["score"],
            "reason_BT": best["reason"]
        })

    return pd.DataFrame(assigns)

# Build the three plan variants; each uses its own weights and RNG seed.
planA = make_plan("A")
planB = make_plan("B")
planC = make_plan("C")

def to_map(df, col):
    """Map each row's integer ``row_header`` to its value in column *col*.

    When several rows share a ``row_header`` the last one wins.
    """
    mapping = {}
    for _, row in df.iterrows():
        mapping[int(row["row_header"])] = row[col]
    return mapping

# Lookup tables keyed by header row: proposed city and prefecture per variant,
# plus the reason text of variant A.
A_city = to_map(planA, "assign_city_key")
B_city = to_map(planB, "assign_city_key")
C_city = to_map(planC, "assign_city_key")
A_pref = to_map(planA, "pref_code_guess")
B_pref = to_map(planB, "pref_code_guess")
C_pref = to_map(planC, "pref_code_guess")
A_reason = to_map(planA, "reason_BT")

# ======================================================
# 13) Write-back (empty period-43 AJ slots get A, with B/C noted, plus the BT reason)
# ======================================================
wb = openpyxl.load_workbook(QUARTER_XLSX)

# Keep only the two sheets named by sheet42 / sheet43 in the output workbook.
for name in list(wb.sheetnames):
    if name not in {sheet42, sheet43}:
        wb.remove(wb[name])

ws43 = wb[sheet43]

written = 0
for row0 in open43_AJ["row_header"].tolist():
    # row_header is a 0-based DataFrame row; worksheet rows are 1-based.
    r = int(row0) + 1

    # Never overwrite a cell pair that already holds a city or a venue.
    c_val = ws43.cell(r, COL_CITY+1).value
    d_val = ws43.cell(r, COL_VENUE+1).value
    if (c_val is not None and str(c_val).strip() != "") or (d_val is not None and str(d_val).strip() != ""):
        continue

    a = str(A_city.get(int(row0), "") or "")
    b = str(B_city.get(int(row0), "") or "")
    c = str(C_city.get(int(row0), "") or "")

    # City cell takes variant A; the venue cell carries the placeholder with
    # the B and C alternatives in parentheses.
    ws43.cell(r, COL_CITY+1).value = a
    ws43.cell(r, COL_VENUE+1).value = f"{VENUE_PLACEHOLDER}（B:{b if b else '-'} / C:{c if c else '-'}）"

    # Fill the prefecture cell only when it is empty, preferring A, then B, then C.
    pref_candidate = A_pref.get(int(row0), "") or B_pref.get(int(row0), "") or C_pref.get(int(row0), "")
    if (ws43.cell(r, COL_PREF+1).value is None) or (str(ws43.cell(r, COL_PREF+1).value).strip()==""):
        if pref_candidate:
            ws43.cell(r, COL_PREF+1).value = str(pref_candidate)

    ws43.cell(r, COL_REASON_BT+1).value = str(A_reason.get(int(row0), "案A: 理由生成なし"))
    written += 1

wb.save(OUT_QUARTER_ABC)
print("\n✅ 入力:", QUARTER_XLSX)
print("✅ 出力:", OUT_QUARTER_ABC)
print("✅ 43期 空欄AJ枠 書込数:", written)
print("✅ 連動グループ cap（例：年2回→gap約26週）:", {k:GROUP_CAP.get(k) for k in sorted(GROUP_CAP)[:20]})
print("✅ 東:", sorted(EAST_PREF_NUMS))
print("✅ 西:", sorted(WEST_PREF_NUMS))
print("✅ 九州:", sorted(KYUSHU_PREF_NUMS))


✅ 例外増枠プール: 1（J列=Zの4分位（1=赤,2=黄,3=青,4=灰）==1 & 人口列=人口>=18万）

✅ 入力: SA+AJ+共有用_四半期表20240303.xlsx
✅ 出力: SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT.xlsx
✅ 43期 空欄AJ枠 書込数: 137
✅ 連動グループ cap（例：年2回→gap約26週）: {'木更津_市原': 2, '豊中_吹田': 2}
✅ 東: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 24]
✅ 西: [17, 18, 25, 26, 27, 28, 29, 30, 31, 32, 33, 36, 37, 38, 39]
✅ 九州: [34, 35, 40, 41, 42, 43, 44, 45, 46]


In [None]:
# ============================================
# 43期 スケジュール自動提案 MVP（A/B/C案）
# ★追加：東/西/九州の都道府県範囲フィルタ
# ★追加：①alias ②読みキー ③誤字ゆれ生成 による表記ゆれ吸収
# ★追加：市区分「束ね」対応（例：木更津・市原）＝構成市すべてを同一エリア扱い（A仕様：両方1消化）
# ★追加：C列基本、Cで拾えない時はD列（会場）から市区分ヒント拾い（同セル内）
# （会場マスタは使わない）
# ============================================

!pip -q install fugashi unidic-lite jaconv

import os, re, math, random, difflib
import pandas as pd
import openpyxl
import jaconv
from fugashi import Tagger

tagger = Tagger()

# ====== Input ======
# Prefer the uploaded file under /mnt/data when it exists.
DEFAULT_UPLOADED = "/mnt/data/SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT (1).xlsx"
QUARTER_XLSX = DEFAULT_UPLOADED if os.path.exists(DEFAULT_UPLOADED) else "SA+AJ+共有用_四半期表20240303.xlsx"

REGION_XLSX      = "43期地域別会場回数.xlsx"
STATS_XLSX       = "市区町村_統計量_全国 (1).xlsx"
SNOW_EXCEPT_XLSX = "豪雪例外リスト.xlsx"

# ====== Output (only the 42/43 sheets are kept) ======
OUT_QUARTER_ABC = "SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT.xlsx"

# ====== Columns (0-based) =====
COL_WEEK = 0
COL_AREA_OR_KIND = 1     # header row = 東/西/九州, following row = AJ/合同/SA
COL_CITY = 2             # prefecture / city area (e.g. 静岡, 難波, 博多 ...)
COL_VENUE = 3            # venue (this text may also contain a city hint)
COL_PREF = 5
COL_REASON_BT = 72       # spreadsheet column BT

VENUE_PLACEHOLDER = "会場要検討"
# Week identifiers look like "6-4w": one or two digits, a dash, one digit, "w".
week_id_pat = re.compile(r"^\d{1,2}-\dw$")

CONFIG = {
    "GAP_WEEKS_MIN": 6,
    "GAP_WEEKS_MAX": 30,
    "SAME_WEEK_PREF_PENALTY": 80.0,  # penalty: same week, same prefecture
    "SAME_WEEK_CITY_PENALTY": 60.0,  # penalty: same week, same city area (bundle members included); kept as a penalty rather than a hard ban

    # Spelling-variation matching thresholds
    "FUZZY_CUTOFF": 0.86,
    "CANON_CUTOFF": 0.93,

    # Heavy-snow blackout
    "SNOW_BLACKOUT_MONTHS": {12, 1, 2},
    "SNOW_BLACKOUT_MARCH_W": {1},
    "SNOW_PREF_CODES": {
        "01北海道","02青森","03岩手","04宮城","05秋田","06山形","07福島",
        "15新潟","16富山","17石川","18福井","19山梨",
        "20長野","21岐阜","31鳥取","32島根"
    },

    # Exceptional extra slots (overflow)
    "OVR_CLASS_J_VALUE": 1,
    "OVR_MIN_POP": 180000,
    "OVR_OVERFLOW_PENALTY": 15.0,

    # Scoring weights per plan variant
    "WEIGHTS": {
        "A": {"pref_slack": 3.0, "city_slack": 2.0, "unmet_bonus": 1.2,
              "low_attr_city": 2.0, "relax_penalty": 25.0, "noise": 0.8},
        "B": {"pref_slack": 4.5, "city_slack": 3.2, "unmet_bonus": 1.0,
              "low_attr_city": 0.6, "relax_penalty": 30.0, "noise": 0.8},
        "C": {"pref_slack": 2.0, "city_slack": 1.2, "unmet_bonus": 1.2,
              "low_attr_city": 3.2, "relax_penalty": 25.0, "noise": 0.8},
    },
    # RNG seed per plan variant
    "SEEDS": {"A": 4301, "B": 4302, "C": 4303},
}

# ======================================================
# 0) alias辞書（CSV）…追記して育てる運用
# ======================================================
ALIAS_CSV = "市区分_alias.csv"

def ensure_alias_template(path=ALIAS_CSV):
    """Create a starter alias CSV at *path* unless the file already exists.

    The template has the columns ``alias`` and ``canonical`` with two sample
    rows and is written as UTF-8 with BOM.  An existing file is left untouched.
    """
    if not os.path.exists(path):
        sample_rows = [
            {"alias":"なんば","canonical":"難波"},
            {"alias":"薩摩河内","canonical":"薩摩川内"},
        ]
        pd.DataFrame(sample_rows).to_csv(path, index=False, encoding="utf-8-sig")
        print(f"✅ aliasテンプレ作成: {path}（必要に応じて追記してください）")

def load_alias_map(path=ALIAS_CSV):
    """Load the alias -> canonical mapping from the CSV at *path*.

    The template is created first when the file is missing.  Rows with an
    empty alias or an empty canonical name are skipped.  Any failure while
    reading is reported on stdout and an empty mapping is returned, so the
    run continues without aliases.
    """
    ensure_alias_template(path)
    try:
        table = pd.read_csv(path, dtype=str).fillna("")
        for column in ("alias", "canonical"):
            table[column] = table[column].astype(str).str.strip()
        mapping = {}
        for alias, canonical in zip(table["alias"], table["canonical"]):
            if alias and canonical:
                mapping[alias] = canonical
        return mapping
    except Exception as e:
        # Deliberately best-effort: alias support is optional.
        print("⚠️ alias読み込み失敗。aliasなしで続行:", e)
        return {}

# Alias -> canonical name table, loaded once at start-up.
ALIAS_MAP = load_alias_map(ALIAS_CSV)

def apply_alias(s: str) -> str:
    """Return the canonical name for *s* if ``ALIAS_MAP`` has one.

    The input is stringified and stripped first; ``None`` and blank input
    give ``""``.  Names without an alias entry are returned stripped.
    """
    text = "" if s is None else str(s).strip()
    if not text:
        return ""
    return ALIAS_MAP.get(text, text)

# ======================================================
# 1) 正規化・読みキー・誤字ゆれ生成
# ======================================================
def norm(s):
    """Normalise text for comparison.

    ``None`` and float NaN give ``""``.  Otherwise the value is stringified,
    full-width spaces become ASCII spaces, the ends are stripped, and
    whitespace, dash-like characters, slashes, middle dots, commas, periods
    and the listed bracket characters are removed.
    """
    if s is None:
        return ""
    if isinstance(s, float) and math.isnan(s):
        return ""
    text = str(s).replace("　"," ").strip()
    return re.sub(r"[ \t\n\r\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", text)

def muni_base(name: str) -> str:
    """Strip administrative suffixes from a municipality name.

    One trailing 都/道/府/県 is removed first, then one trailing 市/区/町/村.
    ``None`` gives ``""``.
    NOTE(review): names whose own last character is one of these (for
    example 四日市) are shortened as well - confirm this is acceptable.
    """
    if name is None:
        return ""
    base = str(name).strip()
    for suffix_pattern in (r"(都|道|府|県)$", r"(市|区|町|村)$"):
        base = re.sub(suffix_pattern, "", base)
    return base

def to_katakana_reading(s: str) -> str:
    """Return a katakana reading key for *s* (``""`` for ``None`` / blank).

    Separators and bracket characters are removed first.  Text that contains
    kana and no kanji is converted directly (hiragana -> katakana).  All
    other text is run through the morphological ``tagger`` and the readings
    of the tokens are concatenated.  The result keeps only katakana, digits
    and upper-case ASCII letters.

    Fixes relative to the previous version:
    * the direct kana shortcut used to fire when the text merely *contained*
      kana, so mixed names such as 東かがわ lost their kanji part and
      collapsed onto the reading of the kana part alone;
    * the reading attribute probe stopped at the first attribute that
      existed even when its value was empty; it now takes the first
      attribute that actually carries a reading.
    """
    if s is None:
        return ""
    s = str(s).strip()
    if not s:
        return ""
    s2 = re.sub(r"[ \t\r\n\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", s)

    has_kana = re.search(r"[ぁ-んァ-ン]", s2) is not None
    has_kanji = re.search(r"[\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff々〆]", s2) is not None

    # Kana-only text needs no dictionary lookup.
    if has_kana and not has_kanji:
        s2 = jaconv.normalize(jaconv.hira2kata(s2))
        s2 = re.sub(r"[^ァ-ン0-9A-Z]", "", s2)
        return s2

    yomi_parts = []
    for w in tagger(s2):
        feat = w.feature
        reading = None
        # Attribute names differ between dictionaries; use the first one
        # that holds a usable value.
        for k in ("reading", "kana", "pron"):
            candidate = getattr(feat, k, None)
            if candidate and candidate != "*":
                reading = candidate
                break
        if not reading:
            # Unknown token: fall back to its surface form.
            reading = w.surface
        yomi_parts.append(reading)

    yomi = "".join(yomi_parts)
    yomi = jaconv.normalize(jaconv.hira2kata(yomi))
    yomi = re.sub(r"[^ァ-ン0-9A-Z]", "", yomi)
    return yomi

# Characters that are commonly confused with each other in place names.
CONFUSION = {
    "川": ["河"],
    "河": ["川"],
    "崎": ["﨑"],
    "﨑": ["崎"],
    "ヶ": ["ケ"],
    "ケ": ["ヶ"],
    "斉": ["齋", "斎"],
    "齋": ["斉", "斎"],
    "斎": ["斉", "齋"],
    "邊": ["辺", "邉"],
    "邉": ["辺", "邊"],
    "辺": ["邊", "邉"],
}

def gen_variants(s: str, limit=12):
    """Generate spelling variants of *s* using the ``CONFUSION`` table.

    For every table character that occurs in the original string, each
    variant collected so far is copied with that character replaced by each
    of its alternatives.  Generation stops once *limit* variants exist and
    at most *limit* are returned.

    The result is ordered: the original string comes first, followed by the
    variants in generation order.  (Previously the variants came from a set,
    so the order changed between runs and truncation could drop the original
    spelling.)  ``None`` gives ``[""]``.
    """
    if s is None:
        return [""]
    s = str(s)
    variants = [s]
    seen = {s}
    for src, replacements in CONFUSION.items():
        if src in s:
            # Iterate over a snapshot so new variants are not expanded again
            # for the same source character.
            for base in list(variants):
                for dst in replacements:
                    candidate = base.replace(src, dst)
                    if candidate not in seen:
                        seen.add(candidate)
                        variants.append(candidate)
        if len(variants) >= limit:
            break
    return variants[:limit]

def best_ratio(a, b):
    """Return the ``difflib.SequenceMatcher`` similarity ratio of *a* and *b*."""
    matcher = difflib.SequenceMatcher(None, a, b)
    return matcher.ratio()

# ======================================================
# Added: bundle splitting + venue-column hint (within the same row)
# ======================================================
# Separators between bundled city names: middle dot, slashes, commas,
# plus and ampersand signs (half- and full-width) and any whitespace.
DELIMS = r"[・／/、,＋+＆&\s　]+"

def strip_annotations(s: str) -> str:
    """Cut trailing annotations off a raw city cell.

    Everything from the first opening parenthesis (half- or full-width) is
    dropped, then everything from the first of 会場 / 要検討 / 検討 / 確定.
    The remainder is stripped.  Falsy input gives ``""``.
    """
    if not s:
        return ""
    text = str(s)
    paren = re.search(r"[（(]", text)
    if paren:
        text = text[:paren.start()]
    marker = re.search(r"(会場|要検討|検討|確定)", text)
    if marker:
        text = text[:marker.start()]
    return text.strip()

def split_city_tokens(raw: str):
    """Split a raw city cell into distinct base city names.

    Annotations are stripped, the text is split on ``DELIMS``, and each piece
    is passed through ``apply_alias`` and ``muni_base``.  Empty results are
    dropped and duplicates removed, keeping first-seen order.
    """
    cleaned = strip_annotations(raw)
    if not cleaned:
        return []

    bases = []
    for piece in re.split(DELIMS, cleaned):
        piece = piece.strip()
        if not piece:
            continue
        base = muni_base(apply_alias(piece))
        if base and base not in bases:
            bases.append(base)
    return bases

def find_city_tokens_in_text(text: str, candidates_city_keys):
    """Return the candidate keys whose normalised form occurs inside *text*.

    Matching is a plain substring test on ``norm``-alised strings.  The
    result keeps the candidates' order without duplicates; it is empty when
    *text* is falsy or normalises to an empty string.
    """
    if not text:
        return []
    haystack = norm(text)
    if not haystack:
        return []

    found = {}
    for key in candidates_city_keys:
        needle = norm(key)
        if needle and needle in haystack:
            found.setdefault(key, None)
    return list(found)

# ======================================================
# 2) 東/西/九州 の都道府県範囲（ユーザー指定）
# ======================================================
# Prefecture numbers per area label.  47 is in none of the three sets.
EAST_PREF_NUMS  = set(list(range(1, 17)) + [19,20,21,22,23,24])      # 01-16 and 19-24 (17, 18 belong to west)
WEST_PREF_NUMS  = set([17,18] + list(range(25, 34)) + [31,32] +      # 17-18 and 25-33 (31, 32 are already inside that range)
                      [36,37,38,39])                                 # 36-39 added to west
KYUSHU_PREF_NUMS= set([34,35] + list(range(40, 47)))                 # 34, 35 and 40-46

def pref_num(pref_code_str: str):
    """Return the two leading digits of a prefecture code as an int.

    Returns ``None`` for falsy input or when the stripped text does not start
    with two digits.
    """
    if not pref_code_str:
        return None
    digits = re.match(r"^(\d{2})", str(pref_code_str).strip())
    if digits is None:
        return None
    return int(digits.group(1))

def area_allowed(area_label: str, pref_code_str: str) -> bool:
    """Tell whether a prefecture may be used for a slot with *area_label*.

    The prefecture number is checked against the set belonging to the label
    (東, 西 or 九州).  The answer is ``True`` when the prefecture number
    cannot be determined or when the label is none of the three.
    """
    label = str(area_label).strip() if area_label else ""
    number = pref_num(pref_code_str)
    if number is None:
        return True

    allowed_by_area = {
        "東": EAST_PREF_NUMS,
        "西": WEST_PREF_NUMS,
        "九州": KYUSHU_PREF_NUMS,
    }
    allowed = allowed_by_area.get(label)
    if allowed is None:
        return True
    return number in allowed

# ======================================================
# 3) 補助関数（週順、スパン、豪雪）
# ======================================================
def find_sheet_name(xlsx_path, keywords):
    """Return the first sheet name that contains every keyword.

    Falls back to the workbook's first sheet when no name matches, so callers
    never receive ``None`` (but may silently get an unrelated sheet).

    The workbook is opened in read-only mode, which keeps the file open until
    it is closed explicitly; it is now closed before returning.
    """
    wb = openpyxl.load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        names = list(wb.sheetnames)
    finally:
        wb.close()

    for name in names:
        if all(k in name for k in keywords):
            return name
    return names[0]

def parse_week_id(week_id):
    """Split a week id such as "6-4w" into ``(6, 4)``.

    Returns ``(None, None)`` when the stringified value does not have the
    form "<1-2 digits>-<digit>w".
    """
    parsed = re.match(r"^(\d{1,2})-(\d)w$", str(week_id))
    if parsed is None:
        return None, None
    first, second = parsed.groups()
    return int(first), int(second)

def gap_weeks_from_count(cnt):
    """Derive the required gap in weeks from a planned yearly count.

    The gap is ``ceil(52 / cnt)`` clamped to the configured minimum and
    maximum; a count of zero or less gives the configured minimum.
    """
    floor_gap = CONFIG["GAP_WEEKS_MIN"]
    if cnt <= 0:
        return floor_gap
    even_spacing = int(math.ceil(52 / cnt))
    capped = min(CONFIG["GAP_WEEKS_MAX"], even_spacing)
    return max(floor_gap, capped)

def ensure_snow_except_template(path):
    """Create an empty snow-exception workbook at *path* if none exists.

    The template has a single sheet named "例外" with the header cells
    ``pref_code``, ``city_key`` and ``memo`` in A1:C1.  An existing file is
    left untouched.
    """
    if not os.path.exists(path):
        wb = openpyxl.Workbook()
        ws = wb.active
        ws.title = "例外"
        for cell_ref, header in (("A1", "pref_code"), ("B1", "city_key"), ("C1", "memo")):
            ws[cell_ref] = header
        wb.save(path)

def load_snow_excepts(path):
    """Load the snow-blackout exceptions from the workbook at *path*.

    The template is created first when the file is missing.  Returns
    ``(pref_codes, city_keys)``: two sets with the stripped, non-empty values
    of the ``pref_code`` and ``city_key`` columns of the first sheet.  A
    missing column gives an empty set.
    """
    ensure_snow_except_template(path)
    df = pd.read_excel(path, sheet_name=0, dtype=str)

    def column_values(column):
        series = df.get(column, pd.Series([], dtype=str))
        cleaned = set(series.dropna().astype(str).str.strip())
        return {value for value in cleaned if value}

    pref_codes = column_values("pref_code")
    city_keys_ = column_values("city_key")
    return pref_codes, city_keys_

# Prefecture codes and city keys exempt from the snow blackout, loaded once.
SNOW_EXCEPT_PREF_CODES, SNOW_EXCEPT_CITY_KEYS = load_snow_excepts(SNOW_EXCEPT_XLSX)

def is_snow_blackout(pref_code, city_key, week_id):
    """Tell whether a slot falls into the heavy-snow blackout.

    Returns ``False`` when the prefecture code is empty, when the prefecture
    or the city key is on the exception list, when the prefecture is not a
    configured snow prefecture, or when *week_id* cannot be parsed.
    Otherwise the slot is blacked out if the first number of the week id is
    in ``CONFIG["SNOW_BLACKOUT_MONTHS"]``, or if it is 3 and the week number
    is in ``CONFIG["SNOW_BLACKOUT_MARCH_W"]``.

    The March weeks are now read from the configuration; they used to be
    hard-coded as ``{1}``, so changing the setting had no effect.
    """
    if not pref_code:
        return False
    if pref_code in SNOW_EXCEPT_PREF_CODES:
        return False
    if city_key and city_key in SNOW_EXCEPT_CITY_KEYS:
        return False
    if pref_code not in CONFIG["SNOW_PREF_CODES"]:
        return False
    month, w = parse_week_id(week_id)
    if month is None:
        return False
    if month in CONFIG["SNOW_BLACKOUT_MONTHS"]:
        return True
    if month == 3 and w in CONFIG["SNOW_BLACKOUT_MARCH_W"]:
        return True
    return False

def build_week_order(qsheet):
    """Collect the distinct week ids of the week column in sheet order.

    Returns ``(week_order, week_index)``: the list of ids matching
    ``week_id_pat`` in first-seen order, and a dict mapping each id to its
    position in that list.
    """
    positions = {}
    for value in qsheet[COL_WEEK].astype(str).tolist():
        if week_id_pat.match(value):
            # setdefault keeps the first position for repeated ids.
            positions.setdefault(value, len(positions))
    return list(positions), dict(positions)

def kind_norm(x):
    """Normalise a kind label to full-width upper-case AJ / SA.

    ``None`` and float NaN give ``""``.  Half-width "AJ" / "SA" and
    full-width lower-case "ａｊ" / "ｓａ" are replaced by "ＡＪ" / "ＳＡ";
    other text is returned stripped.
    """
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return ""
    text = str(x).strip()
    replacements = (
        ("AJ", "ＡＪ"),
        ("ａｊ", "ＡＪ"),
        ("SA", "ＳＡ"),
        ("ｓａ", "ＳＡ"),
    )
    for old, new in replacements:
        text = text.replace(old, new)
    return text

def extract_blocks(qsheet, week_index_map, which):
    """Extract the two-row event blocks of a quarter sheet.

    A block starts at every row whose week column matches ``week_id_pat``;
    that row is the header (area label, city, venue, prefecture) and the row
    right below it is the detail row carrying the kind.  *which* is stored
    as-is in the ``fy`` column.

    Returns a DataFrame restricted to kinds "ＡＪ" and "合同", with
    ``week_pos`` as int.  Blocks whose week id is missing from
    *week_index_map* are dropped.

    The result always has the full set of columns, even when no block was
    found; previously an empty sheet raised ``KeyError`` because the empty
    frame had no ``week_pos`` column.
    """
    columns = [
        "fy", "week_id", "week_pos", "row_header", "row_detail",
        "area_label", "kind", "city_raw", "venue_raw", "pref_code",
    ]
    blocks = []
    # The last row cannot be a header because it has no detail row below it.
    for i in range(len(qsheet)-1):
        w = qsheet.iat[i, COL_WEEK]
        if pd.isna(w):
            continue
        w = str(w).strip()
        if not week_id_pat.match(w):
            continue
        header = qsheet.iloc[i]
        detail = qsheet.iloc[i+1]

        area_label = "" if pd.isna(header[COL_AREA_OR_KIND]) else str(header[COL_AREA_OR_KIND]).strip()
        kind = kind_norm(detail[COL_AREA_OR_KIND])

        city   = "" if pd.isna(header[COL_CITY]) else str(header[COL_CITY]).strip()
        venue  = "" if pd.isna(header[COL_VENUE]) else str(header[COL_VENUE]).strip()
        pref   = "" if pd.isna(header[COL_PREF]) else str(header[COL_PREF]).strip()

        blocks.append({
            "fy": which, "week_id": w, "week_pos": week_index_map.get(w, None),
            "row_header": i, "row_detail": i+1,
            "area_label": area_label,
            "kind": kind, "city_raw": city, "venue_raw": venue, "pref_code": pref
        })
    df = pd.DataFrame(blocks, columns=columns).dropna(subset=["week_pos"]).copy()
    df["week_pos"] = df["week_pos"].astype(int)
    return df[df["kind"].isin(["ＡＪ","合同"])].copy()

# ======================================================
# 4) 統計量（例外増枠用 + 正規化基準）
# ======================================================
# Municipality statistics; column names are trimmed.
stats = pd.read_excel(STATS_XLSX, sheet_name=0)
stats.columns = [str(c).strip() for c in stats.columns]
# Municipality name column: "市区町村" when present, else the first column.
muni_col = "市区町村" if "市区町村" in stats.columns else stats.columns[0]

# Distinct base names (administrative suffix removed).  Missing values are
# dropped *before* the string conversion; converting first turned them into
# the literal text "nan", which then became a bogus base name.
stats_base_list = stats[muni_col].dropna().astype(str).map(muni_base).map(str.strip)
stats_base_list = stats_base_list[stats_base_list != ""].unique().tolist()

# Normalised name -> base name, plus the list of normalised names used for
# fuzzy matching.
stats_norm_to_base = {norm(x): x for x in stats_base_list if x}
stats_norms = list(stats_norm_to_base.keys())

def canonize_city_key(city_key_raw: str):
    """Map a raw city name onto a base name known to the statistics table.

    Returns ``(name, score)``.  Resolution order:

    1. exact match of the normalised base name        -> score 1.0
    2. exact match of a confusable-character variant  -> score 0.995
    3. best fuzzy match over all statistics names, accepted when the ratio
       reaches ``CONFIG["CANON_CUTOFF"]``

    When nothing is accepted, the alias-applied base name is returned
    together with the best fuzzy ratio seen.  Falsy input gives ``("", 0.0)``.
    """
    if not city_key_raw:
        return "", 0.0

    base = muni_base(apply_alias(city_key_raw))
    base_norm = norm(base)

    if base_norm and base_norm in stats_norm_to_base:
        return stats_norm_to_base[base_norm], 1.0

    for variant in gen_variants(base):
        variant_norm = norm(variant)
        if variant_norm and variant_norm in stats_norm_to_base:
            return stats_norm_to_base[variant_norm], 0.995

    top_base, top_ratio = None, 0.0
    for variant in [base] + gen_variants(base):
        variant_norm = norm(variant)
        for stat_norm in stats_norms:
            ratio = best_ratio(variant_norm, stat_norm)
            if ratio > top_ratio:
                top_base, top_ratio = stats_norm_to_base[stat_norm], ratio

    accepted = top_base and top_ratio >= CONFIG["CANON_CUTOFF"]
    if accepted:
        return top_base, top_ratio
    return base, top_ratio

# ======================================================
# 5) 四半期表読み込み
# ======================================================
# Locate the two master sheets by keywords in their names
# (find_sheet_name falls back to the first sheet when nothing matches).
sheet42 = find_sheet_name(QUARTER_XLSX, ["42期", "マスタ"])
sheet43 = find_sheet_name(QUARTER_XLSX, ["43期", "マスタ"])

# Read both sheets without a header row so columns are addressed by position.
q42 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet42, header=None)
q43 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet43, header=None)

# Week ids in sheet order, and week id -> position maps.
week_order_42, week_index_42 = build_week_order(q42)
week_order_43, week_index_43 = build_week_order(q43)

# Event blocks of kind ＡＪ / 合同 for each sheet.
b42 = extract_blocks(q42, week_index_42, "42")
b43 = extract_blocks(q43, week_index_43, "43")

# ======================================================
# 6) 地域別回数読み込み → 市区分キー正規化して集約
# ======================================================
# Regional plan table, read by position: column 0 = parent label (forward
# filled), column 1 = city name, column 2 = planned count.
r = pd.read_excel(REGION_XLSX, sheet_name=0, header=None, dtype=str)
# Keep only rows that have both a count and a city name.
plan_rows = r[~r[2].isna()].copy()
plan_rows = plan_rows[plan_rows[1].notna()].copy()
plan_rows["pref_parent"] = plan_rows[0].ffill()
plan_rows["city_key_raw"] = plan_rows[1].astype(str).str.strip()
# Non-numeric counts become 0.
plan_rows["plan_count"] = pd.to_numeric(plan_rows[2], errors="coerce").fillna(0).astype(int)

# Canonicalise every city name; remember the ones that were actually changed
# with a score of at least 0.93 so they can be reported.
canon_list = []
fix_logs = []
for x in plan_rows["city_key_raw"].tolist():
    canon, sc = canonize_city_key(x)
    canon_list.append(canon)
    if canon != muni_base(apply_alias(x)) and sc >= 0.93:
        fix_logs.append((x, canon, sc))
plan_rows["city_key"] = canon_list

# Sum the planned counts per (parent label, canonical city key).
region_master = plan_rows.groupby(["pref_parent","city_key"], as_index=False)["plan_count"].sum()

# Report at most the first 40 automatic corrections.
if fix_logs:
    print("✅ 市区分キーの自動補正（alias/誤字ゆれ/統計量基準）:")
    for a,b,sc in fix_logs[:40]:
        print(f"  - {a} → {b} (match={sc:.3f})")
    if len(fix_logs) > 40:
        print(f"  ...他 {len(fix_logs)-40} 件")

city_keys = region_master["city_key"].tolist()
# When a city key appears under several parents, the last row wins here.
plan_count_by_city = region_master.set_index("city_key")["plan_count"].to_dict()

# ======================================================
# 7) 候補側の索引（表記norm + 読み）
# ======================================================
# Index of the candidate city keys by normalised spelling.
city_norm_map = {ck: norm(ck) for ck in city_keys}
norm_to_city = {}
for ck, nk in city_norm_map.items():
    if nk:
        norm_to_city.setdefault(nk, []).append(ck)

# Index of the candidate city keys by katakana reading; keys without a
# reading are left out.
reading_to_city = {}
for ck in city_keys:
    rd = to_katakana_reading(ck)
    if rd:
        reading_to_city.setdefault(rd, []).append(ck)

def choose_best_by_fuzzy(query_base, cands):
    """Pick the candidate most similar to *query_base*.

    Similarity is ``best_ratio`` on ``norm``-alised strings.  Returns
    ``(candidate, ratio)``; the first candidate wins on ties, and
    ``(None, 0.0)`` is returned when no candidate has a ratio above zero.
    """
    query_norm = norm(query_base)
    winner, winner_ratio = None, 0.0
    for candidate in cands:
        ratio = best_ratio(query_norm, norm(candidate))
        if ratio > winner_ratio:
            winner, winner_ratio = candidate, ratio
    return winner, winner_ratio

def match_city_key(city_name: str):
    """Resolve a free-text city name to one of the planned city keys.

    Returns ``(city_key, score)``, or ``(None, best_ratio_seen)`` when
    nothing is good enough.  Stages, first hit wins:

    1. exact normalised spelling (score 1.0; with several candidates the
       fuzzy-best one, score at least 0.97)
    2. exact spelling of a confusable-character variant (0.995; at least
       0.95 with several candidates)
    3. identical katakana reading (0.99; at least 0.93 with several
       candidates)
    4. best fuzzy ratio over all city keys, accepted from
       ``CONFIG["FUZZY_CUTOFF"]`` upwards

    Falsy input gives ``(None, 0.0)``.
    """
    if not city_name:
        return None, 0.0

    base = muni_base(apply_alias(city_name))
    base_norm = norm(base)

    # Stage 1: exact normalised spelling.
    if base_norm and base_norm in norm_to_city:
        same_spelling = norm_to_city[base_norm]
        if len(same_spelling) == 1:
            return same_spelling[0], 1.0
        picked, ratio = choose_best_by_fuzzy(base, same_spelling)
        return picked, max(0.97, ratio)

    # Stage 2: confusable-character variants.
    for variant in gen_variants(base):
        variant_norm = norm(variant)
        if variant_norm and variant_norm in norm_to_city:
            same_spelling = norm_to_city[variant_norm]
            if len(same_spelling) == 1:
                return same_spelling[0], 0.995
            picked, ratio = choose_best_by_fuzzy(variant, same_spelling)
            return picked, max(0.95, ratio)

    # Stage 3: identical reading.
    same_reading = reading_to_city.get(to_katakana_reading(base), [])
    if len(same_reading) == 1:
        return same_reading[0], 0.99
    if len(same_reading) >= 2:
        picked, ratio = choose_best_by_fuzzy(base, same_reading)
        if picked:
            return picked, max(0.93, ratio)

    # Stage 4: fuzzy match over every planned city key.
    top_key, top_ratio = None, 0.0
    for key in city_keys:
        ratio = best_ratio(base_norm, city_norm_map.get(key, ""))
        if ratio > top_ratio:
            top_key, top_ratio = key, ratio
    if top_key and top_ratio >= CONFIG["FUZZY_CUTOFF"]:
        return top_key, top_ratio
    return None, top_ratio

def match_city_keys_multi(city_raw: str, venue_raw: str = ""):
    """Resolve every city key referenced by one schedule row.

    Each token of *city_raw* is matched individually. Only when no token
    yields a key is *venue_raw* searched for city names instead. The result
    is de-duplicated, keeps first-seen order and contains no falsy entries.
    """
    tokens = split_city_tokens(city_raw)
    matched = [ck for ck, _score in map(match_city_key, tokens) if ck]

    # Fall back to the venue text only if the city column produced nothing.
    if venue_raw and not matched:
        matched.extend(find_city_tokens_in_text(venue_raw, city_keys))

    return list(dict.fromkeys(k for k in matched if k))

def add_city_key(df):
    """Return a copy of *df* with ``city_key``, ``city_keys`` and ``match_score``.

    ``city_keys`` holds every key resolved for the row, ``city_key`` is the
    first of them (or ``None``) and ``match_score`` is the score of the token
    that actually produced ``city_key``.

    The score used to be taken from the first token unconditionally, so a row
    whose first token failed to match but whose second token succeeded was
    reported with the failed token's low ratio. When the key came from the
    venue fallback (no token matched) the previous behaviour is kept: the
    score of the first token, or of the raw city text if there are no tokens.
    """
    keys1, scores, key_lists = [], [], []
    for city_raw, venue_raw in zip(df["city_raw"].tolist(), df["venue_raw"].tolist()):
        klist = match_city_keys_multi(city_raw, venue_raw)
        key_lists.append(klist)
        k0 = klist[0] if klist else None
        keys1.append(k0)
        if not k0:
            scores.append(0.0)
            continue

        toks = split_city_tokens(city_raw)
        score = None
        # Find the token that resolved to the primary key and report its score.
        for tok in toks or []:
            ck, sc = match_city_key(tok)
            if ck == k0:
                score = sc
                break
        if score is None:
            # Key came from the venue text; keep the legacy probe-based score.
            probe = toks[0] if toks else city_raw
            _, score = match_city_key(probe)
        scores.append(score)

    out = df.copy()
    out["city_key"] = keys1
    out["city_keys"] = key_lists
    out["match_score"] = scores
    return out

# Rows that already have a city or a venue filled in count as scheduled.
scheduled42 = add_city_key(b42[(b42["city_raw"]!="") | (b42["venue_raw"]!="")].copy())
scheduled43 = add_city_key(b43[(b43["city_raw"]!="") | (b43["venue_raw"]!="")].copy())

# Open slots: 43rd-term rows of kind ＡＪ with both city and venue blank,
# ordered by week position and then by sheet row.
open43_AJ = b43[
    (b43["kind"]=="ＡＪ") &
    (b43["city_raw"]=="") &
    (b43["venue_raw"]=="")
].copy().sort_values(["week_pos","row_header"]).reset_index(drop=True)

# ======================================================
# 8) 都道府県コード推定（候補city_key→pref_code）
# ======================================================
# Distinct values of the prefecture column in the 43rd-term sheet.
pref_code_set = set([str(x).strip() for x in q43[COL_PREF].dropna().astype(str).unique().tolist()])
# Values shaped "<2 digits><name>" are indexed by the name part -> full code string.
pref_name_to_code = {}
for pc in pref_code_set:
    m = re.match(r"^(\d{2})(.+)$", pc)
    if m:
        pref_name_to_code[m.group(2)] = pc

def parent_to_pref_code(pref_parent):
    """Map a region-table parent label to a prefecture code string.

    The leading run of non-digit characters is looked up in
    ``pref_name_to_code``; ``""`` is returned for ``None``/NaN input or
    when the name is unknown.
    """
    if pref_parent is None:
        return ""
    if isinstance(pref_parent, float) and math.isnan(pref_parent):
        return ""
    label = str(pref_parent).strip()
    leading = re.match(r"^([^\d]+)", label)
    key = leading.group(1).strip() if leading else label
    return pref_name_to_code.get(key, "")

# Guess a prefecture code for every region row, then index it by city key.
region_master["pref_code_guess"] = region_master["pref_parent"].apply(parent_to_pref_code)
pref_by_city = region_master.set_index("city_key")["pref_code_guess"].to_dict()

# Required spacing (in weeks) derived from the planned visit counts, per
# prefecture (rows with an empty code guess are ignored) and per city.
pref_plan_count = region_master.groupby("pref_code_guess")["plan_count"].sum().to_dict()
pref_gap = {pc: gap_weeks_from_count(int(cnt)) for pc, cnt in pref_plan_count.items() if pc}
city_gap = {ck: gap_weeks_from_count(int(cnt)) for ck, cnt in plan_count_by_city.items()}

# ======================================================
# 9) スパン履歴（42→43連結）★束ね反映
# ======================================================
week_order_42 = week_order_42 if week_order_42 else []
week_order_43 = week_order_43 if week_order_43 else []
# 43rd-term week positions are shifted by the number of 42nd-term weeks so
# both terms share one continuous position axis.
OFFSET_43 = len(week_order_42)

scheduled42["abs_pos"] = scheduled42["week_pos"]
scheduled43["abs_pos"] = scheduled43["week_pos"] + OFFSET_43
open43_AJ["abs_pos"]   = open43_AJ["week_pos"] + OFFSET_43

scheduled_all = pd.concat([scheduled42, scheduled43], ignore_index=True)

# Latest (maximum) absolute position seen per prefecture code and per city key.
last_pos_pref, last_pos_city = {}, {}
for _, e in scheduled_all.dropna(subset=["abs_pos"]).iterrows():
    ap = int(e["abs_pos"])
    pc = e["pref_code"]
    if pc:
        last_pos_pref[pc] = max(last_pos_pref.get(pc, -999), ap)

    # A bundled row updates every constituent city; otherwise fall back to
    # the single city_key column.
    klist = e.get("city_keys", [])
    if isinstance(klist, list) and klist:
        for ck in klist:
            if ck:
                last_pos_city[ck] = max(last_pos_city.get(ck, -999), ap)
    else:
        ck = e.get("city_key", None)
        if ck:
            last_pos_city[ck] = max(last_pos_city.get(ck, -999), ap)

# Spec A: one bundled event counts as one consumed visit for every constituent city.
scheduled_counts_43 = {}
for _, e in scheduled43.iterrows():
    klist = e.get("city_keys", [])
    if isinstance(klist, list) and klist:
        for ck in klist:
            if ck:
                scheduled_counts_43[ck] = scheduled_counts_43.get(ck, 0) + 1
    else:
        ck = e.get("city_key", None)
        if ck:
            scheduled_counts_43[ck] = scheduled_counts_43.get(ck, 0) + 1

# ======================================================
# 10) 例外増枠プール（J列=1 & 人口>=18万）
# ======================================================
stats2 = stats.copy()
stats2[muni_col] = stats2[muni_col].astype(str).str.strip()

# The class column is taken positionally: the 10th column when present,
# otherwise the last one.
class_col = stats2.columns[9] if len(stats2.columns) >= 10 else stats2.columns[-1]
stats2[class_col] = pd.to_numeric(stats2[class_col], errors="coerce")

# Population column: first column whose name contains "人口"; failing that,
# the numeric column with the largest maximum value.
pop_col = next((c for c in stats2.columns if "人口" in str(c)), None)
if pop_col is None:
    num_cols = []
    for c in stats2.columns:
        s = pd.to_numeric(stats2[c], errors="coerce")
        if s.notna().sum() > 0:
            num_cols.append((c, float(s.max(skipna=True))))
    pop_col = sorted(num_cols, key=lambda x: x[1], reverse=True)[0][0] if num_cols else stats2.columns[-1]
stats2[pop_col] = pd.to_numeric(stats2[pop_col], errors="coerce")

# Municipalities eligible for an over-plan slot: class value and minimum
# population both come from CONFIG.
ovr = stats2[(stats2[class_col] == CONFIG["OVR_CLASS_J_VALUE"]) & (stats2[pop_col] >= CONFIG["OVR_MIN_POP"])].copy()

# Map each eligible municipality to a city key, keeping the best-scoring
# source name per key.
ovr_candidates = {}
for name in ovr[muni_col].tolist():
    ck, sc = match_city_key(name)
    if ck:
        # First row with this name supplies the population value.
        pop_val = float(ovr.loc[ovr[muni_col] == name, pop_col].iloc[0])
        if ck not in ovr_candidates or sc > ovr_candidates[ck]["match_score"]:
            ovr_candidates[ck] = {"name_raw": name, "match_score": sc, "pop": pop_val}
OVR_POOL = set(ovr_candidates.keys())
# NOTE(review): the message hard-codes "==1" and ">=18万" rather than echoing CONFIG.
print(f"✅ 例外増枠プール: {len(OVR_POOL)}（J列={class_col}==1 & 人口列={pop_col}>=18万）")

# ======================================================
# 11) 集客率（あれば市区分に寄せる。無ければ0.5固定）
# ======================================================
# Attendance-rate column: first column whose name contains one of the three
# Japanese labels below; None when the stats sheet has no such column.
rate_col = next((c for c in stats2.columns if ("集客率" in str(c) or "来場率" in str(c) or "動員率" in str(c))), None)
city_rate = {}
if rate_col is not None:
    tmp = stats2[[muni_col, rate_col]].copy()
    tmp["rate"] = pd.to_numeric(tmp[rate_col], errors="coerce")
    tmp = tmp.dropna(subset=["rate"])
    # normalized municipality base name -> rate (later rows overwrite earlier ones)
    muni_rate_norm = {norm(muni_base(row[muni_col])): float(row["rate"]) for _, row in tmp.iterrows()}
    # Only city keys whose normalized form matches exactly receive a rate.
    for ck in city_keys:
        cn = norm(ck)
        if cn in muni_rate_norm:
            city_rate[ck] = muni_rate_norm[cn]

def percentile_need(values_dict):
    """Rank the values of *values_dict* as percentiles.

    Entries whose value is ``None`` or a float NaN are ignored. Returns
    ``(pct_map, need_map)`` where ``pct_map[k]`` is the average-method
    percentile rank of ``k`` and ``need_map[k]`` is ``1 - pct_map[k]``.
    Both maps are empty when no usable value remains.
    """
    def _usable(value):
        return value is not None and not (isinstance(value, float) and math.isnan(value))

    kept = {key: value for key, value in values_dict.items() if _usable(value)}
    if not kept:
        return {}, {}

    ranks = pd.Series(list(kept.values())).rank(pct=True, method="average").tolist()
    pct_map = dict(zip(kept.keys(), ranks))
    need_map = {key: float(1 - rank) for key, rank in pct_map.items()}
    return pct_map, need_map

# Percentile rank of each city's rate, and its complement (1 - percentile).
city_pct, city_need = percentile_need(city_rate)

def fmt_rate(v):
    """Format a rate for display.

    ``None``/NaN gives "不明"; values up to 1.0 are shown as a percentage
    with two decimals; larger values are shown with four significant digits.
    """
    missing = v is None or (isinstance(v, float) and math.isnan(v))
    if missing:
        return "不明"
    if v <= 1.0:
        return f"{v*100:.2f}%"
    return f"{v:.4g}"

def fmt_pct(p):
    """Format a percentile in [0, 1] as "下位NN%"; ``None`` gives "不明"."""
    return "不明" if p is None else f"下位{p*100:.0f}%"

def build_reason(variant, week_id, area_label, ck, pc,
                 need_gap_p, gp, need_gap_c, gc,
                 remaining_plan, overflow_used, overflow_meta,
                 same_week_pref_hit, same_week_city_hit, relax_mode, score):
    """Compose the Japanese explanation text for one proposed assignment.

    The text lists, in order: the area filter, whether the pick was within
    plan or an over-plan exception, required vs. actual gaps for prefecture
    and city, the attendance rate if known, same-week notes, the snow rule,
    and a summary with the score. Parts are joined with " / " and the result
    is truncated to 32000 characters. ``pc`` is accepted but not used here.
    """
    lines = []
    lines.append(f"【案{variant}】この枠（{week_id}／{area_label}）はAJ枠が空いていたので、次の順で選びました。")
    lines.append(f"0) 地域フィルタ：この枠は「{area_label}」なので、対象都道府県の範囲内だけから選定しています。")
    if not overflow_used:
        lines.append(f"1) まずは『43期の計画回数の残りがある市区分』から選定。→ {ck} は残り {remaining_plan} 回あるため計画内で入れています。")
    else:
        # Over-plan pick: show the source municipality name and its population.
        pop = overflow_meta.get("pop", None)
        pop_txt = f"{int(pop):,}人" if isinstance(pop, (int,float)) and not math.isnan(pop) else "不明"
        nm = overflow_meta.get("name_raw","") or ck
        lines.append("1) 計画回数の残りがある候補だけでは埋め切れなかったため、例外ルールで増枠しています。")
        lines.append(f"   例外条件：統計量のJ列=1 & 人口18万人以上。→ {nm}（人口={pop_txt}）を採用。")

    # The relaxation note is appended only when a relax mode was in effect.
    lines.append(f"2) スパン：県は必要{need_gap_p}週に対し実績{gp}週、市区分は必要{need_gap_c}週に対し実績{gc}週。"
                 + (f"（制約緩和={relax_mode}）" if relax_mode else ""))

    if ck in city_rate:
        lines.append(f"3) 集客率：{fmt_rate(city_rate.get(ck))}（{fmt_pct(city_pct.get(ck))}）。改善余地が大きいほど優先度を上げています。")
    else:
        lines.append("3) 集客率：データが取れない市区分のため、回数とスパンを優先しています。")

    lines.append("4) 同週はカニバりやすいので基本回避。"
                 + ("（同週同県ペナ）" if same_week_pref_hit else "（同週同県は回避）")
                 + ("（同週同市区分ペナ）" if same_week_city_hit else ""))

    lines.append("5) 豪雪の12〜2月・3月1wは原則除外（例外リストで解除可）。祭り/マラソンは未連携で後日追加可能。")
    lines.append(f"【まとめ】回数×スパン×改善余地×同週回避を総合した点が最も高いので採用。（score={score:.2f}）")
    return " / ".join(lines)[:32000]

# week id -> set of prefecture codes already scheduled that week (43rd term)
week_used_pref_base = {}
for _, e in scheduled43.iterrows():
    w = e["week_id"]; pc = e["pref_code"]
    if w and pc:
        week_used_pref_base.setdefault(w, set()).add(pc)

# week id -> set of city keys already scheduled that week (bundled rows
# contribute every constituent city)
week_used_city_base = {}
for _, e in scheduled43.iterrows():
    w = e["week_id"]
    klist = e.get("city_keys", [])
    if not w or not isinstance(klist, list):
        continue
    s = week_used_city_base.setdefault(w, set())
    for ck in klist:
        if ck:
            s.add(ck)

# ======================================================
# 12) プラン生成（A/B/C）
# ======================================================
def make_plan(variant):
    """Greedily fill every open 43rd-term AJ slot for one plan variant.

    Slots are visited in the order of ``open43_AJ``. For each slot all city
    keys are scored; the search first uses only cities with remaining planned
    visits and, if that finds nothing, allows over-plan cities from
    ``OVR_POOL``. Within each search the gap constraints are relaxed in three
    steps (none -> city gap ignored -> both gaps ignored), stopping at the
    first step that yields a candidate.

    Args:
        variant: key into ``CONFIG["WEIGHTS"]`` and ``CONFIG["SEEDS"]``.

    Returns:
        DataFrame with one row per open slot: ``variant``, ``row_header``,
        ``assign_city_key``, ``pref_code_guess``, ``score``, ``reason_BT``.
        Slots with no candidate get empty keys and a score of ``-inf``.
    """
    rng = random.Random(CONFIG["SEEDS"][variant])
    W = CONFIG["WEIGHTS"][variant]

    # Per-plan mutable copies so the variants do not influence each other.
    used_in_plan = {ck: 0 for ck in city_keys}
    lp_p = dict(last_pos_pref)
    lp_c = dict(last_pos_city)
    week_used_pref = {w: set(s) for w, s in week_used_pref_base.items()}
    week_used_city = {w: set(s) for w, s in week_used_city_base.items()}

    assigns = []

    def score_city(ck, apos, week_id, area_label, relax_mode, allow_overflow):
        """Score one city for one slot; return None when it is filtered out."""
        pc = pref_by_city.get(ck, "")

        if not area_allowed(area_label, pc):
            return None

        plan_cnt = plan_count_by_city.get(ck, 0)
        already = scheduled_counts_43.get(ck, 0)
        remaining = plan_cnt - already - used_in_plan.get(ck, 0)

        overflow_used = False
        overflow_meta = {}

        if remaining <= 0:
            # Plan exhausted: only pool members may be used, and only when
            # the caller allows over-plan picks.
            if not allow_overflow:
                return None
            if ck not in OVR_POOL:
                return None
            overflow_used = True
            meta = ovr_candidates.get(ck, {})
            overflow_meta = {"name_raw": meta.get("name_raw", ""), "pop": meta.get("pop", float("nan"))}

        need_gap_p = pref_gap.get(pc, CONFIG["GAP_WEEKS_MIN"]) if pc else CONFIG["GAP_WEEKS_MIN"]
        need_gap_c = city_gap.get(ck, CONFIG["GAP_WEEKS_MIN"])
        last_p = lp_p.get(pc, None) if pc else None
        last_c = lp_c.get(ck, None)
        # 999 stands for "no history".
        gp = 999 if last_p is None else (apos - last_p)
        gc = 999 if last_c is None else (apos - last_c)

        ok_p = (last_p is None) or (gp >= need_gap_p)
        ok_c = (last_c is None) or (gc >= need_gap_c)

        if relax_mode is None:
            if not (ok_p and ok_c):
                return None
        elif relax_mode == "Aのみ":
            # City gap is waived; the prefecture gap still applies.
            if not ok_p:
                return None
        elif relax_mode == "B+A":
            pass

        # Heavy-snow blackout, evaluated per city key.
        if pc and is_snow_blackout(pc, ck, week_id):
            return None

        same_week_pref_hit = (pc and pc in week_used_pref.get(week_id, set()))
        same_week_city_hit = (ck and ck in week_used_city.get(week_id, set()))

        same_week_pen = (CONFIG["SAME_WEEK_PREF_PENALTY"] if same_week_pref_hit else 0.0) + \
                        (CONFIG["SAME_WEEK_CITY_PENALTY"] if same_week_city_hit else 0.0)
        overflow_pen = CONFIG["OVR_OVERFLOW_PENALTY"] if overflow_used else 0.0

        relax_pen = 0.0
        if relax_mode == "Aのみ":
            relax_pen = W["relax_penalty"] * 0.6
        elif relax_mode == "B+A":
            relax_pen = W["relax_penalty"] * 1.0

        slack_p = gp - need_gap_p
        slack_c = gc - need_gap_c
        need_city = city_need.get(ck, 0.5)

        score = (
            slack_p * W["pref_slack"] +
            slack_c * W["city_slack"] +
            max(remaining, 0) * W["unmet_bonus"] * 5.0 +
            (need_city * 10) * W["low_attr_city"] -
            relax_pen - same_week_pen - overflow_pen +
            rng.uniform(-0.5, 0.5) * W["noise"]
        )

        # The explanation text is only needed for the winning candidate, so
        # keep its arguments and render it after selection.
        reason_args = (
            variant, week_id, area_label, ck, pc,
            need_gap_p, gp, need_gap_c, gc,
            remaining,
            overflow_used, overflow_meta,
            same_week_pref_hit, same_week_city_hit, relax_mode, score,
        )
        return {"ck": ck, "pc": pc, "score": float(score), "reason_args": reason_args}

    def pick_best(apos, week_id, area_label, allow_overflow):
        """Return the top candidate at the first relax step that has any."""
        for relax_mode in (None, "Aのみ", "B+A"):
            best = None
            for ck in city_keys:
                cand = score_city(ck, apos, week_id, area_label, relax_mode,
                                  allow_overflow=allow_overflow)
                if cand and (best is None or cand["score"] > best["score"]):
                    best = cand
            if best is not None:
                return best
        return None

    for _, slot in open43_AJ.iterrows():
        apos = int(slot["abs_pos"])
        week_id = slot["week_id"]
        area_label = slot.get("area_label", "")

        # Within plan first, over-plan exception second.
        best = pick_best(apos, week_id, area_label, allow_overflow=False)
        if best is None:
            best = pick_best(apos, week_id, area_label, allow_overflow=True)

        if best is None:
            assigns.append({
                "variant": variant, "row_header": int(slot["row_header"]),
                "assign_city_key": "", "pref_code_guess": "", "score": float("-inf"),
                "reason_BT": f"案{variant}: 条件により候補なし（地域={area_label}）"
            })
            continue

        ck = best["ck"]; pc = best["pc"]

        # Update in-plan usage and history. A relaxed pick can land before an
        # already confirmed later event; keep the later position so that
        # event still blocks the slots around it.
        used_in_plan[ck] += 1
        lp_c[ck] = max(lp_c.get(ck, apos), apos)
        week_used_city.setdefault(week_id, set()).add(ck)

        if pc:
            lp_p[pc] = max(lp_p.get(pc, apos), apos)
            week_used_pref.setdefault(week_id, set()).add(pc)

        assigns.append({
            "variant": variant, "row_header": int(slot["row_header"]),
            "assign_city_key": ck, "pref_code_guess": pc, "score": best["score"],
            "reason_BT": build_reason(*best["reason_args"])
        })

    return pd.DataFrame(assigns)

# One independent plan per variant; each uses its own seed and weights from CONFIG.
planA = make_plan("A")
planB = make_plan("B")
planC = make_plan("C")

def to_map(df, col):
    """Return ``{int(row_header): value of col}`` for every row of *df*."""
    mapping = {}
    for _, row in df.iterrows():
        mapping[int(row["row_header"])] = row[col]
    return mapping

# Sheet row -> proposed city / prefecture code for each variant.
A_city = to_map(planA, "assign_city_key")
B_city = to_map(planB, "assign_city_key")
C_city = to_map(planC, "assign_city_key")
A_pref = to_map(planA, "pref_code_guess")
B_pref = to_map(planB, "pref_code_guess")
C_pref = to_map(planC, "pref_code_guess")
# Only plan A's explanation text is kept for the write-back.
A_reason = to_map(planA, "reason_BT")

# ======================================================
# 13) 書き戻し（43期の空欄AJ枠に A + (B/C併記) + BT理由）
# ======================================================
wb = openpyxl.load_workbook(QUARTER_XLSX)

# Keep only the 42nd- and 43rd-term sheets in the output workbook.
for name in list(wb.sheetnames):
    if name not in {sheet42, sheet43}:
        wb.remove(wb[name])

ws43 = wb[sheet43]

written = 0
for row0 in open43_AJ["row_header"].tolist():
    # row_header is a 0-based DataFrame row; openpyxl rows/columns are 1-based.
    r = int(row0) + 1

    # Never overwrite a slot that already has a city or a venue in the workbook.
    c_val = ws43.cell(r, COL_CITY+1).value
    d_val = ws43.cell(r, COL_VENUE+1).value
    if (c_val is not None and str(c_val).strip() != "") or (d_val is not None and str(d_val).strip() != ""):
        continue

    a = str(A_city.get(int(row0), "") or "")
    b = str(B_city.get(int(row0), "") or "")
    c = str(C_city.get(int(row0), "") or "")

    # Plan A goes into the city cell; plans B and C are noted in the venue cell.
    ws43.cell(r, COL_CITY+1).value = a
    ws43.cell(r, COL_VENUE+1).value = f"{VENUE_PLACEHOLDER}（B:{b if b else '-'} / C:{c if c else '-'}）"

    # Fill the prefecture cell only when it is blank, preferring A, then B, then C.
    pref_candidate = A_pref.get(int(row0), "") or B_pref.get(int(row0), "") or C_pref.get(int(row0), "")
    if (ws43.cell(r, COL_PREF+1).value is None) or (str(ws43.cell(r, COL_PREF+1).value).strip()==""):
        if pref_candidate:
            ws43.cell(r, COL_PREF+1).value = str(pref_candidate)

    ws43.cell(r, COL_REASON_BT+1).value = str(A_reason.get(int(row0), "案A: 理由生成なし"))
    written += 1

wb.save(OUT_QUARTER_ABC)
# Run summary: input/output paths, number of slots written, and the area filter sets.
print("\n✅ 入力:", QUARTER_XLSX)
print("✅ 出力:", OUT_QUARTER_ABC)
print("✅ 43期 空欄AJ枠 書込数:", written)
print("✅ 地域フィルタ定義:")
print("  東:", sorted(EAST_PREF_NUMS))
print("  西:", sorted(WEST_PREF_NUMS))
print("  九州:", sorted(KYUSHU_PREF_NUMS))


✅ 例外増枠プール: 1（J列=Zの4分位（1=赤,2=黄,3=青,4=灰）==1 & 人口列=人口>=18万）

✅ 入力: SA+AJ+共有用_四半期表20240303.xlsx
✅ 出力: SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT.xlsx
✅ 43期 空欄AJ枠 書込数: 137
✅ 地域フィルタ定義:
  東: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 24]
  西: [17, 18, 25, 26, 27, 28, 29, 30, 31, 32, 33, 36, 37, 38, 39]
  九州: [34, 35, 40, 41, 42, 43, 44, 45, 46]


In [None]:
# ============================================
# 43期 スケジュール自動提案 MVP（A/B/C案）
# ★追加：東/西/九州の都道府県範囲フィルタ
# ★追加：①alias ②読みキー ③誤字ゆれ生成 による表記ゆれ吸収
# （会場マスタは使わない）
# ============================================

!pip -q install fugashi unidic-lite jaconv

import os, re, math, random, difflib
import pandas as pd
import openpyxl
import jaconv
from fugashi import Tagger

tagger = Tagger()

# ====== 入力 ======
QUARTER_XLSX     = "SA+AJ+共有用_四半期表20240303.xlsx"
REGION_XLSX      = "43期地域別会場回数.xlsx"
STATS_XLSX       = "市区町村_統計量_全国 (1).xlsx"
SNOW_EXCEPT_XLSX = "豪雪例外リスト.xlsx"

# ====== Output (only the 42nd/43rd-term sheets are kept) ======
OUT_QUARTER_ABC = "SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT.xlsx"

# ====== Columns (0-based) =====
COL_WEEK = 0
COL_AREA_OR_KIND = 1     # header row = 東/西/九州, following row = AJ/合同/SA
COL_CITY = 2             # prefecture / city label (e.g. 静岡, 難波, 博多…)
COL_VENUE = 3
COL_PREF = 5
COL_REASON_BT = 72       # spreadsheet column BT

VENUE_PLACEHOLDER = "会場要検討"
# Week ids look like "<1-2 digits>-<digit>w".
week_id_pat = re.compile(r"^\d{1,2}-\dw$")

# Tunable parameters for gap rules, name matching, snow blackout,
# over-plan exceptions and per-variant scoring.
CONFIG = {
    "GAP_WEEKS_MIN": 6,
    "GAP_WEEKS_MAX": 30,
    "SAME_WEEK_PREF_PENALTY": 80.0,  # penalty: same week, same prefecture

    # Spelling-variation matching thresholds
    "FUZZY_CUTOFF": 0.86,
    "CANON_CUTOFF": 0.93,

    # Heavy-snow blackout
    "SNOW_BLACKOUT_MONTHS": {12, 1, 2},
    "SNOW_BLACKOUT_MARCH_W": {1},
    "SNOW_PREF_CODES": {
        "01北海道","02青森","03岩手","04宮城","05秋田","06山形","07福島",
        "15新潟","16富山","17石川","18福井","19山梨",
        "20長野","21岐阜","31鳥取","32島根"
    },

    # Over-plan exception pool
    "OVR_CLASS_J_VALUE": 1,
    "OVR_MIN_POP": 180000,
    "OVR_OVERFLOW_PENALTY": 15.0,

    # Scoring weights per plan variant
    "WEIGHTS": {
        "A": {"pref_slack": 3.0, "city_slack": 2.0, "unmet_bonus": 1.2,
              "low_attr_city": 2.0, "relax_penalty": 25.0, "noise": 0.8},
        "B": {"pref_slack": 4.5, "city_slack": 3.2, "unmet_bonus": 1.0,
              "low_attr_city": 0.6, "relax_penalty": 30.0, "noise": 0.8},
        "C": {"pref_slack": 2.0, "city_slack": 1.2, "unmet_bonus": 1.2,
              "low_attr_city": 3.2, "relax_penalty": 25.0, "noise": 0.8},
    },
    # Random seeds per plan variant
    "SEEDS": {"A": 4301, "B": 4302, "C": 4303},
}

# ======================================================
# 0) alias辞書（CSV）…追記して育てる運用
# ======================================================
ALIAS_CSV = "市区分_alias.csv"

def ensure_alias_template(path=ALIAS_CSV):
    """Create a starter alias CSV at *path* unless the file already exists."""
    if not os.path.exists(path):
        seed_rows = [
            {"alias":"なんば","canonical":"難波"},
            {"alias":"薩摩河内","canonical":"薩摩川内"},
        ]
        pd.DataFrame(seed_rows).to_csv(path, index=False, encoding="utf-8-sig")
        print(f"✅ aliasテンプレ作成: {path}（必要に応じて追記してください）")

def load_alias_map(path=ALIAS_CSV):
    """Load the alias CSV into an ``{alias: canonical}`` dict.

    The template file is created first if missing. Rows with an empty alias
    or canonical value are skipped. Any failure while reading is reported
    and an empty mapping is returned so the run can continue without aliases.
    """
    ensure_alias_template(path)
    try:
        table = pd.read_csv(path, dtype=str).fillna("")
        for col in ("alias", "canonical"):
            table[col] = table[col].astype(str).str.strip()
        mapping = {}
        for alias, canonical in zip(table["alias"], table["canonical"]):
            if alias and canonical:
                mapping[alias] = canonical
        return mapping
    except Exception as e:
        print("⚠️ alias読み込み失敗。aliasなしで続行:", e)
        return {}

ALIAS_MAP = load_alias_map(ALIAS_CSV)

def apply_alias(s: str) -> str:
    """Return the canonical name for *s* from ``ALIAS_MAP``.

    The input is stripped first; ``None`` or blank input gives ``""`` and
    names without an alias entry are returned stripped but otherwise as-is.
    """
    text = "" if s is None else str(s).strip()
    if text == "":
        return ""
    return ALIAS_MAP.get(text, text)

# ======================================================
# 1) 正規化・読みキー・誤字ゆれ生成
# ======================================================
def norm(s):
    """Normalize a label for comparison.

    ``None``/NaN gives ``""``. Otherwise full-width spaces become ASCII
    spaces, the text is stripped, and whitespace, dashes, slashes, dots,
    commas and bracket characters are removed.
    """
    is_nan = isinstance(s, float) and math.isnan(s)
    if s is None or is_nan:
        return ""
    text = str(s).replace("　"," ").strip()
    return re.sub(r"[ \t\n\r\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", text)

def muni_base(name: str) -> str:
    """Strip administrative suffixes from a place name.

    After trimming whitespace, one trailing 都/道/府/県 is removed if
    present, then one trailing 市/区/町/村. ``None`` gives ``""``.
    """
    if name is None:
        return ""
    base = str(name).strip()
    # Order matters: prefecture-level suffix first, municipality-level second.
    for suffixes in ("都道府県", "市区町村"):
        if base and base[-1] in suffixes:
            base = base[:-1]
    return base

def to_katakana_reading(s: str) -> str:
    """Return a katakana reading key for *s*.

    Punctuation and whitespace are removed first. Text that already contains
    kana is converted to katakana directly; otherwise the tokenizer supplies
    a reading per token (falling back to the surface form). In both cases
    the result keeps only katakana, digits and upper-case ASCII letters.
    """
    def _kata_key(text):
        # Shared tail: hiragana -> katakana, normalize, drop everything else.
        converted = jaconv.normalize(jaconv.hira2kata(text))
        return re.sub(r"[^ァ-ン0-9A-Z]", "", converted)

    if s is None:
        return ""
    stripped = str(s).strip()
    if not stripped:
        return ""
    cleaned = re.sub(r"[ \t\r\n\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", stripped)

    if re.search(r"[ぁ-んァ-ン]", cleaned):
        return _kata_key(cleaned)

    pieces = []
    for token in tagger(cleaned):
        feature = token.feature
        # First attribute that exists on the feature object wins.
        reading = next(
            (getattr(feature, attr) for attr in ("reading", "kana", "pron") if hasattr(feature, attr)),
            None,
        )
        if not reading or reading == "*":
            reading = token.surface
        pieces.append(reading)

    return _kata_key("".join(pieces))

# Characters that are commonly written interchangeably in place names.
CONFUSION = {
    "川": ["河"],
    "河": ["川"],
    "崎": ["﨑"],
    "﨑": ["崎"],
    "ヶ": ["ケ"],
    "ケ": ["ヶ"],
    "斉": ["齋", "斎"],
    "齋": ["斉", "斎"],
    "斎": ["斉", "齋"],
    "邊": ["辺", "邉"],
    "邉": ["辺", "邊"],
    "辺": ["邊", "邉"],
}

def gen_variants(s: str, limit=12):
    """Generate spelling variants of *s* using the ``CONFUSION`` table.

    For every confusable character present in the original string, each
    variant built so far is extended with that character swapped for each of
    its alternatives. Expansion stops once *limit* variants exist.

    The result is in generation order with the original string first, so the
    output is deterministic and truncation to *limit* never drops the
    original. The previous implementation sliced an unordered set, which made
    both the order and the surviving entries depend on string hashing.

    Returns ``[""]`` for ``None``; otherwise a list of at most *limit* strings.
    """
    if s is None:
        return [""]
    s = str(s)
    variants = [s]
    seen = {s}
    for a, bs in CONFUSION.items():
        if a in s:
            # Iterate over a snapshot: new variants must not be re-expanded
            # for the same character within this pass.
            for v in list(variants):
                for b in bs:
                    cand = v.replace(a, b)
                    if cand not in seen:
                        seen.add(cand)
                        variants.append(cand)
        if len(variants) >= limit:
            break
    return variants[:limit]

def best_ratio(a, b):
    """Return the ``difflib.SequenceMatcher`` similarity ratio of *a* and *b*."""
    matcher = difflib.SequenceMatcher(None, a, b)
    return matcher.ratio()

# ======================================================
# 2) 東/西/九州 の都道府県範囲（ユーザー指定）
#   - 東：北海道～富山/愛知/岐阜/三重あたりまで
#   - 西：石川/福井/滋賀あたり～岡山/鳥取/島根まで ＋ 四国（36-39）
#   - 九州：山口県＋広島県＋九州（沖縄=47は除外）
# ======================================================
# 2桁都道府県コード（01〜47）
# East: 01-16 and 19-24 (17 and 18 belong to West).
EAST_PREF_NUMS = set(range(1, 17)) | set(range(19, 25))
# West: 17-18, 25-33 (which already covers 31 and 32) plus Shikoku 36-39.
WEST_PREF_NUMS = {17, 18} | set(range(25, 34)) | set(range(36, 40))
# Kyushu group: Hiroshima 34, Yamaguchi 35 and 40-46 (Okinawa 47 excluded).
KYUSHU_PREF_NUMS = {34, 35} | set(range(40, 47))

def pref_num(pref_code_str: str):
    """Return the leading two-digit number of a prefecture code as an int.

    Gives ``None`` for empty input or when the stripped text does not start
    with two digits.
    """
    if pref_code_str:
        found = re.match(r"^(\d{2})", str(pref_code_str).strip())
        if found:
            return int(found.group(1))
    return None

def area_allowed(area_label: str, pref_code_str: str) -> bool:
    """Tell whether a prefecture may be used in a slot with the given area label.

    Labels 東, 西 and 九州 restrict the prefecture number to the matching
    set. Any other label, or a prefecture code without a readable number,
    is allowed.
    """
    label = str(area_label).strip() if area_label else ""
    number = pref_num(pref_code_str)
    if number is None:
        # Unknown prefecture: do not reject here.
        return True
    allowed_by_label = {
        "東": EAST_PREF_NUMS,
        "西": WEST_PREF_NUMS,
        "九州": KYUSHU_PREF_NUMS,
    }
    allowed = allowed_by_label.get(label)
    if allowed is None:
        return True
    return number in allowed


# ======================================================
# 3) 補助関数（週順、スパン、豪雪）
# ======================================================
def find_sheet_name(xlsx_path, keywords):
    """Return the first sheet name containing every string in *keywords*.

    Falls back to the workbook's first sheet when no name matches. The
    workbook is opened read-only and is closed explicitly before returning;
    read-only workbooks keep the file open until ``close()`` is called.
    """
    wb = openpyxl.load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        names = list(wb.sheetnames)
    finally:
        wb.close()
    for name in names:
        if all(k in name for k in keywords):
            return name
    return names[0]

def parse_week_id(week_id):
    """Split a week id such as "6-4w" into ``(6, 4)``.

    Returns ``(None, None)`` when the text does not have that shape.
    """
    parsed = re.match(r"^(\d{1,2})-(\d)w$", str(week_id))
    if parsed is None:
        return None, None
    first, second = (int(group) for group in parsed.groups())
    return first, second

def gap_weeks_from_count(cnt):
    """Convert a planned visit count into a required gap in weeks.

    The gap is ``ceil(52 / cnt)`` clamped to the range
    ``CONFIG["GAP_WEEKS_MIN"]``..``CONFIG["GAP_WEEKS_MAX"]``; a count of
    zero or less gives the minimum.
    """
    floor = CONFIG["GAP_WEEKS_MIN"]
    if cnt <= 0:
        return floor
    weeks = int(math.ceil(52 / cnt))
    capped = min(CONFIG["GAP_WEEKS_MAX"], weeks)
    return max(floor, capped)

def ensure_snow_except_template(path):
    """Create an empty snow-exception workbook at *path* unless it exists.

    The new workbook has one sheet titled "例外" with the header cells
    pref_code / city_key / memo in A1..C1.
    """
    if not os.path.exists(path):
        wb = openpyxl.Workbook()
        ws = wb.active
        ws.title = "例外"
        for cell_ref, header in (("A1", "pref_code"), ("B1", "city_key"), ("C1", "memo")):
            ws[cell_ref] = header
        wb.save(path)

def load_snow_excepts(path):
    """Load the snow-blackout exception lists from the workbook at *path*.

    The template is created first if the file is missing. Returns
    ``(pref_codes, city_keys)`` as sets of non-empty stripped strings; a
    missing column yields an empty set.
    """
    ensure_snow_except_template(path)
    df = pd.read_excel(path, sheet_name=0, dtype=str)

    def _column_values(column):
        series = df.get(column, pd.Series([], dtype=str))
        cleaned = series.dropna().astype(str).str.strip()
        return {value for value in set(cleaned) if value}

    return _column_values("pref_code"), _column_values("city_key")

SNOW_EXCEPT_PREF_CODES, SNOW_EXCEPT_CITY_KEYS = load_snow_excepts(SNOW_EXCEPT_XLSX)

def is_snow_blackout(pref_code, city_key, week_id):
    """Tell whether a slot falls into the heavy-snow blackout for a prefecture.

    Returns ``False`` when the prefecture code is empty, when the prefecture
    or the city is on the exception lists, when the prefecture is not in
    ``CONFIG["SNOW_PREF_CODES"]``, or when *week_id* cannot be parsed.
    Otherwise the slot is blacked out if its month is in
    ``CONFIG["SNOW_BLACKOUT_MONTHS"]`` or it is March and its week number is
    in ``CONFIG["SNOW_BLACKOUT_MARCH_W"]``.
    """
    if not pref_code:
        return False
    if pref_code in SNOW_EXCEPT_PREF_CODES:
        return False
    if city_key and city_key in SNOW_EXCEPT_CITY_KEYS:
        return False
    if pref_code not in CONFIG["SNOW_PREF_CODES"]:
        return False
    month, w = parse_week_id(week_id)
    if month is None:
        return False
    if month in CONFIG["SNOW_BLACKOUT_MONTHS"]:
        return True
    # Read the March weeks from CONFIG instead of a hard-coded {1}, so that
    # editing the setting actually changes the rule.
    if month == 3 and w in CONFIG["SNOW_BLACKOUT_MARCH_W"]:
        return True
    return False

def build_week_order(qsheet):
    """Collect the distinct week ids of a sheet in first-appearance order.

    Cell text is stripped before matching so this agrees with
    ``extract_blocks``, which also strips the week cell; otherwise a week id
    with stray whitespace would be accepted there but have no position here.

    Returns ``(week_order, week_index)`` where ``week_index`` maps each week
    id to its position in ``week_order``.
    """
    week_order, seen = [], set()
    for raw in qsheet[COL_WEEK].astype(str).tolist():
        v = raw.strip()
        if week_id_pat.match(v) and v not in seen:
            week_order.append(v)
            seen.add(v)
    return week_order, {w: i for i, w in enumerate(week_order)}

def kind_norm(x):
    """Normalize a slot-kind cell to the full-width labels ＡＪ / ＳＡ.

    ``None``/NaN gives ``""``. Half-width upper-case and full-width
    lower-case spellings of AJ and SA are rewritten; other text is only
    stripped.
    """
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return ""
    text = str(x).strip()
    rewrites = (
        ("AJ", "ＡＪ"),
        ("ａｊ", "ＡＪ"),
        ("SA", "ＳＡ"),
        ("ｓａ", "ＳＡ"),
    )
    for old, new in rewrites:
        text = text.replace(old, new)
    return text

def extract_blocks(qsheet, week_index_map, which):
    """Extract AJ / joint slots from a quarterly sheet.

    Every row whose week cell matches ``week_id_pat`` is treated as a header
    row and is paired with the row directly below it: area label, city,
    venue and prefecture are read from the header row, the slot kind from
    the row below.

    Args:
        qsheet: sheet read with ``header=None`` (integer column labels).
        week_index_map: week id -> position; rows whose week is missing from
            the map are dropped.
        which: term label stored in the ``fy`` column.

    Returns:
        DataFrame of slots whose kind is ＡＪ or 合同. The column set is
        fixed, so a sheet without any week rows yields an empty frame with
        the expected columns instead of raising ``KeyError``.
    """
    columns = [
        "fy", "week_id", "week_pos", "row_header", "row_detail",
        "area_label", "kind", "city_raw", "venue_raw", "pref_code",
    ]

    def cell_text(row, col):
        # Blank cells become "", everything else is stripped text.
        value = row[col]
        return "" if pd.isna(value) else str(value).strip()

    blocks = []
    for i in range(len(qsheet)-1):
        w = qsheet.iat[i, COL_WEEK]
        if pd.isna(w):
            continue
        w = str(w).strip()
        if not week_id_pat.match(w):
            continue
        header = qsheet.iloc[i]
        detail = qsheet.iloc[i+1]

        blocks.append({
            "fy": which, "week_id": w, "week_pos": week_index_map.get(w, None),
            "row_header": i, "row_detail": i+1,
            "area_label": cell_text(header, COL_AREA_OR_KIND),   # 東/西/九州
            "kind": kind_norm(detail[COL_AREA_OR_KIND]),          # AJ/合同/SA
            "city_raw": cell_text(header, COL_CITY),
            "venue_raw": cell_text(header, COL_VENUE),
            "pref_code": cell_text(header, COL_PREF),
        })
    df = pd.DataFrame(blocks, columns=columns).dropna(subset=["week_pos"]).copy()
    df["week_pos"] = df["week_pos"].astype(int)
    return df[df["kind"].isin(["ＡＪ","合同"])].copy()

# ======================================================
# 4) 統計量（例外増枠用 + 正規化基準）
# ======================================================
stats = pd.read_excel(STATS_XLSX, sheet_name=0)
stats.columns = [str(c).strip() for c in stats.columns]
# Municipality column: "市区町村" when present, otherwise the first column.
muni_col = "市区町村" if "市区町村" in stats.columns else stats.columns[0]

# Distinct municipality base names (administrative suffix stripped).
stats_base_list = stats[muni_col].astype(str).map(muni_base).map(str.strip)
stats_base_list = stats_base_list[stats_base_list != ""].dropna().unique().tolist()

# normalized base name -> base name, plus the list of normalized names for fuzzy scans.
stats_norm_to_base = {norm(x): x for x in stats_base_list if x}
stats_norms = list(stats_norm_to_base.keys())

def canonize_city_key(city_key_raw: str):
    """Snap a region-table city label onto a base name from the stats sheet.

    Tried in order: alias substitution, exact normalized match, exact match
    of a generated spelling variant, then a fuzzy scan accepted at
    ``CONFIG["CANON_CUTOFF"]``. Returns ``(name, score)``; when nothing
    qualifies the suffix-stripped input is returned with the best fuzzy ratio.
    """
    if not city_key_raw:
        return "", 0.0

    base = muni_base(apply_alias(city_key_raw))
    base_norm = norm(base)
    if base_norm and base_norm in stats_norm_to_base:
        return stats_norm_to_base[base_norm], 1.0

    variants = gen_variants(base)
    for variant in variants:
        variant_norm = norm(variant)
        if variant_norm and variant_norm in stats_norm_to_base:
            return stats_norm_to_base[variant_norm], 0.995

    # Fuzzy scan: the base spelling first, then every variant.
    top_name, top_ratio = None, 0.0
    for spelling in [base] + variants:
        spelling_norm = norm(spelling)
        for stats_norm in stats_norms:
            ratio = best_ratio(spelling_norm, stats_norm)
            if ratio > top_ratio:
                top_name, top_ratio = stats_norm_to_base[stats_norm], ratio

    if top_name and top_ratio >= CONFIG["CANON_CUTOFF"]:
        return top_name, top_ratio
    return base, top_ratio

# ======================================================
# 5) 四半期表読み込み
# ======================================================
# Locate the master sheets for each term by keywords in the sheet name.
sheet42 = find_sheet_name(QUARTER_XLSX, ["42期", "マスタ"])
sheet43 = find_sheet_name(QUARTER_XLSX, ["43期", "マスタ"])

# Read without a header row so columns are addressed by 0-based position.
q42 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet42, header=None)
q43 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet43, header=None)

week_order_42, week_index_42 = build_week_order(q42)
week_order_43, week_index_43 = build_week_order(q43)

# AJ / joint slots for each term.
b42 = extract_blocks(q42, week_index_42, "42")
b43 = extract_blocks(q43, week_index_43, "43")

# ======================================================
# 6) 地域別回数読み込み → 市区分キー正規化して集約
# ======================================================
r = pd.read_excel(REGION_XLSX, sheet_name=0, header=None, dtype=str)
# Keep rows that have both a city label (column 1) and a count (column 2).
plan_rows = r[~r[2].isna()].copy()
plan_rows = plan_rows[plan_rows[1].notna()].copy()
# NOTE(review): the forward fill runs after the filtering above, so a parent
# label that sits on a row without a count is not propagated — confirm this
# matches the layout of the region workbook.
plan_rows["pref_parent"] = plan_rows[0].ffill()
plan_rows["city_key_raw"] = plan_rows[1].astype(str).str.strip()
# Non-numeric counts become 0.
plan_rows["plan_count"] = pd.to_numeric(plan_rows[2], errors="coerce").fillna(0).astype(int)

# Canonicalize each city label; log the ones that were changed with a
# score of at least 0.93.
canon_list = []
fix_logs = []
for x in plan_rows["city_key_raw"].tolist():
    canon, sc = canonize_city_key(x)
    canon_list.append(canon)
    if canon != muni_base(apply_alias(x)) and sc >= 0.93:
        fix_logs.append((x, canon, sc))
plan_rows["city_key"] = canon_list

# Sum the planned counts per (parent, canonical city key).
region_master = plan_rows.groupby(["pref_parent","city_key"], as_index=False)["plan_count"].sum()

# Print at most the first 40 corrections.
if fix_logs:
    print("✅ 市区分キーの自動補正（alias/誤字ゆれ/統計量基準）:")
    for a,b,sc in fix_logs[:40]:
        print(f"  - {a} → {b} (match={sc:.3f})")
    if len(fix_logs) > 40:
        print(f"  ...他 {len(fix_logs)-40} 件")

city_keys = region_master["city_key"].tolist()
# If a city key occurs under more than one parent, the last row wins here.
plan_count_by_city = region_master.set_index("city_key")["plan_count"].to_dict()

# ======================================================
# 7) 候補側の索引（表記norm + 読み）
# ======================================================
# city key -> normalized spelling, and the reverse lookup (one normalized
# spelling may map to several city keys, hence the list values).
city_norm_map = {ck: norm(ck) for ck in city_keys}
norm_to_city = {}
for ck, nk in city_norm_map.items():
    if nk:
        norm_to_city.setdefault(nk, []).append(ck)

# katakana reading -> city keys; keys whose reading comes back empty are skipped.
reading_to_city = {}
for ck in city_keys:
    rd = to_katakana_reading(ck)
    if rd:
        reading_to_city.setdefault(rd, []).append(ck)

def choose_best_by_fuzzy(query_base, cands):
    """Pick the candidate whose normalized form is most similar to *query_base*.

    Returns ``(candidate, ratio)``. Ties keep the earliest candidate, and
    ``(None, 0.0)`` is returned when every ratio is zero or *cands* is empty.
    """
    query_norm = norm(query_base)
    chosen = None
    chosen_ratio = 0.0
    for candidate in cands:
        ratio = best_ratio(query_norm, norm(candidate))
        if ratio > chosen_ratio:
            chosen, chosen_ratio = candidate, ratio
    return chosen, chosen_ratio

def match_city_key(city_name: str):
    """Match a city label from the quarterly sheet to a candidate city key.

    Stages, in priority order: alias substitution, exact normalized
    spelling, generated spelling variant, katakana reading, fuzzy scan.
    Returns ``(city_key, score)``; the key is ``None`` when the fuzzy scan
    stays below ``CONFIG["FUZZY_CUTOFF"]``.
    """
    if not city_name:
        return None, 0.0

    base = muni_base(apply_alias(city_name))
    base_norm = norm(base)

    # Stage 1: exact normalized spelling.
    if base_norm and base_norm in norm_to_city:
        hits = norm_to_city[base_norm]
        if len(hits) == 1:
            return hits[0], 1.0
        picked, ratio = choose_best_by_fuzzy(base, hits)
        return picked, max(0.97, ratio)

    # Stage 2: first spelling variant that hits the index decides.
    for variant in gen_variants(base):
        variant_norm = norm(variant)
        if not variant_norm or variant_norm not in norm_to_city:
            continue
        hits = norm_to_city[variant_norm]
        if len(hits) == 1:
            return hits[0], 0.995
        picked, ratio = choose_best_by_fuzzy(variant, hits)
        return picked, max(0.95, ratio)

    # Stage 3: katakana reading.
    hits = reading_to_city.get(to_katakana_reading(base), [])
    if len(hits) == 1:
        return hits[0], 0.99
    if len(hits) >= 2:
        picked, ratio = choose_best_by_fuzzy(base, hits)
        if picked:
            return picked, max(0.93, ratio)

    # Stage 4: fuzzy scan over every candidate key.
    top_key, top_ratio = None, 0.0
    for key in city_keys:
        ratio = best_ratio(base_norm, city_norm_map.get(key, ""))
        if ratio > top_ratio:
            top_key, top_ratio = key, ratio
    if top_key and top_ratio >= CONFIG["FUZZY_CUTOFF"]:
        return top_key, top_ratio
    return None, top_ratio

def add_city_key(df):
    """Return a copy of ``df`` with ``city_key`` and ``match_score`` columns.

    Both columns come from running ``match_city_key`` on every ``city_raw`` value.
    """
    matched = [match_city_key(raw) for raw in df["city_raw"].tolist()]
    out = df.copy()
    out["city_key"] = [key for key, _ in matched]
    out["match_score"] = [score for _, score in matched]
    return out

# Rows that already have a city or a venue filled in, with matched city keys attached
scheduled42 = add_city_key(b42[(b42["city_raw"]!="") | (b42["venue_raw"]!="")].copy())
scheduled43 = add_city_key(b43[(b43["city_raw"]!="") | (b43["venue_raw"]!="")].copy())

# Open AJ slots of the 43rd term (city and venue both blank), in week/row order
open43_AJ = b43[
    (b43["kind"]=="ＡＪ") &
    (b43["city_raw"]=="") &
    (b43["venue_raw"]=="")
].copy().sort_values(["week_pos","row_header"]).reset_index(drop=True)

# ======================================================
# 8) Prefecture code guess (candidate city_key -> pref_code)
# ======================================================
# Distinct values found in the prefecture column of the 43rd-term sheet
pref_code_set = set([str(x).strip() for x in q43[COL_PREF].dropna().astype(str).unique().tolist()])
# Text after the two leading digits -> full code string (e.g. "滋賀" -> "25滋賀")
pref_name_to_code = {}
for pc in pref_code_set:
    m = re.match(r"^(\d{2})(.+)$", pc)
    if m:
        pref_name_to_code[m.group(2)] = pc

def parent_to_pref_code(pref_parent):
    """Translate a region-sheet parent label into a prefecture code.

    The leading run of non-digit characters is used as the prefecture name and
    looked up in ``pref_name_to_code``. Missing input or an unknown name gives "".
    """
    is_missing = pref_parent is None or (
        isinstance(pref_parent, float) and math.isnan(pref_parent)
    )
    if is_missing:
        return ""
    text = str(pref_parent).strip()
    lead = re.match(r"^([^\d]+)", text)
    name = text if lead is None else lead.group(1).strip()
    return pref_name_to_code.get(name, "")

# Guess a prefecture code for every region-master row and index it by city key
region_master["pref_code_guess"] = region_master["pref_parent"].apply(parent_to_pref_code)
pref_by_city = region_master.set_index("city_key")["pref_code_guess"].to_dict()

# Required gap (weeks) per prefecture and per city, derived from planned counts
pref_plan_count = region_master.groupby("pref_code_guess")["plan_count"].sum().to_dict()
pref_gap = {pc: gap_weeks_from_count(int(cnt)) for pc, cnt in pref_plan_count.items() if pc}
city_gap = {ck: gap_weeks_from_count(int(cnt)) for ck, cnt in plan_count_by_city.items()}

# ======================================================
# 9) Span history (42nd and 43rd terms joined on one timeline)
# ======================================================
week_order_42 = week_order_42 if week_order_42 else []
week_order_43 = week_order_43 if week_order_43 else []
# 43rd-term week positions are shifted by the number of 42nd-term weeks
OFFSET_43 = len(week_order_42)

scheduled42["abs_pos"] = scheduled42["week_pos"]
scheduled43["abs_pos"] = scheduled43["week_pos"] + OFFSET_43
open43_AJ["abs_pos"]   = open43_AJ["week_pos"] + OFFSET_43

scheduled_all = pd.concat([scheduled42, scheduled43], ignore_index=True)

# Latest (maximum) absolute week position seen per prefecture code and per city key
last_pos_pref, last_pos_city = {}, {}
for _, e in scheduled_all.dropna(subset=["abs_pos"]).iterrows():
    ap = int(e["abs_pos"])
    pc = e["pref_code"]
    ck = e["city_key"]
    if pc:
        last_pos_pref[pc] = max(last_pos_pref.get(pc, -999), ap)
    if ck:
        last_pos_city[ck] = max(last_pos_city.get(ck, -999), ap)

# Number of already-scheduled 43rd-term rows per matched city key
scheduled_counts_43 = scheduled43.dropna(subset=["city_key"]).groupby("city_key").size().to_dict()

# ======================================================
# 10) Overflow (exception) pool: class column == 1 and population >= threshold
# ======================================================
stats2 = stats.copy()
stats2[muni_col] = stats2[muni_col].astype(str).str.strip()

# Class column = 10th column (Excel column J); falls back to the last column
class_col = stats2.columns[9] if len(stats2.columns) >= 10 else stats2.columns[-1]
stats2[class_col] = pd.to_numeric(stats2[class_col], errors="coerce")

pop_col = next((c for c in stats2.columns if "人口" in str(c)), None)
if pop_col is None:
    # No column name contains "人口": take the numeric column with the largest
    # maximum as the population-like column
    num_cols = []
    for c in stats2.columns:
        s = pd.to_numeric(stats2[c], errors="coerce")
        if s.notna().sum() > 0:
            num_cols.append((c, float(s.max(skipna=True))))
    pop_col = sorted(num_cols, key=lambda x: x[1], reverse=True)[0][0] if num_cols else stats2.columns[-1]
stats2[pop_col] = pd.to_numeric(stats2[pop_col], errors="coerce")

# Rows satisfying both overflow conditions from CONFIG
ovr = stats2[(stats2[class_col] == CONFIG["OVR_CLASS_J_VALUE"]) & (stats2[pop_col] >= CONFIG["OVR_MIN_POP"])].copy()

# Match each qualifying municipality to a city key; keep the best-scoring name per key
ovr_candidates = {}
for name in ovr[muni_col].tolist():
    ck, sc = match_city_key(name)
    if ck:
        pop_val = float(ovr.loc[ovr[muni_col] == name, pop_col].iloc[0])
        if ck not in ovr_candidates or sc > ovr_candidates[ck]["match_score"]:
            ovr_candidates[ck] = {"name_raw": name, "match_score": sc, "pop": pop_val}
OVR_POOL = set(ovr_candidates.keys())
# NOTE(review): the message hard-codes "==1" and "18万" rather than reading CONFIG
print(f"✅ 例外増枠プール: {len(OVR_POOL)}（J列={class_col}==1 & 人口列={pop_col}>=18万）")

# ======================================================
# 11) Attendance rate per city key, when the stats sheet has a rate column
#     (cities without a rate default to need=0.5 at scoring time)
# ======================================================
# First column whose name contains one of the rate keywords, if any
rate_col = next((c for c in stats2.columns if ("集客率" in str(c) or "来場率" in str(c) or "動員率" in str(c))), None)
city_rate = {}
if rate_col is not None:
    tmp = stats2[[muni_col, rate_col]].copy()
    tmp["rate"] = pd.to_numeric(tmp[rate_col], errors="coerce")
    tmp = tmp.dropna(subset=["rate"])
    # normalized municipality base name -> rate
    muni_rate_norm = {norm(muni_base(row[muni_col])): float(row["rate"]) for _, row in tmp.iterrows()}
    for ck in city_keys:
        cn = norm(ck)
        if cn in muni_rate_norm:
            city_rate[ck] = muni_rate_norm[cn]

def percentile_need(values_dict):
    """Rank the values as percentiles and derive a complementary "need" score.

    Entries whose value is None or NaN are ignored. Returns ``(pct_map,
    need_map)`` where ``pct_map[k]`` is the average-method percentile rank of
    the value and ``need_map[k]`` is ``1 - pct_map[k]``. Both are empty when no
    usable value exists.
    """
    def _usable(value):
        return value is not None and not (isinstance(value, float) and math.isnan(value))

    kept = {key: value for key, value in values_dict.items() if _usable(value)}
    if not kept:
        return {}, {}

    ranks = pd.Series(list(kept.values())).rank(pct=True, method="average").tolist()
    pct_map = dict(zip(kept.keys(), ranks))
    need_map = {key: float(1 - rank) for key, rank in pct_map.items()}
    return pct_map, need_map

# Percentile rank and need (1 - percentile) of each city's rate
city_pct, city_need = percentile_need(city_rate)

def fmt_rate(v):
    """Format a rate for display.

    None/NaN gives "不明"; values up to 1.0 are shown as a percentage with two
    decimals; larger values are shown with four significant digits.
    """
    unknown = v is None or (isinstance(v, float) and math.isnan(v))
    if unknown:
        return "不明"
    if v <= 1.0:
        return f"{v*100:.2f}%"
    return f"{v:.4g}"

def fmt_pct(p):
    """Format a percentile rank as "下位N%"; None gives "不明"."""
    return "不明" if p is None else f"下位{p*100:.0f}%"

def build_reason(variant, week_id, area_label, ck, pc,
                 need_gap_p, gp, need_gap_c, gc,
                 remaining_plan, overflow_used, overflow_meta,
                 same_week_pref_hit, relax_mode, score):
    """Compose the explanation text stored next to a proposed assignment.

    The text lists, in order: the region filter, whether the pick is within
    the planned count or an overflow exception, the required vs. actual gaps,
    the rate of the city (when known), the same-week/same-prefecture note, the
    snow rule, and a summary with the score. Segments are joined with " / "
    and the result is cut to 32000 characters.
    """
    parts = [
        f"【案{variant}】この枠（{week_id}／{area_label}）はAJ枠が空いていたので、次の順で選びました。",
        f"0) 地域フィルタ：この枠は「{area_label}」なので、対象都道府県の範囲内だけから選定しています。",
    ]

    # 1) Planned count vs. overflow exception
    if overflow_used:
        pop = overflow_meta.get("pop", None)
        if isinstance(pop, (int,float)) and not math.isnan(pop):
            pop_txt = f"{int(pop):,}人"
        else:
            pop_txt = "不明"
        shown_name = overflow_meta.get("name_raw","") or ck
        parts.append("1) 計画回数の残りがある候補だけでは埋め切れなかったため、例外ルールで増枠しています。")
        parts.append(f"   例外条件：統計量のJ列=1 & 人口18万人以上。→ {shown_name}（人口={pop_txt}）を採用。")
    else:
        parts.append(f"1) まずは『43期の計画回数の残りがある市区分』から選定。→ {ck} は残り {remaining_plan} 回あるため計画内で入れています。")

    # 2) Gap requirement vs. actual gap, plus the relaxation mode when one was used
    span_txt = f"2) スパン：県は必要{need_gap_p}週に対し実績{gp}週、市区分は必要{need_gap_c}週に対し実績{gc}週。"
    if relax_mode:
        span_txt += f"（制約緩和={relax_mode}）"
    parts.append(span_txt)

    # 3) Rate of the city, when available
    if ck in city_rate:
        parts.append(f"3) 集客率：{fmt_rate(city_rate.get(ck))}（{fmt_pct(city_pct.get(ck))}）。改善余地が大きいほど優先度を上げています。")
    else:
        parts.append("3) 集客率：データが取れない市区分のため、回数とスパンを優先しています。")

    # 4) Same week / same prefecture
    if same_week_pref_hit:
        same_week_note = "（ただし候補都合で同県になりペナルティ付き）"
    else:
        same_week_note = "（今回は回避できています）"
    parts.append("4) 同週同県はカニバりやすいので基本回避。" + same_week_note)

    parts.append("5) 豪雪の12〜2月・3月1wは原則除外（例外リストで解除可）。祭り/マラソンは未連携で後日追加可能。")
    parts.append(f"【まとめ】回数×スパン×改善余地×同週同県回避を総合した点が最も高いので採用。（score={score:.2f}）")
    return " / ".join(parts)[:32000]

# Week id -> set of prefecture codes already scheduled in that week (43rd term)
week_used_pref_base = {}
for _, e in scheduled43.iterrows():
    w = e["week_id"]; pc = e["pref_code"]
    if w and pc:
        week_used_pref_base.setdefault(w, set()).add(pc)

# ======================================================
# 12) プラン生成（A/B/C）…★エリア範囲フィルタを強制
# ======================================================
def make_plan(variant):
    """Build one proposal plan (variant "A", "B" or "C") for the open AJ slots.

    Slots in ``open43_AJ`` are filled greedily in their stored order. For each
    slot every city key is scored. Cities still within their planned count
    are tried first, with progressively relaxed gap checks (strict, then
    prefecture gap only, then none); the overflow pool is consulted only when
    that search finds nothing.

    Args:
        variant: Key into ``CONFIG["SEEDS"]`` and ``CONFIG["WEIGHTS"]``.

    Returns:
        pandas.DataFrame with one row per slot and the keys ``variant``,
        ``row_header``, ``assign_city_key``, ``pref_code_guess``, ``score`` and
        ``reason_BT``. Slots without any candidate get empty keys and -inf.
    """
    rng = random.Random(CONFIG["SEEDS"][variant])
    W = CONFIG["WEIGHTS"][variant]
    gap_min = CONFIG["GAP_WEEKS_MIN"]
    relax_modes = (None, "Aのみ", "B+A")

    # Per-plan working copies so the three variants do not influence each other.
    used_in_plan = {ck: 0 for ck in city_keys}
    lp_p = dict(last_pos_pref)
    lp_c = dict(last_pos_city)
    week_used_pref = {w: set(s) for w, s in week_used_pref_base.items()}

    assigns = []

    def score_city(ck, apos, week_id, area_label, relax_mode, allow_overflow):
        """Score ``ck`` for one slot; return None when the city is not eligible."""
        pc = pref_by_city.get(ck, "")

        # Region filter (east / west / Kyushu)
        if not area_allowed(area_label, pc):
            return None

        plan_cnt = plan_count_by_city.get(ck, 0)
        already = scheduled_counts_43.get(ck, 0)
        remaining = plan_cnt - already - used_in_plan.get(ck, 0)

        overflow_used = False
        overflow_meta = {}
        if remaining <= 0:
            # Planned count exhausted: only overflow-pool cities may continue.
            if not allow_overflow or ck not in OVR_POOL:
                return None
            overflow_used = True
            meta = ovr_candidates.get(ck, {})
            overflow_meta = {"name_raw": meta.get("name_raw", ""), "pop": meta.get("pop", float("nan"))}

        need_gap_p = pref_gap.get(pc, gap_min) if pc else gap_min
        need_gap_c = city_gap.get(ck, gap_min)
        last_p = lp_p.get(pc, None) if pc else None
        last_c = lp_c.get(ck, None)
        # 999 stands for "never held before".
        gp = 999 if last_p is None else (apos - last_p)
        gc = 999 if last_c is None else (apos - last_c)

        ok_p = (last_p is None) or (gp >= need_gap_p)
        ok_c = (last_c is None) or (gc >= need_gap_c)

        if relax_mode is None:
            if not (ok_p and ok_c):
                return None
        elif relax_mode == "Aのみ":
            # City gap waived, prefecture gap still enforced.
            if not ok_p:
                return None
        # "B+A": both gap checks are waived.

        # Snow blackout
        if pc and is_snow_blackout(pc, ck, week_id):
            return None

        same_week_pref_hit = bool(pc) and pc in week_used_pref.get(week_id, set())
        same_week_pen = CONFIG["SAME_WEEK_PREF_PENALTY"] if same_week_pref_hit else 0.0
        overflow_pen = CONFIG["OVR_OVERFLOW_PENALTY"] if overflow_used else 0.0

        relax_pen = 0.0
        if relax_mode == "Aのみ":
            relax_pen = W["relax_penalty"] * 0.6
        elif relax_mode == "B+A":
            relax_pen = W["relax_penalty"] * 1.0

        slack_p = gp - need_gap_p
        slack_c = gc - need_gap_c
        need_city = city_need.get(ck, 0.5)

        score = (
            slack_p * W["pref_slack"] +
            slack_c * W["city_slack"] +
            max(remaining, 0) * W["unmet_bonus"] * 5.0 +
            (need_city * 10) * W["low_attr_city"] -
            relax_pen - same_week_pen - overflow_pen +
            rng.uniform(-0.5, 0.5) * W["noise"]
        )

        # The reason text is only needed for the winning candidate, so keep
        # the arguments and format it once after selection.
        reason_args = (
            variant, week_id, area_label, ck, pc,
            need_gap_p, gp, need_gap_c, gc,
            remaining,
            overflow_used, overflow_meta,
            same_week_pref_hit, relax_mode, score,
        )
        return {"ck": ck, "pc": pc, "score": float(score), "reason_args": reason_args}

    for _, slot in open43_AJ.iterrows():
        apos = int(slot["abs_pos"])
        week_id = slot["week_id"]
        area_label = slot.get("area_label", "")

        best = None
        # First pass: within the planned count. Second pass: overflow allowed.
        for allow_overflow in (False, True):
            for relax_mode in relax_modes:
                for ck in city_keys:
                    cand = score_city(ck, apos, week_id, area_label, relax_mode,
                                      allow_overflow=allow_overflow)
                    if cand and (best is None or cand["score"] > best["score"]):
                        best = cand
                if best is not None:
                    break
            if best is not None:
                break

        if best is None:
            assigns.append({
                "variant": variant, "row_header": int(slot["row_header"]),
                "assign_city_key": "", "pref_code_guess": "", "score": float("-inf"),
                "reason_BT": f"案{variant}: 条件により候補なし（地域={area_label}）"
            })
            continue

        ck = best["ck"]; pc = best["pc"]
        reason = build_reason(*best["reason_args"])

        used_in_plan[ck] += 1
        # Keep the "latest position" meaning the dicts were seeded with: a
        # relaxed pick placed before a later confirmed event must not hide
        # that later event from the following slots.
        lp_c[ck] = max(lp_c.get(ck, apos), apos)
        if pc:
            lp_p[pc] = max(lp_p.get(pc, apos), apos)
            week_used_pref.setdefault(week_id, set()).add(pc)

        assigns.append({
            "variant": variant, "row_header": int(slot["row_header"]),
            "assign_city_key": ck, "pref_code_guess": pc, "score": best["score"],
            "reason_BT": reason
        })

    return pd.DataFrame(assigns)

# One plan per weight/seed variant defined in CONFIG
planA = make_plan("A")
planB = make_plan("B")
planC = make_plan("C")

def to_map(df, col):
    """Return ``{int(row_header): value of col}`` built from the rows of ``df``."""
    mapping = {}
    for _, row in df.iterrows():
        mapping[int(row["row_header"])] = row[col]
    return mapping

# Header-row index -> assigned city / prefecture code for each plan;
# the reason text is taken from plan A only
A_city = to_map(planA, "assign_city_key")
B_city = to_map(planB, "assign_city_key")
C_city = to_map(planC, "assign_city_key")
A_pref = to_map(planA, "pref_code_guess")
B_pref = to_map(planB, "pref_code_guess")
C_pref = to_map(planC, "pref_code_guess")
A_reason = to_map(planA, "reason_BT")

# ======================================================
# 13) Write-back: fill empty 43rd-term AJ slots with plan A, list plans B/C
#     in the venue cell, and store the reason text
# ======================================================
wb = openpyxl.load_workbook(QUARTER_XLSX)

# Remove every sheet other than the 42nd/43rd-term sheets
for name in list(wb.sheetnames):
    if name not in {sheet42, sheet43}:
        wb.remove(wb[name])

ws43 = wb[sheet43]

written = 0
for row0 in open43_AJ["row_header"].tolist():
    # presumably row_header is a 0-based row index; openpyxl rows are 1-based
    r = int(row0) + 1

    # Skip the slot if the workbook cell already holds a city or a venue
    c_val = ws43.cell(r, COL_CITY+1).value
    d_val = ws43.cell(r, COL_VENUE+1).value
    if (c_val is not None and str(c_val).strip() != "") or (d_val is not None and str(d_val).strip() != ""):
        continue

    a = str(A_city.get(int(row0), "") or "")
    b = str(B_city.get(int(row0), "") or "")
    c = str(C_city.get(int(row0), "") or "")

    # Plan A goes to the city cell; plans B/C are listed in the venue cell ("-" when absent)
    ws43.cell(r, COL_CITY+1).value = a
    ws43.cell(r, COL_VENUE+1).value = f"{VENUE_PLACEHOLDER}（B:{b if b else '-'} / C:{c if c else '-'}）"

    # Fill the prefecture cell only when it is blank, preferring A, then B, then C
    pref_candidate = A_pref.get(int(row0), "") or B_pref.get(int(row0), "") or C_pref.get(int(row0), "")
    if (ws43.cell(r, COL_PREF+1).value is None) or (str(ws43.cell(r, COL_PREF+1).value).strip()==""):
        if pref_candidate:
            ws43.cell(r, COL_PREF+1).value = str(pref_candidate)

    # NOTE(review): COL_REASON_BT is 72, so COL_REASON_BT+1 addresses column 73,
    # which is BU in Excel letters (BT is 72 in 1-based numbering) — confirm the
    # intended target column.
    ws43.cell(r, COL_REASON_BT+1).value = str(A_reason.get(int(row0), "案A: 理由生成なし"))
    written += 1

wb.save(OUT_QUARTER_ABC)
print("\n✅ 出力:", OUT_QUARTER_ABC)
print("✅ 43期 空欄AJ枠 書込数:", written)
print("✅ 地域フィルタ定義:")
print("  東:", sorted(EAST_PREF_NUMS))
print("  西:", sorted(WEST_PREF_NUMS))
print("  九州:", sorted(KYUSHU_PREF_NUMS))


  Preparing metadata (setup.py) ... done
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 694.9/694.9 kB 32.2 MB/s eta 0:00:00
  Building wheel for unidic-lite (setup.py) ... done
✅ aliasテンプレ作成: 市区分_alias.csv（必要に応じて追記してください）
✅ 例外増枠プール: 1（J列=Zの4分位（1=赤,2=黄,3=青,4=灰）==1 & 人口列=人口>=18万）

✅ 出力: SA+AJ+共有用_四半期表20240303_43期提案ABC_東西九州範囲対応_市区分ゆれ吸収_42_43のみ_理由BT.xlsx
✅ 43期 空欄AJ枠 書込数: 137
✅ 地域フィルタ定義:
  東: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19, 20, 21, 22, 23, 24]
  西: [17, 18, 25, 26, 27, 28, 29, 30, 31, 32, 33, 36, 37, 38, 39]
  九州: [34, 35, 40, 41, 42, 43, 44, 45, 46]


In [None]:
# ============================================
# 43期 スケジュール自動提案 MVP（A/B/C案）【不足時のみ例外増枠OK + 理由を上司向け口語に】
#
# ✅基本：43期地域別会場回数（plan_count）を上限として使う
# ✅ただし「その枠が候補ゼロで埋められない」時だけ、例外増枠を許可
#   - 市区町村統計量（Excel）J列=1
#   - 人口>=18万人
#   - 豪雪/外部イベントは引き続き除外
# ✅BT列の理由：上司プレゼン向けに、根拠を口語で分かりやすく
# ✅同週同県はなるべく避ける（ペナルティ）
# ✅F列(例:25滋賀)を書き込む（空欄なら）
# ✅出力は 42期マスタ / 43期マスタ のみ（1ファイル）
# ✅D列に B/C案を併記（欠損でも "-" として必ず出す）
#
# 入力:
#   SA+AJ+共有用_四半期表20240303.xlsx
#   43期地域別会場回数.xlsx
#   市区町村_統計量_全国 (1).xlsx
#   豪雪例外リスト.xlsx（無ければ自動生成）
#
# 出力:
#   SA+AJ+共有用_四半期表20240303_43期提案ABC_不足時例外増枠_42_43のみ_理由BT.xlsx
# ============================================

import os, re, math, random, difflib
import pandas as pd
import openpyxl

# ====== Inputs ======
QUARTER_XLSX     = "SA+AJ+共有用_四半期表20240303.xlsx"
REGION_XLSX      = "43期地域別会場回数.xlsx"
STATS_XLSX       = "市区町村_統計量_全国 (1).xlsx"
SNOW_EXCEPT_XLSX = "豪雪例外リスト.xlsx"

# ====== Output (single file) ======
OUT_QUARTER_ABC = "SA+AJ+共有用_四半期表20240303_43期提案ABC_不足時例外増枠_42_43のみ_理由BT.xlsx"

# ====== Columns (0-based) =====
COL_WEEK = 0           # A: week id
COL_CITY = 2           # C: city (plan A)
COL_VENUE = 3          # D: venue (placeholder + plans B/C listed alongside)
COL_PREF = 5           # F: prefecture code (e.g. 25滋賀)
COL_KIND_DETAIL = 1    # column B of the detail (kind) row
# NOTE(review): unlike the 0-based constants above, 72 is BT's 1-based
# (openpyxl) column number — confirm that users of this constant do not add 1.
COL_REASON_BT = 72     # column BT (72 in openpyxl's 1-based numbering)

VENUE_PLACEHOLDER = "会場要検討"
week_id_pat = re.compile(r"^\d{1,2}-\dw$")

CONFIG = {
    "GAP_WEEKS_MIN": 6,
    "GAP_WEEKS_MAX": 30,
    "SAME_WEEK_PREF_PENALTY": 80.0,   # discourage the same prefecture twice in one week
    "FUZZY_CUTOFF": 0.82,
    "MANUAL_CITY_MAP": {},
    "EXTERNAL_EVENT_BLACKOUT": [],

    # Snow blackout (provisional)
    "SNOW_BLACKOUT_MONTHS": {12, 1, 2},
    "SNOW_BLACKOUT_MARCH_W": {1},
    "SNOW_PREF_CODES": {
        "01北海道","02青森","03岩手","04宮城","05秋田","06山形","07福島",
        "15新潟","16富山","17石川","18福井","19山梨",
        "20長野","21岐阜","31鳥取","32島根"
    },

    # Overflow (exception) conditions
    "OVR_CLASS_J_VALUE": 1,          # required value of the stats sheet's class column (column J)
    "OVR_MIN_POP": 180000,           # population >= 180,000
    "OVR_OVERFLOW_PENALTY": 15.0,    # light penalty so overflow is not overused (kept small because it only applies on shortage)

    # Weights of the three plan variants
    "WEIGHTS": {
        "A": {"pref_slack": 3.0, "city_slack": 2.0, "unmet_bonus": 1.2,
              "low_attr_pref": 2.0, "low_attr_city": 1.5, "relax_penalty": 25.0, "noise": 0.8},
        "B": {"pref_slack": 4.5, "city_slack": 3.2, "unmet_bonus": 1.0,
              "low_attr_pref": 0.8, "low_attr_city": 0.6, "relax_penalty": 30.0, "noise": 0.8},
        "C": {"pref_slack": 2.0, "city_slack": 1.2, "unmet_bonus": 1.2,
              "low_attr_pref": 3.8, "low_attr_city": 3.2, "relax_penalty": 25.0, "noise": 0.8},
    },
    "SEEDS": {"A": 4301, "B": 4302, "C": 4303},
}

# ====== Utility ======
def norm(s):
    """Normalize a label for comparison.

    None/NaN gives "". Otherwise the value is stringified, full-width spaces
    become ASCII spaces, the ends are stripped, and whitespace, dash-like
    characters, slashes, dots, commas and bracket characters are removed.
    """
    missing = s is None or (isinstance(s, float) and math.isnan(s))
    if missing:
        return ""
    text = str(s).replace("　"," ").strip()
    return re.sub(r"[ \t\n\r\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", text)

def find_sheet_name(xlsx_path, keywords):
    """Return the first sheet name that contains every keyword.

    Falls back to the workbook's first sheet name when no sheet matches.

    Args:
        xlsx_path: Path of the workbook to inspect.
        keywords: Iterable of substrings that must all occur in the sheet name.
    """
    wb = openpyxl.load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        names = list(wb.sheetnames)
    finally:
        # Read-only workbooks keep the file open until closed explicitly.
        wb.close()
    for name in names:
        if all(k in name for k in keywords):
            return name
    return names[0]

def parse_week_id(week_id):
    """Split a week id such as "6-4w" into ``(6, 4)``.

    Returns ``(None, None)`` when the value does not have that shape.
    """
    hit = re.match(r"^(\d{1,2})-(\d)w$", str(week_id))
    if hit is None:
        return None, None
    first_txt, second_txt = hit.groups()
    return int(first_txt), int(second_txt)

def gap_weeks_from_count(cnt):
    """Convert a planned yearly count into a required gap in weeks.

    The gap is ``ceil(52 / cnt)`` clamped to the CONFIG minimum and maximum;
    a count of zero or less yields the minimum.
    """
    floor = CONFIG["GAP_WEEKS_MIN"]
    if cnt <= 0:
        return floor
    ideal = int(math.ceil(52 / cnt))
    capped = min(CONFIG["GAP_WEEKS_MAX"], ideal)
    return max(floor, capped)

def kind_norm(x):
    """Normalize an event-kind label to full-width "ＡＪ" / "ＳＡ" spellings.

    None/NaN gives "". Half-width upper-case and full-width lower-case
    spellings of AJ and SA are rewritten; other text is only stripped.
    """
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return ""
    label = str(x).strip()
    rewrites = (
        ("AJ", "ＡＪ"),
        ("ａｊ", "ＡＪ"),
        ("SA", "ＳＡ"),
        ("ｓａ", "ＳＡ"),
    )
    for old, new in rewrites:
        label = label.replace(old, new)
    return label

def ensure_snow_except_template(path):
    """Create an empty snow-exception workbook at ``path`` unless it exists.

    The template has one sheet named "例外" with the header cells
    pref_code / city_key / memo and a usage hint in C2.
    """
    if os.path.exists(path):
        return
    book = openpyxl.Workbook()
    sheet = book.active
    sheet.title = "例外"
    template_cells = (
        ("A1", "pref_code"),
        ("B1", "city_key"),
        ("C1", "memo"),
        ("C2", "豪雪NGを例外的にOKにしたい対象を追加。空欄は無視。"),
    )
    for ref, text in template_cells:
        sheet[ref] = text
    book.save(path)

def load_snow_excepts(path):
    """Load the snow-blackout exception lists.

    Creates the template workbook first when it is missing. Returns
    ``(pref_codes, city_keys)`` as sets of stripped, non-empty strings read
    from the ``pref_code`` and ``city_key`` columns of the first sheet.
    """
    ensure_snow_except_template(path)
    table = pd.read_excel(path, sheet_name=0, dtype=str)

    def _column_values(col):
        raw = table.get(col, pd.Series([], dtype=str))
        cleaned = set(raw.dropna().astype(str).str.strip())
        return {value for value in cleaned if value}

    pref_codes = _column_values("pref_code")
    city_keys_ = _column_values("city_key")
    return pref_codes, city_keys_

# Exception sets (prefecture codes, city keys) exempt from the snow blackout
SNOW_EXCEPT_PREF_CODES, SNOW_EXCEPT_CITY_KEYS = load_snow_excepts(SNOW_EXCEPT_XLSX)

def is_snow_blackout(pref_code, city_key, week_id):
    """Tell whether a slot falls in the snow blackout for this prefecture/city.

    False when the prefecture code is empty, when the prefecture or city is
    on an exception list, when the prefecture is not a configured snow
    prefecture, or when the week id cannot be parsed. Otherwise True for the
    configured blackout months and for the configured weeks of month 3.
    """
    if not pref_code or pref_code in SNOW_EXCEPT_PREF_CODES:
        return False
    if city_key and city_key in SNOW_EXCEPT_CITY_KEYS:
        return False
    if pref_code not in CONFIG["SNOW_PREF_CODES"]:
        return False

    month, week_no = parse_week_id(week_id)
    if month is None:
        return False

    in_blackout_month = month in CONFIG["SNOW_BLACKOUT_MONTHS"]
    in_early_march = month == 3 and week_no in CONFIG["SNOW_BLACKOUT_MARCH_W"]
    return in_blackout_month or in_early_march

def build_week_order(qsheet):
    """Collect the distinct week ids of a sheet in first-appearance order.

    Values of the week column are stringified and stripped before being
    tested against ``week_id_pat``. Stripping matters because
    ``extract_blocks`` looks weeks up by their stripped text; without it a
    padded id (or one with a trailing newline) would be indexed under a key
    that lookup can never hit.

    Returns:
        ``(week_order, week_index)``: the ordered list of week ids and a dict
        mapping each id to its position in that list.
    """
    week_order, seen = [], set()
    for v in qsheet[COL_WEEK].astype(str).str.strip().tolist():
        if week_id_pat.match(v) and v not in seen:
            week_order.append(v)
            seen.add(v)
    return week_order, {w: i for i, w in enumerate(week_order)}

def extract_blocks(qsheet, week_index_map, which):
    """Extract AJ / joint event blocks (header row + detail row) from a sheet.

    A block starts on every row whose week column matches ``week_id_pat``;
    the row directly below it supplies the event kind.

    Args:
        qsheet: Sheet read with ``header=None`` (integer column labels).
        week_index_map: Week id -> position, as built by ``build_week_order``.
        which: Term label stored in the ``fy`` column.

    Returns:
        DataFrame with the columns fy, week_id, week_pos, row_header,
        row_detail, kind, city_raw, venue_raw and pref_code, limited to rows
        whose kind is "ＡＪ" or "合同" and whose week id is in
        ``week_index_map``. The frame is empty, but keeps these columns, when
        the sheet holds no week rows.
    """
    columns = ["fy", "week_id", "week_pos", "row_header", "row_detail",
               "kind", "city_raw", "venue_raw", "pref_code"]

    def _text(value):
        # Blank cells arrive as NaN; represent them as "".
        return "" if pd.isna(value) else str(value).strip()

    blocks = []
    # Stop one row early: every header row needs the detail row below it.
    for i in range(len(qsheet) - 1):
        w = qsheet.iat[i, COL_WEEK]
        if pd.isna(w):
            continue
        w = str(w).strip()
        if not week_id_pat.match(w):
            continue
        header = qsheet.iloc[i]
        detail = qsheet.iloc[i + 1]

        blocks.append({
            "fy": which, "week_id": w, "week_pos": week_index_map.get(w, None),
            "row_header": i, "row_detail": i + 1,
            "kind": kind_norm(detail[COL_KIND_DETAIL]),
            "city_raw": _text(header[COL_CITY]),
            "venue_raw": _text(header[COL_VENUE]),
            "pref_code": _text(header[COL_PREF]),
        })

    # Naming the columns keeps the schema when no week row was found; without
    # it the dropna below raises KeyError on a frame that has no columns.
    df = pd.DataFrame(blocks, columns=columns).dropna(subset=["week_pos"]).copy()
    df["week_pos"] = df["week_pos"].astype(int)
    return df[df["kind"].isin(["ＡＪ", "合同"])].copy()

def best_fuzzy_key(a_norm, candidates_norm):
    """Find the candidate most similar to ``a_norm`` by SequenceMatcher ratio.

    Empty candidates are ignored and the first candidate reaching the highest
    ratio wins. Returns ``(candidate, ratio)``, or ``("", 0.0)`` when nothing
    scores above zero.
    """
    top_key = ""
    top_score = 0.0
    scored = (
        (difflib.SequenceMatcher(None, a_norm, key).ratio(), key)
        for key in candidates_norm
        if key
    )
    for score, key in scored:
        if score > top_score:
            top_score, top_key = score, key
    return top_key, top_score

# ====== 1) Load the quarterly workbook ======
# Sheets are located by keywords in their names (falls back to the first sheet)
sheet42 = find_sheet_name(QUARTER_XLSX, ["42期", "マスタ"])
sheet43 = find_sheet_name(QUARTER_XLSX, ["43期", "マスタ"])

# Read without a header row so columns are addressed by position
q42 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet42, header=None)
q43 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet43, header=None)

week_order_42, week_index_42 = build_week_order(q42)
week_order_43, week_index_43 = build_week_order(q43)

# AJ / joint event blocks of each term
b42 = extract_blocks(q42, week_index_42, "42")
b43 = extract_blocks(q43, week_index_43, "43")

# ====== 2) Planned counts per region (basis of the 43rd-term upper limit) ======
r = pd.read_excel(REGION_XLSX, sheet_name=0, header=None)
# Keep rows that have both a count (column 2) and a city name (column 1)
plan_rows = r[~r[2].isna()].copy()
plan_rows = plan_rows[plan_rows[1].notna()]
# Column 0 is forward-filled so every kept row carries a parent label
plan_rows["pref_parent"] = plan_rows[0].ffill()
plan_rows["city_key"] = plan_rows[1].astype(str)
plan_rows["plan_count"] = pd.to_numeric(plan_rows[2], errors="coerce").fillna(0).astype(int)
region_master = plan_rows[["pref_parent","city_key","plan_count"]].reset_index(drop=True)

city_keys = region_master["city_key"].tolist()
plan_count_by_city = region_master.set_index("city_key")["plan_count"].to_dict()

# Normalized-spelling lookups used by match_city_key; when two keys share a
# normalized spelling, the later key wins in _norm_to_key
city_norm_map = {k: norm(k) for k in city_keys}
_norm_values = list(city_norm_map.values())
_norm_to_key = {city_norm_map[k]: k for k in city_norm_map}

def match_city_key(city_name):
    """Map a raw city label onto a candidate city key.

    Stages, first hit wins:
      1. ``CONFIG["MANUAL_CITY_MAP"]`` override (score 1.0)
      2. exact match on the normalized spelling (score 1.0)
      3. substring containment in either direction (score 0.95)
      4. best fuzzy ratio, accepted at ``CONFIG["FUZZY_CUTOFF"]``

    Returns:
        ``(city_key, score)``; ``city_key`` is None when nothing matched.
    """
    if not city_name:
        return None, 0.0
    if city_name in CONFIG["MANUAL_CITY_MAP"]:
        return CONFIG["MANUAL_CITY_MAP"][city_name], 1.0
    a = norm(city_name)
    if not a:
        # A label made only of punctuation/whitespace normalizes to "". The
        # empty string is a substring of every key, so without this guard the
        # containment stage would return the first key with score 0.95.
        return None, 0.0
    if a in _norm_to_key:
        return _norm_to_key[a], 1.0
    for ck, ckn in city_norm_map.items():
        if ckn and (ckn in a or a in ckn):
            return ck, 0.95
    bn, ratio = best_fuzzy_key(a, _norm_values)
    if ratio >= CONFIG["FUZZY_CUTOFF"] and bn:
        return _norm_to_key[bn], ratio
    return None, ratio

def add_city_key(df):
    """Return a copy of ``df`` with ``city_key`` and ``match_score`` columns.

    Both columns come from running ``match_city_key`` on every ``city_raw`` value.
    """
    matched = [match_city_key(raw) for raw in df["city_raw"].tolist()]
    out = df.copy()
    out["city_key"] = [key for key, _ in matched]
    out["match_score"] = [score for _, score in matched]
    return out

# Rows that already have a city or a venue filled in, with matched city keys attached
scheduled42 = b42[(b42["city_raw"]!="") | (b42["venue_raw"]!="")].copy()
scheduled43 = b43[(b43["city_raw"]!="") | (b43["venue_raw"]!="")].copy()
scheduled42 = add_city_key(scheduled42)
scheduled43 = add_city_key(scheduled43)

# Open AJ slots of the 43rd term (city and venue both blank), in week/row order
open43_AJ = b43[
    (b43["kind"]=="ＡＪ") &
    (b43["city_raw"]=="") &
    (b43["venue_raw"]=="")
].copy().sort_values(["week_pos","row_header"]).reset_index(drop=True)

# ====== 3) Prefecture code guess ======
# Distinct values found in the prefecture column of the 43rd-term sheet
pref_code_set = set([str(x).strip() for x in q43[COL_PREF].dropna().astype(str).unique().tolist()])
# Text after the two leading digits -> full code string (e.g. "滋賀" -> "25滋賀")
pref_name_to_code = {}
for pc in pref_code_set:
    m = re.match(r"^(\d{2})(.+)$", pc)
    if m:
        pref_name_to_code[m.group(2)] = pc

def parent_to_pref_code(pref_parent):
    """Translate a region-sheet parent label into a prefecture code.

    The leading run of non-digit characters is used as the prefecture name and
    looked up in ``pref_name_to_code``. Missing input or an unknown name gives "".
    """
    is_missing = pref_parent is None or (
        isinstance(pref_parent, float) and math.isnan(pref_parent)
    )
    if is_missing:
        return ""
    text = str(pref_parent).strip()
    lead = re.match(r"^([^\d]+)", text)
    name = text if lead is None else lead.group(1).strip()
    return pref_name_to_code.get(name, "")

# Guess a prefecture code for every region-master row and index it by city key
region_master["pref_code_guess"] = region_master["pref_parent"].apply(parent_to_pref_code)
pref_by_city = region_master.set_index("city_key")["pref_code_guess"].to_dict()

# Required gap (weeks) per prefecture and per city, derived from planned counts
pref_plan_count = region_master.groupby("pref_code_guess")["plan_count"].sum().to_dict()
pref_gap = {pc: gap_weeks_from_count(int(cnt)) for pc, cnt in pref_plan_count.items() if pc}
city_gap = {ck: gap_weeks_from_count(int(cnt)) for ck, cnt in plan_count_by_city.items()}

# ====== 4) Timeline for span history across the 42nd and 43rd terms ======
# 43rd-term week positions are shifted by the number of 42nd-term weeks
OFFSET_43 = len(week_order_42)
scheduled42["abs_pos"] = scheduled42["week_pos"]
scheduled43["abs_pos"] = scheduled43["week_pos"] + OFFSET_43
open43_AJ["abs_pos"]   = open43_AJ["week_pos"] + OFFSET_43
scheduled_all = pd.concat([scheduled42, scheduled43], ignore_index=True)

# Latest (maximum) absolute week position seen per prefecture code and per city key
last_pos_pref, last_pos_city = {}, {}
for _, e in scheduled_all.dropna(subset=["abs_pos"]).iterrows():
    ap = int(e["abs_pos"])
    pc = e["pref_code"]
    ck = e["city_key"]
    if pc:
        last_pos_pref[pc] = max(last_pos_pref.get(pc, -999), ap)
    if ck:
        last_pos_city[ck] = max(last_pos_city.get(ck, -999), ap)

# ====== 5) Events already scheduled within the 43rd term, per city key ======
scheduled_counts_43 = scheduled43.dropna(subset=["city_key"]).groupby("city_key").size().to_dict()

# ====== 6) Build the overflow (exception) candidate pool from the stats sheet
#           (class column == 1 and population >= threshold) ======
stats = pd.read_excel(STATS_XLSX, sheet_name=0)
stats.columns = [str(c).strip() for c in stats.columns]

# Guess the municipality-name column
muni_col = next((c for c in stats.columns if c in ["市区町村","市町村","自治体","市区分","municipality"]), None)
if muni_col is None:
    # Fall back to the first object-dtype column, else the first column
    obj_cols = [c for c in stats.columns if stats[c].dtype == object]
    muni_col = obj_cols[0] if obj_cols else stats.columns[0]

# Guess the population column (first column whose name contains "人口")
pop_col = next((c for c in stats.columns if ("人口" in c or "総人口" in c)), None)

# Class column (Excel column J = 10th column): the header name is assumed
# unknown, so position is the last resort; a column literally named "J" wins
class_col = "J" if "J" in stats.columns else None
if class_col is None:
    # Next preference: a column whose name looks like a classification
    class_col = next((c for c in stats.columns if ("分類" in c or "クラス" in c or "class" in c.lower())), None)
if class_col is None:
    # Last resort: the 10th column (0-based index 9), else the last column
    if len(stats.columns) >= 10:
        class_col = stats.columns[9]
    else:
        class_col = stats.columns[-1]

# No population column found by name: use the 11th column (0-based index 10)
if pop_col is None:
    if len(stats.columns) >= 11:
        pop_col = stats.columns[10]
    else:
        # Nothing usable: the overflow pool will end up empty
        pop_col = None

stats2 = stats.copy()
stats2[muni_col] = stats2[muni_col].astype(str).str.strip()
stats2[class_col] = pd.to_numeric(stats2[class_col], errors="coerce")
if pop_col is not None:
    stats2[pop_col] = pd.to_numeric(stats2[pop_col], errors="coerce")
else:
    # All-NaN placeholder column so the filter below selects no rows
    stats2["__pop__"] = float("nan")
    pop_col = "__pop__"

# Rows satisfying both overflow conditions from CONFIG
ovr = stats2[(stats2[class_col] == CONFIG["OVR_CLASS_J_VALUE"]) & (stats2[pop_col] >= CONFIG["OVR_MIN_POP"])].copy()

# Overflow candidates: match each municipality name from the stats file to a city_key
ovr_candidates = {}
for name in ovr[muni_col].tolist():
    ck, sc = match_city_key(name)
    if ck:
        # When several names map to the same city_key, keep the higher match score
        if ck not in ovr_candidates or sc > ovr_candidates[ck]["match_score"]:
            pop_val = float(ovr.loc[ovr[muni_col] == name, pop_col].iloc[0])
            ovr_candidates[ck] = {"name_raw": name, "match_score": sc, "pop": pop_val}

OVR_POOL = set(ovr_candidates.keys())
print(f"✅ 例外増枠プール: {len(OVR_POOL)} 市区分（条件: J列=1 & 人口>=18万）")
print(f"   - J列として使用: {class_col} / 人口列として使用: {pop_col} / 市区町村列: {muni_col}")

# ====== 7) 集客率（既存ロジック流用：県平均 + 市区町村率） ======
# ここは“列名依存”が強いので、前回同様に「集客率っぽい列」を自動検出
def load_attraction(path):
    """Read attendance-rate data from the first sheet of a workbook.

    The prefecture, municipality and rate columns are located by exact name
    first and by looser substring rules second; the last fallbacks are the
    first column (prefecture, municipality) and the last column (rate).

    Returns a dict with the chosen column names (``pref_col``, ``muni_col``,
    ``rate_col``), ``muni_rate_norm`` (normalized municipality name -> rate)
    and ``pref_rate`` (prefecture label -> mean rate). Rows without a numeric
    rate are dropped.
    """
    df = pd.read_excel(path, sheet_name=0)
    df.columns = [str(c).strip() for c in df.columns]
    names = df.columns

    exact_pref = ["県","都道府県","pref","prefecture"]
    exact_muni = ["市区町村","市町村","自治体","municipality","市区分"]

    pref_col = next((c for c in names if c in exact_pref), None)
    muni_col2 = next((c for c in names if c in exact_muni), None)
    rate_col = next((c for c in names if ("集客率" in c or "来場率" in c or "動員率" in c)), None)

    if pref_col is None:
        # Any column name containing "県" (this also covers "都道府県" and a
        # trailing "県"), else the first column.
        pref_col = next((c for c in names if "県" in c), names[0])
    if muni_col2 is None:
        muni_col2 = next((c for c in names if any(mark in c for mark in ("市", "町", "村", "区"))), names[0])
    if rate_col is None:
        # First column whose name contains "率", else the last column.
        rate_like = [c for c in names if "率" in c]
        rate_col = rate_like[0] if rate_like else names[-1]

    d = df.copy()
    for text_col in (pref_col, muni_col2):
        d[text_col] = d[text_col].astype(str).str.strip()
    d[rate_col] = pd.to_numeric(d[rate_col], errors="coerce")
    d = d.dropna(subset=[rate_col])

    muni_rate_norm = {}
    for _, row in d.iterrows():
        muni_rate_norm[norm(row[muni_col2])] = float(row[rate_col])

    pref_mean = d.groupby(pref_col)[rate_col].mean().to_dict()
    pref_rate = {str(k).strip(): float(v) for k, v in pref_mean.items()}

    return {"pref_col": pref_col, "muni_col": muni_col2, "rate_col": rate_col,
            "muni_rate_norm": muni_rate_norm,
            "pref_rate": pref_rate}

attr = load_attraction(STATS_XLSX)

# Prefecture code -> mean rate, joined on the name part that follows the two digits
pref_rate_by_code = {}
for pc in pref_gap.keys():
    m = re.match(r"^\d{2}(.+)$", pc)
    if m:
        name = m.group(1)
        if name in attr["pref_rate"]:
            pref_rate_by_code[pc] = attr["pref_rate"][name]

# City key -> rate, joined on the normalized spelling
city_rate = {}
for ck in city_keys:
    cn = norm(ck)
    if cn in attr["muni_rate_norm"]:
        city_rate[ck] = attr["muni_rate_norm"][cn]

def percentile_need(values_dict):
    """Rank the values as percentiles and derive a complementary "need" score.

    Entries whose value is None or NaN are ignored. Returns ``(pct_map,
    need_map)`` where ``pct_map[k]`` is the average-method percentile rank of
    the value and ``need_map[k]`` is ``1 - pct_map[k]``. Both are empty when no
    usable value exists.
    """
    def _usable(value):
        return value is not None and not (isinstance(value, float) and math.isnan(value))

    kept = {key: value for key, value in values_dict.items() if _usable(value)}
    if not kept:
        return {}, {}

    ranks = pd.Series(list(kept.values())).rank(pct=True, method="average").tolist()
    pct_map = dict(zip(kept.keys(), ranks))
    need_map = {key: float(1 - rank) for key, rank in pct_map.items()}
    return pct_map, need_map

# Percentile rank and need (1 - percentile) of prefecture and city rates
pref_pct, pref_need = percentile_need(pref_rate_by_code)
city_pct, city_need = percentile_need(city_rate)

# ====== External events (to be wired up later) ======
# (city_key, week_id) -> reason, built from CONFIG["EXTERNAL_EVENT_BLACKOUT"]
external_blackout = {(d.get("city_key"), d.get("week_id")): d.get("reason","外部イベント")
                     for d in CONFIG["EXTERNAL_EVENT_BLACKOUT"]}

# ====== 理由（上司向け口語） ======
def fmt_rate(v):
    """Format a rate for display.

    None/NaN gives "不明"; values up to 1.0 are shown as a percentage with two
    decimals; larger values are shown with four significant digits.
    """
    unknown = v is None or (isinstance(v, float) and math.isnan(v))
    if unknown:
        return "不明"
    if v <= 1.0:
        return f"{v*100:.2f}%"
    return f"{v:.4g}"

def fmt_pct(p):
    """Format a percentile rank as "下位N%"; None gives "不明"."""
    return "不明" if p is None else f"下位{p*100:.0f}%"

def build_reason_presentation(variant, week_id, city_key, pref_code,
                              gap_need_pref, gap_real_pref, gap_need_city, gap_real_city,
                              pref_rate, pref_rank, city_rate_v, city_rank,
                              same_week_pref_hit, relax_mode,
                              remaining_plan, overflow_used, overflow_meta, score):
    """Compose the conversational (manager-facing) explanation for one proposal.

    The text keeps the underlying numbers so the reader can challenge them.
    It is assembled from: a header, the quota rule (or the overflow exception
    with its J value / population), the interval check, the attraction-rate
    view, the same-week/same-prefecture note, the fixed snow/event note and a
    summary with the score. Sentences are joined with " / " and the result is
    cut to 32000 characters.

    `pref_code` is accepted for signature symmetry but is not used in the text.
    `overflow_meta` is only read when `overflow_used` is truthy.
    """
    header = f"【案{variant}】この枠（{week_id}）はAJ枠が空いていたので、次の優先順位で選びました。"

    # 1) Quota: regular remaining count, or the overflow exception.
    if overflow_used:
        population = overflow_meta.get("pop", None)
        j_value = overflow_meta.get("j_value", None)
        raw_name = overflow_meta.get("name_raw", "")
        if isinstance(population, (int,float)) and not math.isnan(population):
            pop_txt = f"{int(population):,}人"
        else:
            pop_txt = "不明"
        quota_lines = [
            "1) ここは『計画回数の残りがある市区分』だけでは候補が出ず、埋め切れなかったため例外ルールで増枠しています。",
            f"   例外ルール：統計量のJ列=1 かつ 人口18万人以上。→ {raw_name or city_key}（J={j_value} / 人口={pop_txt}）を対象にしました。",
        ]
    else:
        quota_lines = [
            f"1) まず『43期の開催回数の残り』がある市区分から選定。→ {city_key} は残り {remaining_plan} 回あったため、計画の範囲内で入れています。",
        ]

    # 2) Interval (prefecture / city), with the relaxation mode when one applied.
    relax_note = f"（制約緩和={relax_mode}）" if relax_mode else ""
    span_line = (
        f"2) 開催間隔（スパン）も見ています。県は『必要{gap_need_pref}週』に対して『前回から{gap_real_pref}週』、市区分は『必要{gap_need_city}週』に対して『前回から{gap_real_city}週』です。"
        + relax_note
    )

    # 3) Attraction rate; only formatted when at least one rate is available.
    if pref_rate is None and city_rate_v is None:
        rate_line = "3) 集客率はデータが取れていない項目があったため、スパンと回数を優先して選んでいます。"
    else:
        rate_line = (
            f"3) 集客率のテコ入れ観点。県平均は {fmt_rate(pref_rate)}（{fmt_pct(pref_rank)}）、市区分は {fmt_rate(city_rate_v)}（{fmt_pct(city_rank)}）。"
            " 低いほど優先度を上げるロジックなので、改善余地の大きいエリアが上に来ます。"
        )

    # 4) Same week / same prefecture avoidance.
    if same_week_pref_hit:
        same_week_note = "（ただし他候補が厳しく同県になったためペナルティは付けています）"
    else:
        same_week_note = "（今回は同週同県を回避できています）"
    same_week_line = "4) 同じ週に同じ都道府県が固まるとカニバりやすいので、同週同県はなるべく避けています。" + same_week_note

    # 5) Snow / festivals (fixed wording).
    snow_line = "5) 豪雪地帯の12〜2月・3月1wは原則除外しています（例外リストで解除可能）。祭り/マラソン等は現状未連携で、後日API等をつなげて自動除外できます。"

    # Closing summary.
    summary = f"【まとめ】『回数の整合』×『間隔（県/市区）』×『集客率の改善余地』×『同週同県回避』を総合して、この候補が一番バランスが良いので採用しています。（score={score:.2f}）"

    sentences = [header, *quota_lines, span_line, rate_line, same_week_line, snow_line, summary]
    return " / ".join(sentences)[:32000]

# ====== Existing week x prefecture usage (term 43) ======
# week_id -> set of prefecture codes already present in scheduled43.
# Rows with a falsy week_id or prefecture code are ignored.
week_used_pref_base = {}
for _, e in scheduled43.iterrows():
    w = e["week_id"]
    pc = e["pref_code"]
    if w and pc:
        week_used_pref_base.setdefault(w, set()).add(pc)

# ====== Plan generator ======
def make_plan(variant):
    """Greedily fill every open term-43 AJ slot for one plan variant.

    Slots are visited in the order of `open43_AJ`. For each slot every city
    key is scored and the highest score wins. The search runs in two phases:

      1. cities that still have planned count left, trying the interval
         constraints strictly first, then relaxing the city interval
         ("Aのみ"), then both intervals ("B+A");
      2. only if phase 1 finds nothing: the same relax sequence, additionally
         allowing cities in the overflow pool (OVR_POOL) whose planned count
         is already used up.

    A slot for which no candidate survives is recorded with an empty city and
    a score of -inf rather than guessed.

    Args:
        variant: key into CONFIG["WEIGHTS"] / CONFIG["SEEDS"] ("A", "B", "C").

    Returns:
        pandas.DataFrame with one row per open slot (columns: variant,
        week_id_43, week_pos_43, row_header, assign_city_key,
        pref_code_guess, score, reason_BT).
    """
    rng = random.Random(CONFIG["SEEDS"][variant])
    W = CONFIG["WEIGHTS"][variant]
    relax_order = (None, "Aのみ", "B+A")

    # Per-plan mutable state; the module-level tables are copied so that the
    # variants do not influence each other.
    used_in_plan = {ck: 0 for ck in city_keys}
    lp_p = dict(last_pos_pref)
    lp_c = dict(last_pos_city)
    week_used_pref = {w: set(s) for w, s in week_used_pref_base.items()}

    assigns = []

    def score_city(ck, apos, week_id, relax_mode, allow_overflow):
        """Score one city for one slot; return None when it is not eligible."""
        pc = pref_by_city.get(ck, "")
        plan_cnt = plan_count_by_city.get(ck, 0)
        already = scheduled_counts_43.get(ck, 0)
        remaining = plan_cnt - already - used_in_plan.get(ck, 0)

        overflow_used = False
        overflow_meta = {}

        # Count rule: without remaining planned count the city is only
        # eligible through the overflow pool, and only when that is allowed.
        if remaining <= 0:
            if not allow_overflow or ck not in OVR_POOL:
                return None
            overflow_used = True
            meta = ovr_candidates.get(ck, {})
            overflow_meta = {
                "name_raw": meta.get("name_raw", ""),
                "pop": meta.get("pop", float("nan")),
                "j_value": CONFIG["OVR_CLASS_J_VALUE"],
                "match_score": meta.get("match_score", None),
            }

        # Interval requirements (prefecture / city).
        gap_need_pref = pref_gap.get(pc, CONFIG["GAP_WEEKS_MIN"]) if pc else CONFIG["GAP_WEEKS_MIN"]
        gap_need_city = city_gap.get(ck, CONFIG["GAP_WEEKS_MIN"])

        last_p = lp_p.get(pc, None) if pc else None
        last_c = lp_c.get(ck, None)
        # 999 stands for "never held", which also yields a large slack below.
        gap_real_pref = 999 if last_p is None else (apos - last_p)
        gap_real_city = 999 if last_c is None else (apos - last_c)

        ok_p = (last_p is None) or (gap_real_pref >= gap_need_pref)
        ok_c = (last_c is None) or (gap_real_city >= gap_need_city)

        if relax_mode is None:
            if not (ok_p and ok_c):
                return None
        elif relax_mode == "Aのみ":
            # City interval relaxed; the prefecture interval still applies.
            if not ok_p:
                return None
        # Any other mode ("B+A"): both interval checks are waived.

        # Snow blackout and external events are never relaxed.
        if pc and is_snow_blackout(pc, ck, week_id):
            return None
        if (ck, week_id) in external_blackout:
            return None

        # Soft penalty when the prefecture is already used in this week.
        same_week_pref_hit = bool(pc) and pc in week_used_pref.get(week_id, set())
        same_week_pen = CONFIG["SAME_WEEK_PREF_PENALTY"] if same_week_pref_hit else 0.0

        # Attraction rate: the lower the rate, the higher the need (0.5 when unknown).
        pref_need_score = pref_need.get(pc, 0.5)
        city_need_score = city_need.get(ck, 0.5)

        # Overflow is penalized lightly so it only wins when nothing else does.
        overflow_pen = CONFIG["OVR_OVERFLOW_PENALTY"] if overflow_used else 0.0

        slack_p = gap_real_pref - gap_need_pref
        slack_c = gap_real_city - gap_need_city
        relax_pen = 0.0
        if relax_mode == "Aのみ":
            relax_pen = W["relax_penalty"] * 0.6
        elif relax_mode == "B+A":
            relax_pen = W["relax_penalty"] * 1.0

        score = (
            slack_p * W["pref_slack"] +
            slack_c * W["city_slack"] +
            max(remaining, 0) * W["unmet_bonus"] * 5.0 +
            (pref_need_score * 10) * W["low_attr_pref"] +
            (city_need_score * 10) * W["low_attr_city"] -
            relax_pen - same_week_pen - overflow_pen +
            rng.uniform(-0.5, 0.5) * W["noise"]
        )

        return {
            "ck": ck, "pc": pc, "score": float(score),
            "relax_mode": relax_mode,
            "same_week_pref_hit": same_week_pref_hit,
            "overflow_used": overflow_used,
            "overflow_meta": overflow_meta,
            # The reason text is only needed for the winning candidate, so
            # just keep its inputs here and format it once after selection.
            "reason_args": dict(
                variant=variant, week_id=week_id, city_key=ck, pref_code=pc,
                gap_need_pref=gap_need_pref, gap_real_pref=gap_real_pref,
                gap_need_city=gap_need_city, gap_real_city=gap_real_city,
                pref_rate=pref_rate_by_code.get(pc, None), pref_rank=pref_pct.get(pc, None),
                city_rate_v=city_rate.get(ck, None), city_rank=city_pct.get(ck, None),
                same_week_pref_hit=same_week_pref_hit, relax_mode=relax_mode,
                remaining_plan=remaining,
                overflow_used=overflow_used, overflow_meta=overflow_meta,
                score=score,
            ),
        }

    def pick_best(apos, week_id, allow_overflow):
        """Return the best candidate of the first relax mode that yields any."""
        for relax_mode in relax_order:
            best = None
            for ck in city_keys:
                cand = score_city(ck, apos, week_id, relax_mode, allow_overflow=allow_overflow)
                if cand is None:
                    continue
                if best is None or cand["score"] > best["score"]:
                    best = cand
            if best is not None:
                return best
        return None

    for _, slot in open43_AJ.iterrows():
        apos = int(slot["abs_pos"])
        week_id = slot["week_id"]

        # Phase 1: regular candidates; phase 2: overflow pool as a last resort.
        best = pick_best(apos, week_id, allow_overflow=False)
        if best is None:
            best = pick_best(apos, week_id, allow_overflow=True)

        if best is None:
            # Leave the slot empty: safer than a wrong proposal.
            assigns.append({
                "variant": variant,
                "week_id_43": week_id,
                "week_pos_43": int(slot["week_pos"]),
                "row_header": int(slot["row_header"]),
                "assign_city_key": "",
                "pref_code_guess": "",
                "score": float("-inf"),
                "reason_BT": f"案{variant}: 回数残/例外増枠プール/豪雪/スパン等の条件で候補が作れず空欄"
            })
            continue

        # Consume the choice so later slots see it.
        ck = best["ck"]
        pc = best["pc"]
        used_in_plan[ck] += 1
        lp_c[ck] = apos
        if pc:
            lp_p[pc] = apos
            week_used_pref.setdefault(week_id, set()).add(pc)

        assigns.append({
            "variant": variant,
            "week_id_43": week_id,
            "week_pos_43": int(slot["week_pos"]),
            "row_header": int(slot["row_header"]),
            "assign_city_key": ck,
            "pref_code_guess": pc,
            "score": best["score"],
            "reason_BT": build_reason_presentation(**best["reason_args"])
        })

    return pd.DataFrame(assigns)

# Generate the three alternative plans; each variant uses its own weights and
# random seed from CONFIG.
planA = make_plan("A")
planB = make_plan("B")
planC = make_plan("C")

def to_map(plan_df, key_col):
    """Index one column of a plan by sheet row: {row_header (int): value}."""
    mapping = {}
    for _idx, rec in plan_df.iterrows():
        mapping[int(rec["row_header"])] = rec[key_col]
    return mapping

# row_header -> proposed city key / prefecture code per variant. Only plan A's
# reason text is kept, since only that one is written back.
A_city = to_map(planA, "assign_city_key")
B_city = to_map(planB, "assign_city_key")
C_city = to_map(planC, "assign_city_key")
A_pref = to_map(planA, "pref_code_guess")
B_pref = to_map(planB, "pref_code_guess")
C_pref = to_map(planC, "pref_code_guess")
A_reason = to_map(planA, "reason_BT")

# ====== 11) Write back (keep only the term-42 / term-43 sheets) ======
wb = openpyxl.load_workbook(QUARTER_XLSX)
keep = {sheet42, sheet43}
# Iterate over a copy of the sheet names because sheets are removed in the loop.
for name in list(wb.sheetnames):
    if name not in keep:
        wb.remove(wb[name])

ws43 = wb[sheet43]

written = 0
skipped = 0
overflow_used_cnt = 0

for row0 in open43_AJ["row_header"].tolist():
    # row_header is the 0-based DataFrame row; openpyxl rows are 1-based.
    r = int(row0) + 1

    # Overwrite guard: only write when both C (city) and D (venue) are empty.
    c_val = ws43.cell(r, COL_CITY+1).value
    d_val = ws43.cell(r, COL_VENUE+1).value
    if (c_val is not None and str(c_val).strip() != "") or (d_val is not None and str(d_val).strip() != ""):
        skipped += 1
        continue

    a_city = str(A_city.get(int(row0), "") or "")
    b_city = str(B_city.get(int(row0), "") or "")
    c_city = str(C_city.get(int(row0), "") or "")

    # B/C are always shown (missing -> "-"; shown even when equal to A).
    b_disp = b_city if b_city else "-"
    c_disp = c_city if c_city else "-"

    ws43.cell(r, COL_CITY+1).value = a_city
    ws43.cell(r, COL_VENUE+1).value = f"{VENUE_PLACEHOLDER}（B:{b_disp} / C:{c_disp}）"

    # Column F: first non-empty prefecture code in A -> B -> C order, written
    # only when the cell is currently empty.
    pref_candidate = A_pref.get(int(row0), "") or B_pref.get(int(row0), "") or C_pref.get(int(row0), "")
    if (ws43.cell(r, COL_PREF+1).value is None) or (str(ws43.cell(r, COL_PREF+1).value).strip()==""):
        if pref_candidate:
            ws43.cell(r, COL_PREF+1).value = str(pref_candidate)

    # COL_REASON_BT is used as-is (no +1), i.e. it is treated as a 1-based column.
    reason = str(A_reason.get(int(row0), "案A: 理由生成なし"))
    ws43.cell(r, COL_REASON_BT).value = reason
    # Overflow usage is detected from the wording of plan A's reason text.
    if "例外ルールで増枠" in reason:
        overflow_used_cnt += 1

    written += 1

wb.save(OUT_QUARTER_ABC)

print("\n✅ 出力:", OUT_QUARTER_ABC)
print("   - 42期マスタ:", sheet42)
print("   - 43期マスタ:", sheet43)
print("✅ 43期 空欄AJ枠:", len(open43_AJ), " / 書込:", written, " / スキップ:", skipped)
print("✅ 例外増枠が使われた枠数（A案の理由から判定）:", overflow_used_cnt)


✅ 例外増枠プール: 3 市区分（条件: J列=1 & 人口>=18万）
   - J列として使用: Zの4分位（1=赤,2=黄,3=青,4=灰） / 人口列として使用: 人口 / 市区町村列: 市区町村

✅ 出力: SA+AJ+共有用_四半期表20240303_43期提案ABC_不足時例外増枠_42_43のみ_理由BT.xlsx
   - 42期マスタ: 42期　マスタ
   - 43期マスタ: 43期　マスタ
✅ 43期 空欄AJ枠: 137  / 書込: 137  / スキップ: 0
✅ 例外増枠が使われた枠数（A案の理由から判定）: 0


In [None]:
# ============================================
# 43期 スケジュール自動提案 MVP（A/B/C案）【仕様修正版】
#
# ✅修正①：B案/C案が出ないケースの対策
#   - 原因A: これまで「Aと同じなら表示しない」仕様だった → 常にB/Cを表示する（同じでも出す、欠損は"-"）
#   - 原因B: 制約でB/C側の候補が取れず planB/planC に行が無い → フォールバック強化は維持しつつ、欠損は"-"で明示
#
# ✅修正②：同一週に同一都道府県をなるべく入れない（ソフト制約）
#   - 43期の既存枠(埋まり) + その案で既に提案した枠 を「週×都道府県」で使用済みにして
#     同一週の同一pref_codeにペナルティを付与（※不可能なら入るが優先度は下がる）
#
# ✅修正③：F列(都道府県コード+県名、例:25滋賀)を提案枠に書き込む
#   - city_key→pref_code_guess を用いてヘッダ行のF列に入れる（既に値があれば上書きしない）
#
# ✅修正④：出力ファイルは「42期マスタ」「43期マスタ」シートのみ
#   - その2シート以外は削除して保存（1ファイルのみ出力）
#
# 入力:
#   SA+AJ+共有用_四半期表20240303.xlsx
#   43期地域別会場回数.xlsx
#   市区町村_統計量_全国 (1).xlsx
#   豪雪例外リスト.xlsx（無ければ自動生成）
#
# 出力:
#   SA+AJ+共有用_四半期表20240303_43期提案ABC_42_43のみ_理由BT.xlsx
# ============================================

import os, re, math, random, difflib
import pandas as pd
import openpyxl
from openpyxl.utils import get_column_letter

# ====== Inputs ======
QUARTER_XLSX     = "SA+AJ+共有用_四半期表20240303.xlsx"
REGION_XLSX      = "43期地域別会場回数.xlsx"
ATTRACTION_XLSX  = "市区町村_統計量_全国 (1).xlsx"
SNOW_EXCEPT_XLSX = "豪雪例外リスト.xlsx"

# ====== Output (single file) ======
OUT_QUARTER_ABC = "SA+AJ+共有用_四半期表20240303_43期提案ABC_42_43のみ_理由BT.xlsx"

# ====== Column definitions (0-based, except COL_REASON_BT) =====
COL_WEEK = 0           # A: week id
COL_REGION = 1         # B: region (on the kind row: AJ / joint / SA)
COL_CITY = 2           # C: city group (proposal A is written here)
COL_VENUE = 3          # D: venue (placeholder plus the B/C proposals)
COL_PREF = 5           # F: prefecture code (e.g. 25滋賀); also written by this script
COL_KIND_DETAIL = 1    # column B of the kind row
COL_REASON_BT = 72     # column BT; already 1-based (passed to openpyxl without +1)

VENUE_PLACEHOLDER = "会場要検討"
# Week ids look like "6-4w": one or two digits, a dash, one digit, "w".
week_id_pat = re.compile(r"^\d{1,2}-\dw$")

# ====== Settings ======
CONFIG = {
    # Lower / upper clamp (in weeks) for the required interval between events.
    "GAP_WEEKS_MIN": 6,
    "GAP_WEEKS_MAX": 30,

    # Penalty for placing the same prefecture twice in one week (larger = avoided harder).
    "SAME_WEEK_PREF_PENALTY": 80.0,

    # Snow blackout (provisional): whole months, plus listed weeks of March.
    "SNOW_BLACKOUT_MONTHS": {12, 1, 2},
    "SNOW_BLACKOUT_MARCH_W": {1},
    "SNOW_PREF_CODES": {
        "01北海道","02青森","03岩手","04宮城","05秋田","06山形","07福島",
        "15新潟","16富山","17石川","18福井","19山梨",
        "20長野","21岐阜","31鳥取","32島根"
    },

    # Spelling variation handling: minimum similarity for a fuzzy city match,
    # and manual overrides (raw name -> city key).
    "FUZZY_CUTOFF": 0.82,
    "MANUAL_CITY_MAP": {},

    # Festivals / marathons etc. (to be filled in later): list of dicts read
    # with the keys "city_key", "week_id" and optionally "reason".
    "EXTERNAL_EVENT_BLACKOUT": [],

    # Scoring weights of the three plan variants.
    "WEIGHTS": {
        "A": {"pref_slack": 3.0, "city_slack": 2.0, "unmet_bonus": 1.2, "over_penalty": 1.0,
              "low_attr_pref": 2.0, "low_attr_city": 1.5, "relax_penalty": 25.0, "noise": 0.8},
        "B": {"pref_slack": 4.5, "city_slack": 3.2, "unmet_bonus": 1.0, "over_penalty": 1.2,
              "low_attr_pref": 0.8, "low_attr_city": 0.6, "relax_penalty": 30.0, "noise": 0.8},
        "C": {"pref_slack": 2.0, "city_slack": 1.2, "unmet_bonus": 1.2, "over_penalty": 1.0,
              "low_attr_pref": 3.8, "low_attr_city": 3.2, "relax_penalty": 25.0, "noise": 0.8},
    },
    # Random seeds per variant, so the noise term is reproducible.
    "SEEDS": {"A": 4301, "B": 4302, "C": 4303},
}

# ====== Utility ======
def norm(s):
    """Normalize a name for matching.

    None / NaN become "". Otherwise the value is stringified, full-width
    spaces are turned into ASCII spaces, the ends are trimmed, and whitespace,
    dashes (including the long-vowel mark), slashes, middle dots, commas,
    periods and all bracket characters are removed.
    """
    if s is None:
        return ""
    if isinstance(s, float) and math.isnan(s):
        return ""
    text = str(s).replace("　"," ").strip()
    return re.sub(r"[ \t\n\r\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", text)

def find_sheet_name(xlsx_path, keywords):
    """Return the first sheet whose name contains every keyword.

    Falls back to the workbook's first sheet when no name matches.

    Args:
        xlsx_path: path of the workbook to inspect.
        keywords: iterable of substrings that must all occur in the sheet name.
    """
    wb = openpyxl.load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        names = list(wb.sheetnames)
    finally:
        # Read-only workbooks keep the file open until closed explicitly.
        wb.close()

    for name in names:
        if all(k in name for k in keywords):
            return name
    return names[0]

def parse_week_id(week_id):
    """Split a week id such as "6-4w" into (month, week) integers.

    Returns (None, None) when the value does not have that form.
    """
    hit = re.match(r"^(\d{1,2})-(\d)w$", str(week_id))
    if hit is None:
        return None, None
    month_txt, week_txt = hit.groups()
    return int(month_txt), int(week_txt)

def gap_weeks_from_count(cnt):
    """Convert a yearly event count into a required interval in weeks.

    The interval is ceil(52 / cnt), clamped to the configured
    GAP_WEEKS_MIN..GAP_WEEKS_MAX range; a count of zero or less yields the
    minimum.
    """
    lower = CONFIG["GAP_WEEKS_MIN"]
    if cnt <= 0:
        return lower
    upper = CONFIG["GAP_WEEKS_MAX"]
    weeks = int(math.ceil(52 / cnt))
    return max(lower, min(upper, weeks))

def kind_norm(x):
    """Normalize an event-kind label to its full-width form.

    None / NaN become "". ASCII "AJ" / "SA" and full-width lowercase variants
    are rewritten to full-width uppercase; other text is only trimmed.
    """
    if x is None or (isinstance(x, float) and math.isnan(x)):
        return ""
    label = str(x).strip()
    # Applied in this order, exactly one pass each.
    for before, after in (("AJ", "ＡＪ"), ("ａｊ", "ＡＪ"), ("SA", "ＳＡ"), ("ｓａ", "ＳＡ")):
        label = label.replace(before, after)
    return label

def ensure_snow_except_template(path):
    """Create an empty snow-exception workbook at `path` unless it already exists.

    The template has one sheet with the header cells pref_code / city_key /
    memo and a usage note in C2.
    """
    if not os.path.exists(path):
        book = openpyxl.Workbook()
        sheet = book.active
        sheet.title = "例外"
        template_cells = (
            ("A1", "pref_code"),
            ("B1", "city_key"),
            ("C1", "memo"),
            ("C2", "豪雪NGを例外的にOKにしたい対象を追加。空欄は無視。"),
        )
        for ref, text in template_cells:
            sheet[ref] = text
        book.save(path)

def load_snow_excepts(path):
    """Read the snow-exception list and return (pref_codes, city_keys) as sets.

    The template file is created first when it does not exist. Missing
    columns, empty cells and blank strings are ignored; values are trimmed.
    """
    ensure_snow_except_template(path)
    table = pd.read_excel(path, sheet_name=0, dtype=str)

    def _clean_values(column):
        series = table.get(column, pd.Series([], dtype=str))
        trimmed = series.dropna().astype(str).str.strip()
        return {value for value in trimmed if value}

    pref_codes = _clean_values("pref_code")
    city_names = _clean_values("city_key")
    return pref_codes, city_names

# Exemption sets for the snow blackout, loaded once when the cell runs
# (load_snow_excepts creates the template workbook when it is missing).
SNOW_EXCEPT_PREF_CODES, SNOW_EXCEPT_CITY_KEYS = load_snow_excepts(SNOW_EXCEPT_XLSX)

def is_snow_blackout(pref_code, city_key, week_id):
    """Tell whether a prefecture/city is blocked by the snow rule in a week.

    Returns False when the prefecture code is empty, when the prefecture or
    the city is on the exemption list, when the prefecture is not a snow
    prefecture, or when the week id cannot be parsed. Otherwise True for the
    configured blackout months and for the configured weeks of March.
    """
    if not pref_code or pref_code in SNOW_EXCEPT_PREF_CODES:
        return False
    if city_key and city_key in SNOW_EXCEPT_CITY_KEYS:
        return False
    if pref_code not in CONFIG["SNOW_PREF_CODES"]:
        return False

    month, week_no = parse_week_id(week_id)
    if month is None:
        return False

    in_blackout_month = month in CONFIG["SNOW_BLACKOUT_MONTHS"]
    return in_blackout_month or (month == 3 and week_no in CONFIG["SNOW_BLACKOUT_MARCH_W"])

def build_week_order(qsheet):
    """Collect the distinct week ids of a sheet in order of first appearance.

    Cell text is trimmed before matching, the same way extract_blocks trims
    it, so a week id with stray surrounding whitespace gets a position here
    instead of being dropped later for lack of one.

    Returns:
        (week_order, week_index): the ordered list of week ids and a dict
        mapping each week id to its index in that list.
    """
    week_order, seen = [], set()
    for raw in qsheet[COL_WEEK].astype(str):
        w = raw.strip()
        if week_id_pat.match(w) and w not in seen:
            week_order.append(w)
            seen.add(w)
    return week_order, {w: i for i, w in enumerate(week_order)}

def extract_blocks(qsheet, week_index_map, which):
    """Extract the AJ / joint event blocks of one quarter sheet.

    A block is a header row whose week column holds a week id, together with
    the row directly below it (the "kind" row, read from column B). The last
    sheet row is never treated as a header because it has no row below it.

    Args:
        qsheet: sheet as a DataFrame read with header=None.
        week_index_map: week id -> position, as built by build_week_order.
        which: fiscal-term label stored in the "fy" column.

    Returns:
        DataFrame with the columns fy, week_id, week_pos, row_header,
        row_detail, kind, city_raw, venue_raw, pref_code, restricted to rows
        whose kind is AJ or joint (SA is excluded) and whose week id has a
        known position. An empty frame with those columns is returned when
        the sheet contains no week headers.
    """
    columns = ["fy", "week_id", "week_pos", "row_header", "row_detail",
               "kind", "city_raw", "venue_raw", "pref_code"]

    def _cell_text(value):
        # Empty cells come through as NaN; everything else is trimmed text.
        return "" if pd.isna(value) else str(value).strip()

    blocks = []
    for i in range(len(qsheet) - 1):
        w = qsheet.iat[i, COL_WEEK]
        if pd.isna(w):
            continue
        w = str(w).strip()
        if not week_id_pat.match(w):
            continue
        header = qsheet.iloc[i]
        detail = qsheet.iloc[i + 1]

        blocks.append({
            "fy": which, "week_id": w, "week_pos": week_index_map.get(w, None),
            "row_header": i, "row_detail": i + 1,
            "kind": kind_norm(detail[COL_KIND_DETAIL]),
            "city_raw": _cell_text(header[COL_CITY]),
            "venue_raw": _cell_text(header[COL_VENUE]),
            "pref_code": _cell_text(header[COL_PREF]),
        })

    # Passing the columns explicitly keeps the frame well-formed even when no
    # block was found (otherwise dropna would fail on the missing column).
    df = pd.DataFrame(blocks, columns=columns).dropna(subset=["week_pos"]).copy()
    df["week_pos"] = df["week_pos"].astype(int)
    # Only AJ / joint events are handled; SA is excluded.
    return df[df["kind"].isin(["ＡＪ","合同"])].copy()

def best_fuzzy_key(a_norm, candidates_norm):
    """Find the candidate most similar to `a_norm`.

    Empty candidates are skipped. On equal similarity the earlier candidate
    wins. Returns (candidate, ratio), or ("", 0.0) when nothing has a
    similarity above zero.
    """
    scored = [
        (difflib.SequenceMatcher(None, a_norm, cand).ratio(), cand)
        for cand in candidates_norm
        if cand
    ]
    # max() keeps the first of several equal maxima, like a strict ">" scan.
    top_ratio, top_norm = max(scored, key=lambda pair: pair[0], default=(0.0, ""))
    if top_ratio <= 0.0:
        return "", 0.0
    return top_norm, top_ratio

# ====== 1) Load quarter sheets ======
# Sheet names are looked up by keyword; find_sheet_name falls back to the
# first sheet when nothing matches.
sheet42 = find_sheet_name(QUARTER_XLSX, ["42期", "マスタ"])
sheet43 = find_sheet_name(QUARTER_XLSX, ["43期", "マスタ"])

# header=None: rows and columns are addressed by 0-based integer position.
q42 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet42, header=None)
q43 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet43, header=None)

# Ordered week ids and week id -> index, per term.
week_order_42, week_index_42 = build_week_order(q42)
week_order_43, week_index_43 = build_week_order(q43)

# AJ / joint event blocks per term.
b42 = extract_blocks(q42, week_index_42, "42")
b43 = extract_blocks(q43, week_index_43, "43")

# ====== 2) City plan (city-group key and planned count) ======
# Columns are read by position: 0 = parent label, 1 = city key, 2 = planned count.
r = pd.read_excel(REGION_XLSX, sheet_name=0, header=None)
# Keep only rows that have both a count (col 2) and a city key (col 1).
plan_rows = r[~r[2].isna()].copy()
plan_rows = plan_rows[plan_rows[1].notna()]
# NOTE(review): the forward fill runs after the filtering above, so a parent
# label that sits on a row without count/city key is already gone here —
# confirm against the sheet layout.
plan_rows["pref_parent"] = plan_rows[0].ffill()
plan_rows["city_key"] = plan_rows[1].astype(str)
# Non-numeric counts are treated as 0.
plan_rows["plan_count"] = pd.to_numeric(plan_rows[2], errors="coerce").fillna(0).astype(int)
region_master = plan_rows[["pref_parent","city_key","plan_count"]].reset_index(drop=True)

city_keys = region_master["city_key"].tolist()
# city key -> normalized name, plus the reverse lookup used for matching.
# When two keys normalize to the same text, the later one wins in _norm_to_key.
city_norm_map = {k: norm(k) for k in city_keys}
_norm_values = list(city_norm_map.values())
_norm_to_key = {city_norm_map[k]: k for k in city_norm_map}

def match_city_key(city_name):
    """Map a raw city name from the quarter sheet to a planned city key.

    Matching is tried in this order: manual override, exact match on the
    normalized name, substring containment in either direction, and finally
    the most similar normalized key when it reaches CONFIG["FUZZY_CUTOFF"].

    Returns:
        (city_key, score): score is 1.0 for manual/exact matches, 0.95 for a
        containment match, and the similarity ratio for a fuzzy match. When
        nothing matches, city_key is None and score is the best ratio seen
        (0.0 for an empty name).
    """
    if not city_name:
        return None, 0.0
    if city_name in CONFIG["MANUAL_CITY_MAP"]:
        return CONFIG["MANUAL_CITY_MAP"][city_name], 1.0

    a = norm(city_name)
    if not a:
        # Only punctuation was given (e.g. "-"). An empty string is contained
        # in every key, so the containment test below would otherwise match
        # the first city with a 0.95 score.
        return None, 0.0
    if a in _norm_to_key:
        return _norm_to_key[a], 1.0

    for ck, ckn in city_norm_map.items():
        if ckn and (ckn in a or a in ckn):
            return ck, 0.95

    bn, ratio = best_fuzzy_key(a, _norm_values)
    if ratio >= CONFIG["FUZZY_CUTOFF"] and bn:
        return _norm_to_key[bn], ratio
    return None, ratio

def add_city_key(df):
    """Return a copy of `df` with the matched city key and its match score.

    Each value of the "city_raw" column is resolved with match_city_key; the
    results go into the new columns "city_key" and "match_score".
    """
    matches = [match_city_key(raw_name) for raw_name in df["city_raw"].tolist()]
    out = df.copy()
    out["city_key"] = [key for key, _score in matches]
    out["match_score"] = [score for _key, score in matches]
    return out

# Already scheduled = blocks that have a city or a venue filled in (AJ and joint alike).
scheduled42 = b42[(b42["city_raw"]!="") | (b42["venue_raw"]!="")].copy()
scheduled43 = b43[(b43["city_raw"]!="") | (b43["venue_raw"]!="")].copy()
scheduled42 = add_city_key(scheduled42)
scheduled43 = add_city_key(scheduled43)

# Proposal targets: term-43 header rows of kind AJ whose C and D columns are
# both empty, ordered by week position and then sheet row.
open43_AJ = b43[
    (b43["kind"]=="ＡＪ") &
    (b43["city_raw"]=="") &
    (b43["venue_raw"]=="")
].copy().sort_values(["week_pos","row_header"]).reset_index(drop=True)

# ====== 3) Prefecture code guess (city_key -> pref_code_guess) ======
# Collect the distinct values of the prefecture column of the term-43 sheet and
# index those of the form "two digits + name" by their name part.
pref_code_set = set([str(x).strip() for x in q43[COL_PREF].dropna().astype(str).unique().tolist()])
pref_name_to_code = {}
for pc in pref_code_set:
    m = re.match(r"^(\d{2})(.+)$", pc)
    if m:
        pref_name_to_code[m.group(2)] = pc

def parent_to_pref_code(pref_parent):
    """Translate a parent label of the region sheet into a prefecture code.

    The leading non-digit part of the label is taken as the prefecture name
    and looked up in pref_name_to_code. Returns "" for None / NaN or when the
    name is unknown.
    """
    if pref_parent is None:
        return ""
    if isinstance(pref_parent, float) and math.isnan(pref_parent):
        return ""

    label = str(pref_parent).strip()
    lead = re.match(r"^([^\d]+)", label)
    if lead:
        label = lead.group(1).strip()
    return pref_name_to_code.get(label, "")

# Per-city lookups: guessed prefecture code and planned count.
region_master["pref_code_guess"] = region_master["pref_parent"].apply(parent_to_pref_code)
pref_by_city = region_master.set_index("city_key")["pref_code_guess"].to_dict()
plan_count_by_city = region_master.set_index("city_key")["plan_count"].to_dict()

# Required interval: N events per year -> 52/N weeks (clamped by gap_weeks_from_count).
# The prefecture count is the sum over its cities; the empty code is skipped.
pref_plan_count = region_master.groupby("pref_code_guess")["plan_count"].sum().to_dict()
pref_gap = {pc: gap_weeks_from_count(int(cnt)) for pc, cnt in pref_plan_count.items() if pc}
city_gap = {ck: gap_weeks_from_count(int(cnt)) for ck, cnt in plan_count_by_city.items()}

# ====== 4) Timeline and last positions (term 42 followed by term 43) ======
# Term-43 week positions are shifted by the number of term-42 weeks so both
# terms share one absolute week axis.
OFFSET_43 = len(week_order_42)
scheduled42["abs_pos"] = scheduled42["week_pos"]
scheduled43["abs_pos"] = scheduled43["week_pos"] + OFFSET_43
open43_AJ["abs_pos"]   = open43_AJ["week_pos"] + OFFSET_43
scheduled_all = pd.concat([scheduled42, scheduled43], ignore_index=True)

# Latest absolute position per prefecture code / city key over everything
# scheduled in both terms.
# NOTE(review): this maximum includes term-43 events that lie after an open
# slot, so "slot position - last position" can be negative for earlier slots.
last_pos_pref, last_pos_city = {}, {}
for _, e in scheduled_all.dropna(subset=["abs_pos"]).iterrows():
    ap = int(e["abs_pos"])
    pc = e["pref_code"]
    ck = e["city_key"]
    if pc:
        last_pos_pref[pc] = max(last_pos_pref.get(pc, -999), ap)
    if ck:
        last_pos_city[ck] = max(last_pos_city.get(ck, -999), ap)

# ====== 5) Remaining plan for term 43 (plan - scheduled43) ======
# Rows without a matched city key do not count; the result may be negative.
scheduled_counts_43 = scheduled43.dropna(subset=["city_key"]).groupby("city_key").size().to_dict()
rem_base = {ck: plan_count_by_city.get(ck,0) - scheduled_counts_43.get(ck,0) for ck in city_keys}

# ====== 6) 集客率 ======
def load_attraction(path):
    """Load attraction-rate statistics from the first sheet of a workbook.

    The prefecture, municipality and rate columns are located by exact header
    names first and by looser header heuristics as a fallback. Rows whose
    rate is not numeric are dropped.

    Args:
        path: path of the statistics workbook.

    Returns:
        dict with the detected column names ("pref_col", "muni_col",
        "rate_col"), "muni_rate_norm" (normalized municipality name -> rate;
        for duplicate names the last row wins) and "pref_rate" (trimmed
        prefecture name -> mean rate).

    Raises:
        KeyError: when one of the three columns cannot be detected.
    """
    df = pd.read_excel(path, sheet_name=0)
    df.columns = [str(c).strip() for c in df.columns]

    pref_col = next((c for c in df.columns if c in ["県","都道府県","pref","prefecture"]), None)
    muni_col = next((c for c in df.columns if c in ["市区町村","市町村","自治体","municipality","市区分"]), None)
    rate_col = next((c for c in df.columns if "集客率" in c or "来場率" in c or "動員率" in c), None)

    if pref_col is None or muni_col is None or rate_col is None:
        # Fallback: guess from partial header text (text columns for the
        # names, a non-text column for the rate).
        text_cols = [c for c in df.columns if df[c].dtype == object]
        if rate_col is None:
            for c in df.columns:
                if "率" in c and df[c].dtype != object:
                    rate_col = c
                    break
        if pref_col is None:
            for c in text_cols:
                if "県" in c or "都道府県" in c:
                    pref_col = c
                    break
        if muni_col is None:
            for c in text_cols:
                if "市" in c or "町" in c or "村" in c or "区" in c:
                    muni_col = c
                    break

    # Fail with a readable message instead of an opaque KeyError(None) below.
    undetected = [label for label, col in (("prefecture", pref_col),
                                           ("municipality", muni_col),
                                           ("rate", rate_col)) if col is None]
    if undetected:
        raise KeyError(
            f"load_attraction: could not detect the {', '.join(undetected)} column(s) "
            f"in {path!r}; available columns: {list(df.columns)}"
        )

    d = df.copy()
    d[pref_col] = d[pref_col].astype(str).str.strip()
    d[muni_col] = d[muni_col].astype(str).str.strip()
    d[rate_col] = pd.to_numeric(d[rate_col], errors="coerce")
    d = d.dropna(subset=[rate_col])

    muni_rate_norm = {norm(muni): float(rate) for muni, rate in zip(d[muni_col], d[rate_col])}
    pref_rate = d.groupby(pref_col)[rate_col].mean().to_dict()

    return {"pref_col": pref_col, "muni_col": muni_col, "rate_col": rate_col,
            "muni_rate_norm": muni_rate_norm,
            "pref_rate": {str(k).strip(): float(v) for k, v in pref_rate.items()}}

attr = load_attraction(ATTRACTION_XLSX)

# Prefecture code (e.g. 07福島) -> mean attraction rate, looked up by the name
# part after the two leading digits. Codes without a matching name are left out.
pref_rate_by_code = {}
for pc in pref_gap.keys():
    m = re.match(r"^\d{2}(.+)$", pc)
    if m:
        name = m.group(1)
        if name in attr["pref_rate"]:
            pref_rate_by_code[pc] = attr["pref_rate"][name]

# City key -> attraction rate of the municipality, matched on the normalized
# name only (exact match after norm()).
city_rate = {}
for ck in city_keys:
    cn = norm(ck)
    if cn in attr["muni_rate_norm"]:
        city_rate[ck] = attr["muni_rate_norm"][cn]

def percentile_need(values_dict):
    """Rank the values as percentiles and derive a "need" score from them.

    Entries whose value is None or a float NaN are ignored.

    Returns:
        (pct_map, need_map): pct_map maps each key to its percentile rank
        (average method); need_map maps each key to 1 - percentile, so lower
        values get a higher need. Both are empty for empty input or when no
        usable value remains.
    """
    if not values_dict:
        return {}, {}

    usable = {
        key: value
        for key, value in values_dict.items()
        if value is not None and not (isinstance(value, float) and math.isnan(value))
    }
    if not usable:
        return {}, {}

    ranks = pd.Series(list(usable.values())).rank(pct=True, method="average").tolist()
    pct_map = dict(zip(usable.keys(), ranks))
    need_map = {key: float(1 - rank) for key, rank in pct_map.items()}
    return pct_map, need_map

# Percentile rank and "need" (1 - percentile) of the attraction rate, per
# prefecture code and per city key.
pref_pct, pref_need = percentile_need(pref_rate_by_code)
city_pct, city_need = percentile_need(city_rate)

# ====== 7) External events (to be wired in later) ======
# (city_key, week_id) -> reason. Built from CONFIG["EXTERNAL_EVENT_BLACKOUT"];
# a missing "reason" falls back to the default label.
external_blackout = {(d.get("city_key"), d.get("week_id")): d.get("reason","外部イベント")
                     for d in CONFIG["EXTERNAL_EVENT_BLACKOUT"]}

def fmt_pct(p):
    """Format a percentile rank (0..1) as a whole-number percentage; None -> "NA"."""
    if p is None:
        return "NA"
    return f"{p*100:.0f}%"

def fmt_rate(v):
    """Format an attraction rate for the reason text.

    None / NaN become "NA"; values up to 1.0 are shown as a percentage with
    two decimals; anything larger is shown with four significant digits.
    """
    is_missing = v is None or (isinstance(v, float) and math.isnan(v))
    if is_missing:
        return "NA"
    if v <= 1.0:
        return f"{v*100:.2f}%"
    return f"{v:.4g}"

def build_reason(variant, relax_mode, need_gap_p, need_gap_c, gp, gc,
                 pref_rate, pref_p, city_rate_v, city_p,
                 rem_before, rem_after, same_week_pref_hit, score):
    """Build the compact reason string stored next to a proposal.

    The parts (variant, optional relaxation mode, required vs. actual
    interval for prefecture and city, attraction rates with their percentile,
    same-week/same-prefecture status, fixed snow/festival markers, remaining
    plan before/after, score) are joined with " / " and cut to 32000
    characters. A rate of None is reported as NA without calling the
    formatters.
    """
    if pref_rate is None:
        pref_part = "集客率(県)=NA"
    else:
        pref_part = f"集客率(県)={fmt_rate(pref_rate)}(下位{fmt_pct(pref_p)})"

    if city_rate_v is None:
        city_part = "集客率(市区)=NA"
    else:
        city_part = f"集客率(市区)={fmt_rate(city_rate_v)}(下位{fmt_pct(city_p)})"

    same_week_label = "同県あり(ペナ)" if same_week_pref_hit else "回避成功"

    parts = [f"案{variant}"]
    if relax_mode:
        parts.append(f"制約緩和={relax_mode}")
    parts += [
        f"スパンB(県):必要{need_gap_p}w/実績{gp}w",
        f"スパンA(市区):必要{need_gap_c}w/実績{gc}w",
        pref_part,
        city_part,
        f"同週同県={same_week_label}",
        "豪雪=判定済",
        "祭り=未連携",
        f"計画残(前)={rem_before}→(後)={rem_after}",
        f"score={score:.2f}",
    ]
    return " / ".join(parts)[:32000]

# ====== 8) Existing week x prefecture usage (term 43) ======
# Prefecture codes of already filled slots take part in the same-week
# duplicate avoidance: week_id -> set of prefecture codes.
week_used_pref_base = {}
for _, e in scheduled43.iterrows():
    w = e["week_id"]
    pc = e["pref_code"]
    # Skip rows with a falsy week id or prefecture code.
    if not w or not pc:
        continue
    week_used_pref_base.setdefault(w, set()).add(pc)

# ====== 9) Plan generator（A/B/C） ======
def make_plan(variant):
    """Greedily assign a city to every open term-43 AJ slot for one variant.

    Slots are processed in the order of `open43_AJ`. For each slot all city
    keys are scored; the interval constraints are applied strictly first,
    then with the city interval relaxed ("Aのみ"), then with both relaxed
    ("B+A"), stopping at the first mode that yields a candidate. Snow
    blackout and external events always exclude a candidate. A slot with no
    candidate at all produces no row.

    Args:
        variant: key into CONFIG["WEIGHTS"] / CONFIG["SEEDS"].

    Returns:
        pandas.DataFrame with one row per assigned slot (variant, week_id_43,
        week_pos_43, row_header, assign_city_key, pref_code_guess, score,
        reason_BT).
    """
    rng = random.Random(CONFIG["SEEDS"][variant])
    weights = CONFIG["WEIGHTS"][variant]
    relax_sequence = (None, "Aのみ", "B+A")
    relax_factor_by_mode = {"Aのみ": 0.6, "B+A": 1.0}

    # Plan-local state (copies, so the variants stay independent).
    placed = {ck: 0 for ck in city_keys}
    pref_last = dict(last_pos_pref)
    city_last = dict(last_pos_city)
    prefs_by_week = {w: set(s) for w, s in week_used_pref_base.items()}

    rows = []

    def score_candidate(ck, apos, week_id, relax_mode):
        """Score one city for one slot, or return None when it is excluded."""
        pc = pref_by_city.get(ck, "")
        if pc:
            need_gap_p = pref_gap.get(pc, CONFIG["GAP_WEEKS_MIN"])
            last_p = pref_last.get(pc, None)
        else:
            need_gap_p = CONFIG["GAP_WEEKS_MIN"]
            last_p = None
        need_gap_c = city_gap.get(ck, CONFIG["GAP_WEEKS_MIN"])
        last_c = city_last.get(ck, None)

        # 999 stands for "never held".
        gp = 999 if last_p is None else (apos - last_p)
        gc = 999 if last_c is None else (apos - last_c)

        pref_ok = (last_p is None) or (gp >= need_gap_p)
        city_ok = (last_c is None) or (gc >= need_gap_c)

        if relax_mode is None:
            blocked = not (pref_ok and city_ok)
        elif relax_mode == "Aのみ":
            blocked = not pref_ok
        else:
            blocked = False
        if blocked:
            return None

        # Snow blackout / external events are never relaxed.
        if pc and is_snow_blackout(pc, ck, week_id):
            return None
        if (ck, week_id) in external_blackout:
            return None

        slack_p = gp - need_gap_p
        slack_c = gc - need_gap_c

        rem_before = rem_base.get(ck, 0) - placed.get(ck, 0)
        unmet = max(rem_before, 0)
        over = max(-rem_before, 0)

        pref_need_score = pref_need.get(pc, 0.5)
        city_need_score = city_need.get(ck, 0.5)

        # Soft penalty when the prefecture is already used in the same week.
        same_week_pref_hit = bool(pc) and pc in prefs_by_week.get(week_id, set())
        same_week_pen = CONFIG["SAME_WEEK_PREF_PENALTY"] if same_week_pref_hit else 0.0

        relax_factor = relax_factor_by_mode.get(relax_mode)
        relax_pen = 0.0 if relax_factor is None else weights["relax_penalty"] * relax_factor

        score = (
            slack_p * weights["pref_slack"] +
            slack_c * weights["city_slack"] +
            unmet   * weights["unmet_bonus"] * 5.0 -
            over    * weights["over_penalty"] * 5.0 +
            (pref_need_score * 10) * weights["low_attr_pref"] +
            (city_need_score * 10) * weights["low_attr_city"] -
            relax_pen -
            same_week_pen +
            rng.uniform(-0.5, 0.5) * weights["noise"]
        )

        return {
            "ck": ck, "pc": pc, "score": score,
            "need_gap_p": need_gap_p, "need_gap_c": need_gap_c,
            "gp": gp, "gc": gc,
            "pref_rate": pref_rate_by_code.get(pc, None), "pref_p": pref_pct.get(pc, None),
            "city_rate": city_rate.get(ck, None), "city_p": city_pct.get(ck, None),
            "rem_before": rem_before,
            "relax_mode": relax_mode,
            "same_week_pref_hit": same_week_pref_hit
        }

    for _, slot in open43_AJ.iterrows():
        apos = int(slot["abs_pos"])
        week_id = slot["week_id"]

        # Strict first, then progressively relaxed; stop at the first mode
        # that produces any candidate.
        best = None
        for relax_mode in relax_sequence:
            for ck in city_keys:
                cand = score_candidate(ck, apos, week_id, relax_mode=relax_mode)
                if cand is not None and (best is None or cand["score"] > best["score"]):
                    best = cand
            if best is not None:
                break

        if best is None:
            # Extreme case, e.g. every candidate excluded by snow / external events.
            continue

        chosen_city = best["ck"]
        chosen_pref = best["pc"]

        # Consume the choice so that later slots see it.
        placed[chosen_city] += 1
        city_last[chosen_city] = apos
        if chosen_pref:
            pref_last[chosen_pref] = apos
            prefs_by_week.setdefault(week_id, set()).add(chosen_pref)

        rem_after = best["rem_before"] - 1
        reason_text = build_reason(
            variant, best["relax_mode"],
            best["need_gap_p"], best["need_gap_c"],
            best["gp"], best["gc"],
            best["pref_rate"], best["pref_p"],
            best["city_rate"], best["city_p"],
            best["rem_before"], rem_after,
            best["same_week_pref_hit"],
            best["score"]
        )

        rows.append({
            "variant": variant,
            "week_id_43": week_id,
            "week_pos_43": int(slot["week_pos"]),
            "row_header": int(slot["row_header"]),
            "assign_city_key": chosen_city,
            "pref_code_guess": chosen_pref,
            "score": float(best["score"]),
            "reason_BT": reason_text
        })

    return pd.DataFrame(rows)

# Generate the three alternative plans; each variant uses its own weights and
# random seed from CONFIG.
planA = make_plan("A")
planB = make_plan("B")
planC = make_plan("C")

# row_header -> city_key / pref_code / reason
def to_map(plan_df, key_col):
    """Map each plan row's integer ``row_header`` to its ``key_col`` value.

    Rows are visited in frame order, so a later row with the same
    ``row_header`` replaces an earlier one.
    """
    lookup = {}
    for _, record in plan_df.iterrows():
        lookup[int(record["row_header"])] = record[key_col]
    return lookup

A_city = to_map(planA, "assign_city_key")
B_city = to_map(planB, "assign_city_key")
C_city = to_map(planC, "assign_city_key")

A_pref = to_map(planA, "pref_code_guess")
B_pref = to_map(planB, "pref_code_guess")
C_pref = to_map(planC, "pref_code_guess")

A_reason = to_map(planA, "reason_BT")

# ====== 10) Write back into ONE file (only the 43rd-term master is modified) and keep only sheets 42/43 ======
wb = openpyxl.load_workbook(QUARTER_XLSX)

# Remove every sheet except the 42nd/43rd-term masters (fix #4)
keep = {sheet42, sheet43}
for name in list(wb.sheetnames):
    if name not in keep:
        wb.remove(wb[name])

ws43 = wb[sheet43]

# Counters reported in the summary printed after saving.
written = 0
skipped = 0

for row0 in open43_AJ["row_header"].tolist():
    # NOTE(review): assumes the pandas row index equals the worksheet row minus one — confirm for sheets with leading blank rows.
    r = int(row0) + 1  # 0-based -> 1-based

    # Safety: write only when both C and D are empty (never overwrite existing entries)
    c_val = ws43.cell(r, COL_CITY+1).value
    d_val = ws43.cell(r, COL_VENUE+1).value
    if (c_val is not None and str(c_val).strip() != "") or (d_val is not None and str(d_val).strip() != ""):
        skipped += 1
        continue

    # Proposed city keys of plans A/B/C for this row; "" when a plan has no proposal.
    a_city = str(A_city.get(int(row0), "") or "")
    b_city = str(B_city.get(int(row0), "") or "")
    c_city = str(C_city.get(int(row0), "") or "")

    # Fix #1: B/C are shown even when identical to A; a missing proposal is shown as "-"
    b_disp = b_city if b_city else "-"
    c_disp = c_city if c_city else "-"

    # Column C: plan A (first choice)
    ws43.cell(r, COL_CITY+1).value = a_city

    # Column D: venue placeholder with the B/C alternatives appended
    ws43.cell(r, COL_VENUE+1).value = f"{VENUE_PLACEHOLDER}（B:{b_disp} / C:{c_disp}）"

    # Column F: prefecture code (fix #3)
    # Prefer plan A's prefecture, then fall back to B, then C
    pref_candidate = A_pref.get(int(row0), "") or B_pref.get(int(row0), "") or C_pref.get(int(row0), "")
    f_val = ws43.cell(r, COL_PREF+1).value
    # Only fill F when it is currently empty and some plan supplied a code.
    if (f_val is None) or (str(f_val).strip() == ""):
        if pref_candidate:
            ws43.cell(r, COL_PREF+1).value = str(pref_candidate)

    # Column BT: plan A's selection reason (COL_REASON_BT is used as a 1-based column index here)
    ws43.cell(r, COL_REASON_BT).value = str(A_reason.get(int(row0), "案A:理由生成なし"))

    written += 1

wb.save(OUT_QUARTER_ABC)

print("✅ 出力:", OUT_QUARTER_ABC)
print("   - 42期マスタ:", sheet42)
print("   - 43期マスタ:", sheet43)
print("✅ 43期 空欄AJ枠:", len(open43_AJ), " / 書込:", written, " / スキップ:", skipped)
print("✅ B/C 欠損(行数) 目安:",
      (len(open43_AJ) - len(planB)), "(B),",
      (len(open43_AJ) - len(planC)), "(C)")
print("✅ 列確認: C=市区分, D=会場, F=都道府県, BT=理由")


✅ 出力: SA+AJ+共有用_四半期表20240303_43期提案ABC_42_43のみ_理由BT.xlsx
   - 42期マスタ: 42期　マスタ
   - 43期マスタ: 43期　マスタ
✅ 43期 空欄AJ枠: 137  / 書込: 137  / スキップ: 0
✅ B/C 欠損(行数) 目安: 0 (B), 0 (C)
✅ 列確認: C=市区分, D=会場, F=都道府県, BT=理由


In [None]:
# ============================================
# 43期 スケジュール自動提案 MVP（A/B/C案）【1ファイル統合版】
# ✅ 42期も考慮（スパン履歴に含める）
# ✅ 集客率も勘案（低い都道府県/市区町村ほど優先度↑）
# ✅ 豪雪NG（12〜2月 + 3月1w）※例外は外部Excelで解除
# ✅ 選定理由をBT列に出力（A案の理由をBT）
# ✅ 「ＡＪの空欄だけ」埋める（合同は“提案しない”が、履歴としてスパンには使う）
#
# ★仕様変更（今回の要望）
# - 出力は1ファイルだけ
# - C列（市区分）は A案の提案を書き込む（今まで通り）
# - D列（会場）には「会場要検討（B:xxx / C:yyy）」のように B案・C案の市区分も同時に載せる
#   → これで四半期表上で「Aが第一候補、B/Cが控え」を1セルで見れる
#
# 入力:
#   SA+AJ+共有用_四半期表20240303.xlsx
#   43期地域別会場回数.xlsx
#   市区町村_統計量_全国 (1).xlsx
# 出力:
#   SA+AJ+共有用_四半期表20240303_43期提案ABC_1ファイル統合_理由BT.xlsx
#   43期_MVP_提案スケジュール案_ABC_集客率_理由BT.xlsx（検証用）
#   豪雪例外リスト.xlsx（無ければ自動生成）
# ============================================

import os, re, math, random, difflib
import pandas as pd
import openpyxl
from openpyxl.utils import get_column_letter

# ====== 入力（Colabにアップロードしたファイル名） ======
QUARTER_XLSX     = "SA+AJ+共有用_四半期表20240303.xlsx"
REGION_XLSX      = "43期地域別会場回数.xlsx"
ATTRACTION_XLSX  = "市区町村_統計量_全国 (1).xlsx"

# ====== 出力 ======
OUT_QUARTER_ABC = "SA+AJ+共有用_四半期表20240303_43期提案ABC_1ファイル統合_理由BT.xlsx"
OUT_ANALYSIS    = "43期_MVP_提案スケジュール案_ABC_集客率_理由BT.xlsx"
SNOW_EXCEPT_XLSX = "豪雪例外リスト.xlsx"

# ====== 四半期表の列（0-based） ======
COL_WEEK = 0           # A
COL_REGION = 1         # B
COL_CITY = 2           # C（市区分）
COL_VENUE = 3          # D（会場）
COL_PREF = 5           # F（例: 07福島）
COL_KIND_DETAIL = 1    # 形態行のB列にAJ/合同/SA
COL_REASON_BT = 72     # BT列（1-based=72）

VENUE_PLACEHOLDER = "会場要検討"
week_id_pat = re.compile(r"^\d{1,2}-\dw$")

# ====== 設定 ======
CONFIG = {
    "GAP_WEEKS_MIN": 6,
    "GAP_WEEKS_MAX": 30,

    # 豪雪ブラックアウト（暫定）
    "SNOW_BLACKOUT_MONTHS": {12, 1, 2},
    "SNOW_BLACKOUT_MARCH_W": {1},
    "SNOW_PREF_CODES": {
        "01北海道","02青森","03岩手","04宮城","05秋田","06山形","07福島",
        "15新潟","16富山","17石川","18福井","19山梨",
        "20長野","21岐阜","31鳥取","32島根"
    },

    # 表記ゆれ
    "FUZZY_CUTOFF": 0.82,
    "MANUAL_CITY_MAP": {},

    # 祭り/マラソン等（後日差し込み）
    "EXTERNAL_EVENT_BLACKOUT": [],

    # 3案の重み
    "WEIGHTS": {
        "A": {"pref_slack": 3.0, "city_slack": 2.0, "unmet_bonus": 1.2, "over_penalty": 1.0,
              "low_attr_pref": 2.0, "low_attr_city": 1.5, "relax_penalty": 25.0, "noise": 0.8},
        "B": {"pref_slack": 4.5, "city_slack": 3.2, "unmet_bonus": 1.0, "over_penalty": 1.2,
              "low_attr_pref": 0.8, "low_attr_city": 0.6, "relax_penalty": 30.0, "noise": 0.8},
        "C": {"pref_slack": 2.0, "city_slack": 1.2, "unmet_bonus": 1.2, "over_penalty": 1.0,
              "low_attr_pref": 3.8, "low_attr_city": 3.2, "relax_penalty": 25.0, "noise": 0.8},
    },
    "SEEDS": {"A": 4301, "B": 4302, "C": 4303},
}

# ====== Utility ======
def norm(s):
    """Return ``s`` as a comparison key with separators and brackets removed.

    ``None`` and float NaN give ``""``.  Anything else is converted with
    ``str``, full-width spaces become ASCII spaces, the ends are stripped and
    every character listed in the pattern (whitespace, dashes and the long
    vowel mark, slashes, middle dot, commas, periods, brackets) is deleted.
    """
    is_missing = s is None or (isinstance(s, float) and math.isnan(s))
    if is_missing:
        return ""
    text = str(s).replace("　", " ").strip()
    return re.sub(r"[ \t\n\r\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", text)

def find_sheet_name(xlsx_path, keywords):
    """Return the first sheet name that contains every string in ``keywords``.

    Falls back to the workbook's first sheet name when no sheet matches.

    The workbook is opened read-only; such workbooks keep the underlying file
    open until ``close()`` is called, so it is closed here before returning.
    """
    wb = openpyxl.load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        names = list(wb.sheetnames)
    finally:
        wb.close()
    for name in names:
        if all(k in name for k in keywords):
            return name
    return names[0]

def parse_week_id(week_id):
    """Split a week id such as ``"6-2w"`` into ``(month, week_no)`` integers.

    Returns ``(None, None)`` when the text is not of the form
    ``<1-2 digits>-<1 digit>w``.
    """
    found = re.match(r"^(\d{1,2})-(\d)w$", str(week_id))
    if found is None:
        return None, None
    month_txt, week_txt = found.groups()
    return int(month_txt), int(week_txt)

def gap_weeks_from_count(cnt):
    """Turn a planned visit count into a required gap in weeks.

    The gap is ``ceil(52 / cnt)`` clamped to the configured
    ``GAP_WEEKS_MIN`` .. ``GAP_WEEKS_MAX`` range.  A count of zero or less
    yields ``GAP_WEEKS_MIN``.
    """
    lower = CONFIG["GAP_WEEKS_MIN"]
    if cnt <= 0:
        return lower
    raw_gap = int(math.ceil(52 / cnt))
    capped = min(CONFIG["GAP_WEEKS_MAX"], raw_gap)
    return max(lower, capped)

def kind_norm(x):
    """Normalise a session-kind cell so AJ / SA are written in full-width letters.

    ``None`` and float NaN become ``""``.  Otherwise the stripped text has the
    half-width upper-case and full-width lower-case spellings replaced by
    ``ＡＪ`` / ``ＳＡ``; other text is left unchanged.
    """
    if x is None:
        return ""
    if isinstance(x, float) and math.isnan(x):
        return ""
    label = str(x).strip()
    spellings = (("AJ", "ＡＪ"), ("ａｊ", "ＡＪ"), ("SA", "ＳＡ"), ("ｓａ", "ＳＡ"))
    for variant, canonical in spellings:
        label = label.replace(variant, canonical)
    return label

def ensure_snow_except_template(path):
    """Create an empty snow-exception workbook at ``path`` unless one exists.

    The generated sheet is titled "例外" and carries the header cells
    pref_code / city_key / memo plus a usage note in C2.
    """
    if not os.path.exists(path):
        book = openpyxl.Workbook()
        sheet = book.active
        sheet.title = "例外"
        template_cells = (
            ("A1", "pref_code"),
            ("B1", "city_key"),
            ("C1", "memo"),
            ("C2", "豪雪NGを例外的にOKにしたい対象を追加。空欄は無視。"),
        )
        for cell_ref, text in template_cells:
            sheet[cell_ref] = text
        book.save(path)

def load_snow_excepts(path):
    """Read the snow-exception workbook and return ``(pref_codes, city_keys)``.

    The template is created first when the file does not exist.  Each result
    is the set of non-empty, stripped strings found in the ``pref_code`` /
    ``city_key`` column; a missing column yields an empty set.
    """
    ensure_snow_except_template(path)
    table = pd.read_excel(path, sheet_name=0, dtype=str)

    def column_values(col):
        # Missing column -> empty series, so the result is an empty set.
        series = table.get(col, pd.Series([], dtype=str))
        cleaned = series.dropna().astype(str).str.strip()
        return {value for value in set(cleaned) if value}

    pref_codes = column_values("pref_code")
    city_keys_ok = column_values("city_key")
    return pref_codes, city_keys_ok

SNOW_EXCEPT_PREF_CODES, SNOW_EXCEPT_CITY_KEYS = load_snow_excepts(SNOW_EXCEPT_XLSX)

def is_snow_blackout(pref_code, city_key, week_id):
    """Tell whether a prefecture/city is blocked by the snow rule in ``week_id``.

    Returns ``False`` when the prefecture code is empty, when the prefecture
    or city is listed as an exception, when the prefecture is not one of the
    configured snow prefectures, or when ``week_id`` cannot be parsed.
    Otherwise returns ``True`` for the configured blackout months and for the
    configured weeks of March.
    """
    not_subject = (
        not pref_code
        or pref_code in SNOW_EXCEPT_PREF_CODES
        or (city_key and city_key in SNOW_EXCEPT_CITY_KEYS)
        or pref_code not in CONFIG["SNOW_PREF_CODES"]
    )
    if not_subject:
        return False

    month, week_no = parse_week_id(week_id)
    if month is None:
        return False
    return month in CONFIG["SNOW_BLACKOUT_MONTHS"] or (
        month == 3 and week_no in CONFIG["SNOW_BLACKOUT_MARCH_W"]
    )

def build_week_order(qsheet):
    """Collect the distinct week ids of a quarter sheet in first-seen order.

    Returns ``(week_order, index_map)`` where ``week_order`` is the list of
    week ids found in the week column and ``index_map`` maps each id to its
    position in that list.

    Cell text is stripped before matching.  ``extract_blocks`` strips the
    cell before looking it up in ``index_map``, so an id with stray
    surrounding whitespace (or a trailing newline, which ``$`` tolerates)
    must be registered under its stripped form or the lookup would miss.
    """
    week_order, seen = [], set()
    for raw in qsheet[COL_WEEK].astype(str).tolist():
        v = raw.strip()
        if week_id_pat.match(v) and v not in seen:
            week_order.append(v)
            seen.add(v)
    return week_order, {w: i for i, w in enumerate(week_order)}

def extract_blocks(qsheet, week_index_map, which):
    """Extract the ＡＪ / 合同 slots of one quarter sheet as a DataFrame.

    A slot is a "header" row whose week column holds a week id; the row
    directly below it (the "detail" row) supplies the session kind.  The last
    sheet row is never treated as a header because it has no detail row.

    Args:
        qsheet: sheet contents read with integer column labels.
        week_index_map: week id -> ordinal position; slots whose week id is
            not in the map are dropped.
        which: label stored in the ``fy`` column of every returned row.

    Returns:
        DataFrame with columns fy, week_id, week_pos, row_header, row_detail,
        kind, city_raw, venue_raw, pref_code, restricted to rows whose kind
        is "ＡＪ" or "合同".  An empty frame with these columns is returned
        when the sheet has no slot rows.
    """
    columns = ["fy", "week_id", "week_pos", "row_header", "row_detail",
               "kind", "city_raw", "venue_raw", "pref_code"]

    def cell_text(value):
        # Missing cells become "", everything else a stripped string.
        return "" if pd.isna(value) else str(value).strip()

    blocks = []
    for i in range(len(qsheet) - 1):
        w = qsheet.iat[i, COL_WEEK]
        if pd.isna(w):
            continue
        w = str(w).strip()
        if not week_id_pat.match(w):
            continue

        header = qsheet.iloc[i]
        detail = qsheet.iloc[i + 1]

        blocks.append({
            "fy": which, "week_id": w, "week_pos": week_index_map.get(w, None),
            "row_header": i, "row_detail": i + 1,
            "kind": kind_norm(detail[COL_KIND_DETAIL]),
            "city_raw": cell_text(header[COL_CITY]),
            "venue_raw": cell_text(header[COL_VENUE]),
            "pref_code": cell_text(header[COL_PREF]),
        })

    # Passing ``columns`` keeps the schema even when ``blocks`` is empty;
    # without it dropna(subset=["week_pos"]) raises KeyError on a column-less frame.
    df = pd.DataFrame(blocks, columns=columns).dropna(subset=["week_pos"]).copy()
    df["week_pos"] = df["week_pos"].astype(int)
    return df[df["kind"].isin(["ＡＪ", "合同"])].copy()

def best_fuzzy_key(a_norm, candidates_norm):
    """Return ``(candidate, ratio)`` for the candidate most similar to ``a_norm``.

    Similarity is ``difflib.SequenceMatcher(...).ratio()``.  Empty candidates
    are ignored, the earliest candidate wins ties, and ``("", 0.0)`` is
    returned when no candidate scores above zero.
    """
    scored = [
        (difflib.SequenceMatcher(None, a_norm, cand).ratio(), cand)
        for cand in candidates_norm
        if cand
    ]
    top = max((ratio for ratio, _ in scored), default=0.0)
    if top <= 0.0:
        return "", 0.0
    for ratio, cand in scored:
        if ratio == top:
            return cand, ratio
    return "", 0.0

# ====== 1) Load quarter sheets ======
# Locate the 42nd/43rd-term master sheets by name and read them without a header row,
# so columns are addressed by the integer COL_* positions.
sheet42 = find_sheet_name(QUARTER_XLSX, ["42期", "マスタ"])
sheet43 = find_sheet_name(QUARTER_XLSX, ["43期", "マスタ"])
q42 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet42, header=None)
q43 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet43, header=None)

# Week ids in first-seen order and their ordinal positions, per term.
week_order_42, week_index_42 = build_week_order(q42)
week_order_43, week_index_43 = build_week_order(q43)

# ＡＪ / 合同 slots of each term.
b42 = extract_blocks(q42, week_index_42, "42")
b43 = extract_blocks(q43, week_index_43, "43")

# ====== 2) region plan (city_key & planned count) ======
r = pd.read_excel(REGION_XLSX, sheet_name=0, header=None)
# Keep only rows that have both a value in column 2 and a value in column 1.
plan_rows = r[~r[2].isna()].copy()
plan_rows = plan_rows[plan_rows[1].notna()]
# NOTE(review): the forward fill runs after the row filter, so a parent label that sits
# on a filtered-out row is not propagated — confirm this matches the workbook layout.
plan_rows["pref_parent"] = plan_rows[0].ffill()
plan_rows["city_key"] = plan_rows[1].astype(str)
# Non-numeric counts are treated as 0.
plan_rows["plan_count"] = pd.to_numeric(plan_rows[2], errors="coerce").fillna(0).astype(int)
region_master = plan_rows[["pref_parent","city_key","plan_count"]].reset_index(drop=True)

# Lookup tables used for matching raw city labels against plan keys.
city_keys = region_master["city_key"].tolist()
city_norm_map = {k: norm(k) for k in city_keys}
_norm_values = list(city_norm_map.values())
# Keys that normalise to the same text collapse to the last one seen.
_norm_to_key = {city_norm_map[k]: k for k in city_norm_map}

def match_city_key(city_name):
    """Resolve a raw city label to a plan ``city_key``.

    Matching is tried in this order: manual map (score 1.0), exact match on
    the normalised text (1.0), substring match in either direction against
    the normalised plan keys (0.95, first key in plan order), and finally the
    best fuzzy ratio if it reaches ``CONFIG["FUZZY_CUTOFF"]``.

    Returns:
        ``(city_key, score)``; ``city_key`` is ``None`` when nothing matched.
        An empty label, or one that normalises to nothing, gives
        ``(None, 0.0)``.
    """
    if not city_name:
        return None, 0.0
    if city_name in CONFIG["MANUAL_CITY_MAP"]:
        return CONFIG["MANUAL_CITY_MAP"][city_name], 1.0
    a = norm(city_name)
    if not a:
        # Labels made only of separators ("-", "／", ...) normalise to "".
        # "" is a substring of every key, so without this guard the partial
        # match below would return the first plan key with score 0.95.
        return None, 0.0
    if a in _norm_to_key:
        return _norm_to_key[a], 1.0
    # Partial match: plan key contained in the label, or the label in the key.
    for ck, ckn in city_norm_map.items():
        if ckn and (ckn in a or a in ckn):
            return ck, 0.95
    bn, ratio = best_fuzzy_key(a, _norm_values)
    if ratio >= CONFIG["FUZZY_CUTOFF"] and bn:
        return _norm_to_key[bn], ratio
    return None, ratio

def add_city_key(df):
    """Return a copy of ``df`` with ``city_key`` and ``match_score`` columns.

    Both columns come from ``match_city_key`` applied to each ``city_raw``
    value; the input frame is not modified.
    """
    matches = [match_city_key(raw) for raw in df["city_raw"].tolist()]
    out = df.copy()
    out["city_key"] = [key for key, _ in matches]
    out["match_score"] = [score for _, score in matches]
    return out

EXCLUDE_AJ_BASE_VENUES = ("AJ日本橋", "AJ秋葉原")
EXCLUDE_AJ_COUNT_KEYWORDS = ("萌え", "イラスト", "JIF")

def is_excluded_aj_base_venue(venue_raw):
    """Tell whether a venue cell names one of the AJ base venues to exclude.

    Whitespace (including full-width spaces) is removed first.  A venue is
    excluded when it equals a base venue name, or starts with one and the
    remainder contains none of the ``EXCLUDE_AJ_COUNT_KEYWORDS``.  Only the
    first base name that prefixes the venue is considered.
    """
    compact = re.sub(r"[\s　]+", "", "" if venue_raw is None else str(venue_raw))
    for base in EXCLUDE_AJ_BASE_VENUES:
        if not compact.startswith(base):
            continue
        remainder = compact[len(base):]
        # Empty remainder -> exact base venue; keyword in remainder -> keep the row.
        return not any(word in remainder for word in EXCLUDE_AJ_COUNT_KEYWORDS)
    return False

scheduled42 = b42[((b42["city_raw"]!="") | (b42["venue_raw"]!="")) & (~b42["venue_raw"].apply(is_excluded_aj_base_venue))].copy()
scheduled43 = b43[((b43["city_raw"]!="") | (b43["venue_raw"]!="")) & (~b43["venue_raw"].apply(is_excluded_aj_base_venue))].copy()

scheduled42 = add_city_key(scheduled42)
scheduled43 = add_city_key(scheduled43)

# ★提案対象：43期の「AJ」かつ「C(市区分)とD(会場)が空」のヘッダ行のみ
open43_AJ = b43[
    (b43["kind"]=="ＡＪ") &
    (b43["city_raw"]=="") &
    (b43["venue_raw"]=="")
].copy().sort_values(["week_pos","row_header"]).reset_index(drop=True)

# ====== 3) Pref mapping & gap ======
pref_code_set = set([str(x).strip() for x in q43[COL_PREF].dropna().astype(str).unique().tolist()])
pref_name_to_code = {}
for pc in pref_code_set:
    m = re.match(r"^(\d{2})(.+)$", pc)
    if m:
        pref_name_to_code[m.group(2)] = pc

def parent_to_pref_code(pref_parent):
    """Look up the prefecture code for a region-plan parent label.

    The label's leading run of non-digit characters (stripped) is used as the
    prefecture name; when the label has no such run the whole stripped label
    is used.  Returns ``""`` for ``None``/NaN or an unknown name.
    """
    if pref_parent is None:
        return ""
    if isinstance(pref_parent, float) and math.isnan(pref_parent):
        return ""
    label = str(pref_parent).strip()
    leading = re.match(r"^([^\d]+)", label)
    name = label if leading is None else leading.group(1).strip()
    return pref_name_to_code.get(name, "")

# Per-city prefecture code and planned count (a repeated city_key keeps its last row).
region_master["pref_code_guess"] = region_master["pref_parent"].apply(parent_to_pref_code)
pref_by_city = region_master.set_index("city_key")["pref_code_guess"].to_dict()
plan_count_by_city = region_master.set_index("city_key")["plan_count"].to_dict()

# Required gaps in weeks, derived from the planned counts; cities without a
# resolved prefecture code do not contribute a prefecture gap.
pref_plan_count = region_master.groupby("pref_code_guess")["plan_count"].sum().to_dict()
pref_gap = {pc: gap_weeks_from_count(int(cnt)) for pc, cnt in pref_plan_count.items() if pc}
city_gap = {ck: gap_weeks_from_count(int(cnt)) for ck, cnt in plan_count_by_city.items()}

# ====== 4) Timeline & last positions ======
# Put both terms on one axis: term-43 positions are shifted by the number of term-42 weeks.
OFFSET_43 = len(week_order_42)
scheduled42["abs_pos"] = scheduled42["week_pos"]
scheduled43["abs_pos"] = scheduled43["week_pos"] + OFFSET_43
open43_AJ["abs_pos"]   = open43_AJ["week_pos"] + OFFSET_43
scheduled_all = pd.concat([scheduled42, scheduled43], ignore_index=True)

# Latest (maximum) absolute position at which each prefecture / city is already scheduled.
# This is the maximum over both terms, so it can lie after an open slot being filled.
last_pos_pref, last_pos_city = {}, {}
for _, e in scheduled_all.dropna(subset=["abs_pos"]).iterrows():
    ap = int(e["abs_pos"])
    pc = e["pref_code"]
    ck = e["city_key"]
    if pc:
        last_pos_pref[pc] = max(last_pos_pref.get(pc, -999), ap)
    if ck:
        last_pos_city[ck] = max(last_pos_city.get(ck, -999), ap)

# ====== 5) Remaining base (plan - scheduled43) ======
# Can be negative when more term-43 events are already scheduled than planned.
scheduled_counts_43 = scheduled43.dropna(subset=["city_key"]).groupby("city_key").size().to_dict()
rem_base = {ck: plan_count_by_city.get(ck,0) - scheduled_counts_43.get(ck,0) for ck in city_keys}

# ====== 6) Attraction (集客率) ======
def load_attraction(path):
    """Load the attraction-rate table and derive per-city / per-prefecture rates.

    Column detection tries exact header names first.  If any of the three
    columns is still unknown, a looser search is used: a non-text column
    whose name contains "率" for the rate, and text columns whose names
    contain "県" (prefecture) or one of 市/町/村/区 (municipality).

    Returns:
        dict with "df" (rows that have a numeric rate), the detected column
        names "pref_col" / "muni_col" / "rate_col", "muni_rate_norm"
        (normalised municipality name -> rate; later rows win on duplicates)
        and "pref_rate" (prefecture text -> mean rate).

    Raises:
        KeyError: when a prefecture, municipality or rate column cannot be
            detected; the message lists the missing roles and the headers
            that were found.
    """
    df = pd.read_excel(path, sheet_name=0)
    df.columns = [str(c).strip() for c in df.columns]

    pref_col = next((c for c in df.columns if c in ["県","都道府県","pref","prefecture"]), None)
    muni_col = next((c for c in df.columns if c in ["市区町村","市町村","自治体","municipality","市区分"]), None)
    rate_col = next((c for c in df.columns if "集客率" in c or "来場率" in c or "動員率" in c), None)

    if pref_col is None or muni_col is None or rate_col is None:
        text_cols = [c for c in df.columns if df[c].dtype == object]
        if rate_col is None:
            rate_col = next((c for c in df.columns if "率" in c and df[c].dtype != object), None)
        if pref_col is None:
            # "都道府県" also contains "県", so one test covers both spellings.
            pref_col = next((c for c in text_cols if "県" in c), None)
        if muni_col is None:
            muni_col = next((c for c in text_cols if any(ch in c for ch in "市町村区")), None)

    # Fail with an explicit message instead of an opaque ``KeyError: None``
    # from the column lookups below.
    missing = [role for role, col in (("prefecture", pref_col),
                                      ("municipality", muni_col),
                                      ("rate", rate_col)) if col is None]
    if missing:
        raise KeyError(
            f"load_attraction: could not detect {', '.join(missing)} column(s) "
            f"in {path!r}; headers found: {list(df.columns)}"
        )

    d = df.copy()
    d[pref_col] = d[pref_col].astype(str).str.strip()
    d[muni_col] = d[muni_col].astype(str).str.strip()
    d[rate_col] = pd.to_numeric(d[rate_col], errors="coerce")
    d = d.dropna(subset=[rate_col])

    muni_rate_norm = {norm(name): float(rate) for name, rate in zip(d[muni_col], d[rate_col])}
    pref_rate = d.groupby(pref_col)[rate_col].mean().to_dict()

    return {"df": d, "pref_col": pref_col, "muni_col": muni_col, "rate_col": rate_col,
            "muni_rate_norm": muni_rate_norm, "pref_rate": {str(k).strip(): float(v) for k, v in pref_rate.items()}}

attr = load_attraction(ATTRACTION_XLSX)

pref_rate_by_code = {}
for pc in pref_gap.keys():
    m = re.match(r"^\d{2}(.+)$", pc)
    if m:
        name = m.group(1)
        if name in attr["pref_rate"]:
            pref_rate_by_code[pc] = attr["pref_rate"][name]

city_rate = {}
for ck in city_keys:
    cn = norm(ck)
    if cn in attr["muni_rate_norm"]:
        city_rate[ck] = attr["muni_rate_norm"][cn]

def percentile_need(values_dict):
    """Rank the values of a dict and return ``(pct_map, need_map)``.

    ``pct_map`` holds each key's percentile rank (``Series.rank(pct=True,
    method="average")``); ``need_map`` holds ``1 - percentile`` as a float.
    Entries whose value is ``None`` or float NaN are left out.  Two empty
    dicts are returned when nothing usable remains.
    """
    def usable(value):
        if value is None:
            return False
        return not (isinstance(value, float) and math.isnan(value))

    kept_keys = [key for key, value in values_dict.items() if usable(value)] if values_dict else []
    if not kept_keys:
        return {}, {}

    ranks = pd.Series([values_dict[key] for key in kept_keys]).rank(pct=True, method="average").tolist()
    pct_map = dict(zip(kept_keys, ranks))
    need_map = {key: float(1 - share) for key, share in pct_map.items()}
    return pct_map, need_map

pref_pct, pref_need = percentile_need(pref_rate_by_code)
city_pct, city_need = percentile_need(city_rate)

# ====== 7) External blackout placeholder ======
external_blackout = {(d.get("city_key"), d.get("week_id")): d.get("reason","外部イベント")
                     for d in CONFIG["EXTERNAL_EVENT_BLACKOUT"]}

def fmt_pct(p):
    """Format a 0-1 fraction as a whole-number percentage string.

    Returns "NA" for ``None`` and for float NaN, which is the same treatment
    of missing values that ``fmt_rate`` applies (NaN used to render as
    "nan%").
    """
    if p is None or (isinstance(p, float) and math.isnan(p)):
        return "NA"
    return f"{p*100:.0f}%"

def fmt_rate(v):
    """Format an attraction rate for display.

    ``None``/NaN give "NA".  Values up to and including 1.0 are shown as a
    percentage with two decimals; larger values are shown with four
    significant digits.
    """
    if v is None:
        return "NA"
    if isinstance(v, float) and math.isnan(v):
        return "NA"
    shown_as_percent = v <= 1.0
    return f"{v*100:.2f}%" if shown_as_percent else f"{v:.4g}"

def build_reason(variant, relax_mode, need_gap_p, need_gap_c, gp, gc,
                 pref_rate, pref_p, city_rate_v, city_p,
                 rem_before, rem_after, score):
    """Compose the " / "-separated reason text for one proposal.

    The text names the plan variant, the relax mode (only when set), the
    required vs. actual prefecture and city gaps, the prefecture and city
    attraction rates with their percentile (or "NA" when the rate is
    ``None``), two fixed status markers, the remaining plan count before and
    after the assignment, and the score.  The result is cut to 32000
    characters.
    """
    if pref_rate is None:
        pref_part = "集客率(県)=NA"
    else:
        pref_part = f"集客率(県)={fmt_rate(pref_rate)}(下位{fmt_pct(pref_p)})"

    if city_rate_v is None:
        city_part = "集客率(市区)=NA"
    else:
        city_part = f"集客率(市区)={fmt_rate(city_rate_v)}(下位{fmt_pct(city_p)})"

    pieces = [f"案{variant}"]
    if relax_mode:
        pieces.append(f"制約緩和={relax_mode}")
    pieces.extend([
        f"スパンB(県):必要{need_gap_p}w/実績{gp}w",
        f"スパンA(市区):必要{need_gap_c}w/実績{gc}w",
        pref_part,
        city_part,
        "豪雪=判定済",
        "祭り=未連携",
        f"計画残(前)={rem_before}→(後)={rem_after}",
        f"score={score:.2f}",
    ])
    return " / ".join(pieces)[:32000]

# ====== 8) Plan generator ======
def make_plan(variant):
    """Build one proposal plan ("A", "B" or "C") for the open term-43 AJ slots.

    Slots are processed in the order of ``open43_AJ``.  For each slot every
    plan city is scored, first with both gap constraints enforced, then (only
    if nothing qualified) with the city gap relaxed ("Aのみ"), then with both
    gaps relaxed ("B+A").  Snow blackout and external-event blackout always
    exclude a candidate.  The highest score wins (the earliest city wins
    ties); the chosen city updates the per-plan usage count and last
    positions before the next slot is processed.  A slot with no candidate at
    all is skipped.

    Scores include a small random term drawn from a generator seeded with
    ``CONFIG["SEEDS"][variant]``.

    Returns:
        DataFrame with one row per filled slot (variant, week id/position,
        row_header, assigned city key, prefecture code, score, reason text).
    """
    rng = random.Random(CONFIG["SEEDS"][variant])
    W = CONFIG["WEIGHTS"][variant]

    used = dict.fromkeys(city_keys, 0)
    # Per-plan copies so that the plans do not influence each other.
    lp_p = dict(last_pos_pref)
    lp_c = dict(last_pos_city)

    assigns = []

    def score_candidate(ck, apos, week_id, relax_mode):
        """Score city ``ck`` for the slot, or return None if it is not allowed."""
        pc = pref_by_city.get(ck, "")
        need_gap_p = pref_gap.get(pc, CONFIG["GAP_WEEKS_MIN"]) if pc else CONFIG["GAP_WEEKS_MIN"]
        need_gap_c = city_gap.get(ck, CONFIG["GAP_WEEKS_MIN"])

        last_p = lp_p.get(pc, None) if pc else None
        last_c = lp_c.get(ck, None)

        # 999 stands for "never scheduled before".
        gp = (apos - last_p) if last_p is not None else 999
        gc = (apos - last_c) if last_c is not None else 999

        pref_ok = (last_p is None) or (gp >= need_gap_p)
        city_ok = (last_c is None) or (gc >= need_gap_c)

        # Gap constraints depend on the relax mode; "B+A" enforces neither.
        if relax_mode is None and not (pref_ok and city_ok):
            return None
        if relax_mode == "Aのみ" and not pref_ok:
            return None

        # Snow and external-event blackouts are never relaxed.
        if pc and is_snow_blackout(pc, ck, week_id):
            return None
        if (ck, week_id) in external_blackout:
            return None

        slack_p = gp - need_gap_p
        slack_c = gc - need_gap_c

        rem_before = rem_base.get(ck, 0) - used.get(ck, 0)
        unmet = max(rem_before, 0)
        over  = max(-rem_before, 0)

        pref_rate = pref_rate_by_code.get(pc, None)
        pref_need_score = pref_need.get(pc, 0.5)
        city_rate_v = city_rate.get(ck, None)
        city_need_score = city_need.get(ck, 0.5)

        if relax_mode == "Aのみ":
            relax_pen = W["relax_penalty"] * 0.6
        elif relax_mode == "B+A":
            relax_pen = W["relax_penalty"] * 1.0
        else:
            relax_pen = 0.0

        # The random draw is the last term, so it happens only for candidates
        # that passed every filter above.
        score = (
            slack_p * W["pref_slack"] +
            slack_c * W["city_slack"] +
            unmet   * W["unmet_bonus"] * 5.0 -
            over    * W["over_penalty"] * 5.0 +
            (pref_need_score * 10) * W["low_attr_pref"] +
            (city_need_score * 10) * W["low_attr_city"] -
            relax_pen +
            rng.uniform(-0.5, 0.5) * W["noise"]
        )

        return {
            "ck": ck, "pc": pc, "score": score,
            "need_gap_p": need_gap_p, "need_gap_c": need_gap_c,
            "gp": gp, "gc": gc,
            "pref_rate": pref_rate, "pref_p": pref_pct.get(pc, None),
            "city_rate": city_rate_v, "city_p": city_pct.get(ck, None),
            "rem_before": rem_before,
            "relax_mode": relax_mode
        }

    # Strict pass first, then city gap relaxed, then both gaps relaxed.
    relax_sequence = (None, "Aのみ", "B+A")

    for _, slot in open43_AJ.iterrows():
        apos = int(slot["abs_pos"])
        week_id = slot["week_id"]

        best = None
        for mode in relax_sequence:
            for city in city_keys:
                cand = score_candidate(city, apos, week_id, relax_mode=mode)
                if cand is None:
                    continue
                if best is None or cand["score"] > best["score"]:
                    best = cand
            if best is not None:
                break

        if best is None:
            continue

        ck = best["ck"]
        pc = best["pc"]

        # Consume the choice so later slots see it.
        used[ck] += 1
        lp_c[ck] = apos
        if pc:
            lp_p[pc] = apos

        rem_after = best["rem_before"] - 1
        reason = build_reason(
            variant, best["relax_mode"],
            best["need_gap_p"], best["need_gap_c"],
            best["gp"], best["gc"],
            best["pref_rate"], best["pref_p"],
            best["city_rate"], best["city_p"],
            best["rem_before"], rem_after,
            best["score"]
        )

        assigns.append({
            "variant": variant,
            "week_id_43": week_id,
            "week_pos_43": int(slot["week_pos"]),
            "row_header": int(slot["row_header"]),
            "assign_city_key": ck,
            "pref_code_guess": pc,
            "score": float(best["score"]),
            "reason_BT": reason
        })

    return pd.DataFrame(assigns)

planA = make_plan("A")
planB = make_plan("B")
planC = make_plan("C")

# ====== 9) Merge A/B/C by row_header ======
def idx_map(plan_df):
    """Map each plan row's integer ``row_header`` to its city key as a string.

    A later row with the same ``row_header`` replaces an earlier one.
    """
    mapping = {}
    for _, row in plan_df.iterrows():
        mapping[int(row["row_header"])] = str(row["assign_city_key"])
    return mapping

mapA = idx_map(planA)
mapB = idx_map(planB)
mapC = idx_map(planC)

reasonA = {int(r["row_header"]): str(r["reason_BT"]) for _, r in planA.iterrows()}

# ====== 10) Write-back to ONE file ======
wb = openpyxl.load_workbook(QUARTER_XLSX)
ws = wb[sheet43]

# Counters reported in the summary sheet and the final print.
written = 0
skipped = 0

for row0 in open43_AJ["row_header"].tolist():
    # NOTE(review): assumes the pandas row index equals the worksheet row minus one — confirm for sheets with leading blank rows.
    r = int(row0) + 1  # 0-based -> 1-based

    # Safety: write only when both C and D are empty
    c_val = ws.cell(r, COL_CITY+1).value
    d_val = ws.cell(r, COL_VENUE+1).value
    if (c_val is not None and str(c_val).strip() != "") or (d_val is not None and str(d_val).strip() != ""):
        skipped += 1
        continue

    # Proposed city keys of plans A/B/C for this row; "" when a plan has no proposal.
    a_city = mapA.get(int(row0), "")
    b_city = mapB.get(int(row0), "")
    c_city = mapC.get(int(row0), "")

    # Column C: plan A (first choice)
    ws.cell(r, COL_CITY+1).value = a_city if a_city else ""

    # Column D: venue placeholder with the B/C alternatives appended
    # e.g. 会場要検討（B:○○ / C:△△）
    # B is listed only when it differs from A; C only when it differs from both A and B.
    extra = []
    if b_city and b_city != a_city:
        extra.append(f"B:{b_city}")
    if c_city and c_city != a_city and c_city != b_city:
        extra.append(f"C:{c_city}")
    suffix = f"（{' / '.join(extra)}）" if extra else ""
    ws.cell(r, COL_VENUE+1).value = f"{VENUE_PLACEHOLDER}{suffix}"

    # Column BT: plan A's selection reason (COL_REASON_BT is used as a 1-based column index here)
    ws.cell(r, COL_REASON_BT).value = reasonA.get(int(row0), f"案A:理由生成なし")

    written += 1

wb.save(OUT_QUARTER_ABC)

# ====== 11) Analysis workbook ======
with pd.ExcelWriter(OUT_ANALYSIS, engine="openpyxl") as w:
    planA.sort_values(["week_pos_43","row_header"]).to_excel(w, index=False, sheet_name="提案A")
    planB.sort_values(["week_pos_43","row_header"]).to_excel(w, index=False, sheet_name="提案B")
    planC.sort_values(["week_pos_43","row_header"]).to_excel(w, index=False, sheet_name="提案C")
    open43_AJ.to_excel(w, index=False, sheet_name="空欄_AJヘッダ")

    pd.DataFrame([{"info":"sheet43", "value": sheet43},
                  {"info":"空欄_AJヘッダ数", "value": len(open43_AJ)},
                  {"info":"書込", "value": written},
                  {"info":"スキップ", "value": skipped},
                  {"info":"BT列", "value": f"{COL_REASON_BT}({get_column_letter(COL_REASON_BT)})"},
                  {"info":"集客率列(pref_col)", "value": attr["pref_col"]},
                  {"info":"集客率列(muni_col)", "value": attr["muni_col"]},
                  {"info":"集客率列(rate_col)", "value": attr["rate_col"]},
                  ]).to_excel(w, index=False, sheet_name="サマリ")

print("✅ 出力（四半期表_統合）:", OUT_QUARTER_ABC, f"(書込{written}/スキップ{skipped})")
print("✅ 出力（検証）:", OUT_ANALYSIS)
print("✅ 豪雪例外:", SNOW_EXCEPT_XLSX)


✅ 出力（四半期表_統合）: SA+AJ+共有用_四半期表20240303_43期提案ABC_1ファイル統合_理由BT.xlsx (書込137/スキップ0)
✅ 出力（検証）: 43期_MVP_提案スケジュール案_ABC_集客率_理由BT.xlsx
✅ 豪雪例外: 豪雪例外リスト.xlsx


In [None]:
# ============================================
# 43期 スケジュール自動提案 MVP（A/B/C案）
# ✅ 42期も考慮（スパン履歴に含める）
# ✅ 集客率も勘案（低い都道府県/市区町村ほど優先度↑）
# ✅ 豪雪NG（12〜2月 + 3月1w）※例外は外部Excelで解除
# ✅ 選定理由を四半期表のBT列に出力
# ✅ 「ＡＪの空欄だけ」埋める（合同は“提案しない”が、履歴としてスパンには使う）
# ✅ 空欄＝(市区分(C列) & 会場(D列) が空) のヘッダ行だけ書き込み（上書き防止）
#
# ★修正ポイント（重要）
# - 「地域別会場回数の残数(rem)が0になったら候補から除外」だと、提案が1件しか出ないことがある
#   → rem<=0 でも候補として残し、"未達ボーナス" と "超過ペナルティ" で優先度を調整する
# - それでもスパン制約が厳しすぎて候補が無い枠が出た場合
#   → フォールバックで A(市区)スパンだけ緩和 → 最後にBも緩和(強ペナ/要確認) して「全部埋める」
# ============================================

import os, re, math, random, difflib
import pandas as pd
import openpyxl
from openpyxl.utils import get_column_letter

# ====== 入力（Colabにアップロードしたファイル名） ======
QUARTER_XLSX     = "SA+AJ+共有用_四半期表20240303.xlsx"
REGION_XLSX      = "43期地域別会場回数.xlsx"
ATTRACTION_XLSX  = "市区町村_統計量_全国 (1).xlsx"

# ====== 出力 ======
OUT_QUARTER_A = "SA+AJ+共有用_四半期表20240303_43期提案A_集客率考慮_理由BT書込.xlsx"
OUT_QUARTER_B = "SA+AJ+共有用_四半期表20240303_43期提案B_集客率考慮_理由BT書込.xlsx"
OUT_QUARTER_C = "SA+AJ+共有用_四半期表20240303_43期提案C_集客率考慮_理由BT書込.xlsx"
OUT_ANALYSIS  = "43期_MVP_提案スケジュール案_ABC_集客率_理由BT.xlsx"
SNOW_EXCEPT_XLSX = "豪雪例外リスト.xlsx"

# ====== 四半期表の列（openpyxl/pandas: 0-based） ======
# 実データ確認済み：A=週ID, B=地域/形態, C=市区分, D=会場, F=都道府県コード
COL_WEEK = 0           # A
COL_REGION = 1         # B（東/西/九州/北 etc. / 形態行では AJ/合同/SA）
COL_CITY = 2           # C（あなたが埋めたい「市区分」）
COL_VENUE = 3          # D（会場名）
COL_PREF = 5           # F（例: 07福島）
COL_KIND_DETAIL = 1    # 形態行のB列にAJ/合同/SAが入る
COL_REASON_BT = 72     # BT列（openpyxl 1-based=72）

VENUE_PLACEHOLDER = "会場要検討"
week_id_pat = re.compile(r"^\d{1,2}-\dw$")

# ====== 設定 ======
CONFIG = {
    "GAP_WEEKS_MIN": 6,
    "GAP_WEEKS_MAX": 30,

    # 豪雪ブラックアウト（暫定）
    "SNOW_BLACKOUT_MONTHS": {12, 1, 2},
    "SNOW_BLACKOUT_MARCH_W": {1},
    "SNOW_PREF_CODES": {
        "01北海道","02青森","03岩手","04宮城","05秋田","06山形","07福島",
        "15新潟","16富山","17石川","18福井","19山梨",
        "20長野","21岐阜","31鳥取","32島根"
    },

    # 表記ゆれ
    "FUZZY_CUTOFF": 0.82,
    "MANUAL_CITY_MAP": {},  # 例: {"郡山":"福島(郡山)"} のように手動対応

    # 祭り/マラソン等（後日差し込み）
    # 例: {"city_key":"福島", "week_id":"5-5w", "reason":"大祭"}
    "EXTERNAL_EVENT_BLACKOUT": [],

    # 3案の重み（ハード制約は全案共通）
    "WEIGHTS": {
        # A: バランス
        "A": {"pref_slack": 3.0, "city_slack": 2.0, "unmet_bonus": 1.2, "over_penalty": 1.0,
              "low_attr_pref": 2.0, "low_attr_city": 1.5, "relax_penalty": 25.0, "noise": 0.8},
        # B: スパン最重視
        "B": {"pref_slack": 4.5, "city_slack": 3.2, "unmet_bonus": 1.0, "over_penalty": 1.2,
              "low_attr_pref": 0.8, "low_attr_city": 0.6, "relax_penalty": 30.0, "noise": 0.8},
        # C: 低集客最重視
        "C": {"pref_slack": 2.0, "city_slack": 1.2, "unmet_bonus": 1.2, "over_penalty": 1.0,
              "low_attr_pref": 3.8, "low_attr_city": 3.2, "relax_penalty": 25.0, "noise": 0.8},
    },
    "SEEDS": {"A": 4301, "B": 4302, "C": 4303},
}

# ====== Utility ======
def norm(s):
    """Return ``s`` as a comparison key with separators and brackets removed.

    ``None`` and float NaN give ``""``.  Anything else is converted with
    ``str``, full-width spaces become ASCII spaces, the ends are stripped and
    every character listed in the pattern (whitespace, dashes and the long
    vowel mark, slashes, middle dot, commas, periods, brackets) is deleted.
    """
    is_missing = s is None or (isinstance(s, float) and math.isnan(s))
    if is_missing:
        return ""
    text = str(s).replace("　", " ").strip()
    return re.sub(r"[ \t\n\r\-‐ー–—/／・,，\.。()（）【】\[\]「」『』]", "", text)

def find_sheet_name(xlsx_path, keywords):
    """Return the first sheet name that contains every string in ``keywords``.

    Falls back to the workbook's first sheet name when no sheet matches.

    The workbook is opened read-only; such workbooks keep the underlying file
    open until ``close()`` is called, so it is closed here before returning.
    """
    wb = openpyxl.load_workbook(xlsx_path, read_only=True, data_only=True)
    try:
        names = list(wb.sheetnames)
    finally:
        wb.close()
    for name in names:
        if all(k in name for k in keywords):
            return name
    return names[0]

def parse_week_id(week_id):
    """Split a week id such as ``"6-2w"`` into ``(month, week_no)`` integers.

    Returns ``(None, None)`` when the text is not of the form
    ``<1-2 digits>-<1 digit>w``.
    """
    found = re.match(r"^(\d{1,2})-(\d)w$", str(week_id))
    if found is None:
        return None, None
    month_txt, week_txt = found.groups()
    return int(month_txt), int(week_txt)

def gap_weeks_from_count(cnt):
    """Turn a planned visit count into a required gap in weeks.

    The gap is ``ceil(52 / cnt)`` clamped to the configured
    ``GAP_WEEKS_MIN`` .. ``GAP_WEEKS_MAX`` range.  A count of zero or less
    yields ``GAP_WEEKS_MIN``.
    """
    lower = CONFIG["GAP_WEEKS_MIN"]
    if cnt <= 0:
        return lower
    raw_gap = int(math.ceil(52 / cnt))
    capped = min(CONFIG["GAP_WEEKS_MAX"], raw_gap)
    return max(lower, capped)

def kind_norm(x):
    """Normalise a session-kind cell so AJ / SA are written in full-width letters.

    ``None`` and float NaN become ``""``.  Otherwise the stripped text has the
    half-width upper-case and full-width lower-case spellings replaced by
    ``ＡＪ`` / ``ＳＡ``; other text is left unchanged.
    """
    if x is None:
        return ""
    if isinstance(x, float) and math.isnan(x):
        return ""
    label = str(x).strip()
    spellings = (("AJ", "ＡＪ"), ("ａｊ", "ＡＪ"), ("SA", "ＳＡ"), ("ｓａ", "ＳＡ"))
    for variant, canonical in spellings:
        label = label.replace(variant, canonical)
    return label

def ensure_snow_except_template(path):
    """Create an empty snow-exception workbook at ``path`` unless one exists.

    The generated sheet is titled "例外" and carries the header cells
    pref_code / city_key / memo plus a usage note in C2.
    """
    if not os.path.exists(path):
        book = openpyxl.Workbook()
        sheet = book.active
        sheet.title = "例外"
        template_cells = (
            ("A1", "pref_code"),     # e.g. 02青森
            ("B1", "city_key"),      # e.g. 福島 / 郡山 (city-division key)
            ("C1", "memo"),
            ("C2", "豪雪NGを例外的にOKにしたい対象を追加。空欄は無視。"),
        )
        for cell_ref, text in template_cells:
            sheet[cell_ref] = text
        book.save(path)

def load_snow_excepts(path):
    """Read the snow-exception workbook and return ``(pref_codes, city_keys)``.

    The template is created first when the file does not exist.  Each result
    is the set of non-empty, stripped strings found in the ``pref_code`` /
    ``city_key`` column; a missing column yields an empty set.
    """
    ensure_snow_except_template(path)
    table = pd.read_excel(path, sheet_name=0, dtype=str)

    def column_values(col):
        # Missing column -> empty series, so the result is an empty set.
        series = table.get(col, pd.Series([], dtype=str))
        cleaned = series.dropna().astype(str).str.strip()
        return {value for value in set(cleaned) if value}

    pref_codes = column_values("pref_code")
    city_keys_ok = column_values("city_key")
    return pref_codes, city_keys_ok

SNOW_EXCEPT_PREF_CODES, SNOW_EXCEPT_CITY_KEYS = load_snow_excepts(SNOW_EXCEPT_XLSX)

def is_snow_blackout(pref_code, city_key, week_id):
    """Tell whether a prefecture/city is blocked by the snow rule in ``week_id``.

    Returns ``False`` when the prefecture code is empty, when the prefecture
    or city is listed as an exception, when the prefecture is not one of the
    configured snow prefectures, or when ``week_id`` cannot be parsed.
    Otherwise returns ``True`` for the configured blackout months and for the
    configured weeks of March.
    """
    not_subject = (
        not pref_code
        or pref_code in SNOW_EXCEPT_PREF_CODES
        or (city_key and city_key in SNOW_EXCEPT_CITY_KEYS)
        or pref_code not in CONFIG["SNOW_PREF_CODES"]
    )
    if not_subject:
        return False

    month, week_no = parse_week_id(week_id)
    if month is None:
        return False
    return month in CONFIG["SNOW_BLACKOUT_MONTHS"] or (
        month == 3 and week_no in CONFIG["SNOW_BLACKOUT_MARCH_W"]
    )

def build_week_order(qsheet):
    """Collect the distinct week ids of a quarter sheet in first-seen order.

    Returns ``(week_order, index_map)`` where ``week_order`` is the list of
    week ids found in the week column and ``index_map`` maps each id to its
    position in that list.

    Cell text is stripped before matching.  ``extract_blocks`` strips the
    cell before looking it up in ``index_map``, so an id with stray
    surrounding whitespace (or a trailing newline, which ``$`` tolerates)
    must be registered under its stripped form or the lookup would miss.
    """
    week_order, seen = [], set()
    for raw in qsheet[COL_WEEK].astype(str).tolist():
        v = raw.strip()
        if week_id_pat.match(v) and v not in seen:
            week_order.append(v)
            seen.add(v)
    return week_order, {w: i for i, w in enumerate(week_order)}

def extract_blocks(qsheet, week_index_map, which):
    """Extract the ＡＪ / 合同 slots of one quarter sheet as a DataFrame.

    A slot is a "header" row whose week column holds a week id; the row
    directly below it (the "detail" row) supplies the session kind from its
    column B.  The last sheet row is never treated as a header because it has
    no detail row.

    Args:
        qsheet: sheet contents read with integer column labels.
        week_index_map: week id -> ordinal position; slots whose week id is
            not in the map are dropped.
        which: label stored in the ``fy`` column of every returned row.

    Returns:
        DataFrame with columns fy, week_id, week_pos, row_header, row_detail,
        kind, region_raw, city_raw, venue_raw, pref_code, restricted to rows
        whose kind is "ＡＪ" or "合同".  An empty frame with these columns is
        returned when the sheet has no slot rows.
    """
    columns = ["fy", "week_id", "week_pos", "row_header", "row_detail",
               "kind", "region_raw", "city_raw", "venue_raw", "pref_code"]

    def cell_text(value):
        # Missing cells become "", everything else a stripped string.
        return "" if pd.isna(value) else str(value).strip()

    blocks = []
    for i in range(len(qsheet) - 1):
        w = qsheet.iat[i, COL_WEEK]
        if pd.isna(w):
            continue
        w = str(w).strip()
        if not week_id_pat.match(w):
            continue

        header = qsheet.iloc[i]
        detail = qsheet.iloc[i + 1]

        blocks.append({
            "fy": which, "week_id": w, "week_pos": week_index_map.get(w, None),
            "row_header": i, "row_detail": i + 1,
            "kind": kind_norm(detail[COL_KIND_DETAIL]),
            "region_raw": cell_text(header[COL_REGION]),
            "city_raw": cell_text(header[COL_CITY]),
            "venue_raw": cell_text(header[COL_VENUE]),
            "pref_code": cell_text(header[COL_PREF]),
        })

    # Passing ``columns`` keeps the schema even when ``blocks`` is empty;
    # without it dropna(subset=["week_pos"]) raises KeyError on a column-less frame.
    df = pd.DataFrame(blocks, columns=columns).dropna(subset=["week_pos"]).copy()
    df["week_pos"] = df["week_pos"].astype(int)
    return df[df["kind"].isin(["ＡＪ", "合同"])].copy()

def best_fuzzy_key(a_norm, candidates_norm):
    """Return ``(candidate, ratio)`` for the candidate most similar to *a_norm*.

    Similarity is ``difflib.SequenceMatcher(...).ratio()``. Empty candidates are
    ignored and the earliest candidate wins a tie. When there is no candidate
    with a ratio above zero the result is ``("", 0.0)``.
    """
    scored = [
        (difflib.SequenceMatcher(None, a_norm, option).ratio(), option)
        for option in candidates_norm
        if option
    ]
    if not scored:
        return "", 0.0
    # max() keeps the first maximal element, i.e. the earliest candidate on ties.
    top_ratio, top_option = max(scored, key=lambda pair: pair[0])
    if top_ratio <= 0.0:
        return "", 0.0
    return top_option, top_ratio

# ====== 1) Load quarter sheets ======
# Resolve the term-42 / term-43 master sheet names, then read both sheets with
# header=None so that columns carry integer labels (used with the COL_* constants).
sheet42 = find_sheet_name(QUARTER_XLSX, ["42期", "マスタ"])
sheet43 = find_sheet_name(QUARTER_XLSX, ["43期", "マスタ"])
q42 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet42, header=None)
q43 = pd.read_excel(QUARTER_XLSX, sheet_name=sheet43, header=None)

# Ordered week IDs and week_id -> position maps, one pair per term.
week_order_42, week_index_42 = build_week_order(q42)
week_order_43, week_index_43 = build_week_order(q43)

# Header/detail row pairs of each term, tagged with the term label.
b42 = extract_blocks(q42, week_index_42, "42")
b43 = extract_blocks(q43, week_index_43, "43")

# ====== 2) region plan (city-division key & planned count) ======
# Column usage below: 0 = parent prefecture, 1 = city-division key, 2 = planned count.
r = pd.read_excel(REGION_XLSX, sheet_name=0, header=None)
# Keep only rows that have both a count (col 2) and a key (col 1).
plan_rows = r[~r[2].isna()].copy()
plan_rows = plan_rows[plan_rows[1].notna()]
# NOTE(review): ffill runs after the filtering above, so a parent label that
# only appears on a dropped row is not propagated — confirm against the sheet layout.
plan_rows["pref_parent"] = plan_rows[0].ffill()          # parent prefecture (depends on the table layout)
plan_rows["city_key"] = plan_rows[1].astype(str)         # city-division key
# Non-numeric counts are coerced to 0.
plan_rows["plan_count"] = pd.to_numeric(plan_rows[2], errors="coerce").fillna(0).astype(int)
region_master = plan_rows[["pref_parent","city_key","plan_count"]].reset_index(drop=True)

# Lookup tables used for city-name matching.
city_keys = region_master["city_key"].tolist()
city_norm_map = {k: norm(k) for k in city_keys}
_norm_values = list(city_norm_map.values())
# Reverse lookup normalized name -> key; if two keys normalize identically the later one wins.
_norm_to_key = {city_norm_map[k]: k for k in city_norm_map}

def match_city_key(city_name):
    """Resolve a raw city name from the quarter sheet to a city-division key.

    Matching is attempted in this order:
      1. manual override in ``CONFIG["MANUAL_CITY_MAP"]``  -> score 1.0
      2. exact match on the normalized name                 -> score 1.0
      3. substring match in either direction (first hit in
         ``city_norm_map`` order)                           -> score 0.95
      4. best fuzzy ratio, accepted at ``CONFIG["FUZZY_CUTOFF"]`` or above

    Parameters
    ----------
    city_name : str
        Raw city text; a falsy value yields no match.

    Returns
    -------
    tuple[str | None, float]
        ``(city_key, score)``; ``city_key`` is ``None`` when nothing matched,
        in which case ``score`` is the best fuzzy ratio seen (0.0 if none).
    """
    if not city_name:
        return None, 0.0
    if city_name in CONFIG["MANUAL_CITY_MAP"]:
        return CONFIG["MANUAL_CITY_MAP"][city_name], 1.0

    a = norm(city_name)
    if not a:
        # An empty normalized name is a substring of every key, so the
        # substring pass below would wrongly return the first key at 0.95.
        return None, 0.0
    if a in _norm_to_key:
        return _norm_to_key[a], 1.0

    # Substring match in either direction.
    for ck, ckn in city_norm_map.items():
        if ckn and (ckn in a or a in ckn):
            return ck, 0.95

    bn, ratio = best_fuzzy_key(a, _norm_values)
    if ratio >= CONFIG["FUZZY_CUTOFF"] and bn:
        return _norm_to_key[bn], ratio
    return None, ratio

def add_city_key(df):
    """Return a copy of *df* with ``city_key`` and ``match_score`` columns added.

    Each value of ``df["city_raw"]`` is resolved through ``match_city_key``;
    the input frame itself is left unmodified.
    """
    matches = [match_city_key(raw_name) for raw_name in df["city_raw"].tolist()]
    enriched = df.copy()
    enriched["city_key"] = [key for key, _ in matches]
    enriched["match_score"] = [score for _, score in matches]
    return enriched

# Base venues that are excluded from scheduling counts ...
EXCLUDE_AJ_BASE_VENUES = ("AJ日本橋", "AJ秋葉原")
# ... unless the text following the base venue name contains one of these keywords.
EXCLUDE_AJ_COUNT_KEYWORDS = ("萌え", "イラスト", "JIF")

def is_excluded_aj_base_venue(venue_raw):
    """Tell whether a venue cell denotes an excluded AJ base venue.

    All whitespace (including the full-width space) is removed first. The venue
    is excluded when the remaining text starts with one of
    ``EXCLUDE_AJ_BASE_VENUES`` and the text after that prefix contains none of
    ``EXCLUDE_AJ_COUNT_KEYWORDS``. ``None`` is treated as an empty string.
    """
    text = "" if venue_raw is None else str(venue_raw)
    compact = re.sub(r"[\s　]+", "", text)

    # Only the first base venue that prefixes the text decides the outcome.
    base_hit = next(
        (base for base in EXCLUDE_AJ_BASE_VENUES if compact.startswith(base)),
        None,
    )
    if base_hit is None:
        return False

    suffix = compact[len(base_hit):]
    return not any(keyword in suffix for keyword in EXCLUDE_AJ_COUNT_KEYWORDS)

# Already-scheduled events: header rows with a city or a venue filled in,
# minus the excluded AJ base venues.
scheduled42 = b42[((b42["city_raw"]!="") | (b42["venue_raw"]!="")) & (~b42["venue_raw"].apply(is_excluded_aj_base_venue))].copy()
scheduled43 = b43[((b43["city_raw"]!="") | (b43["venue_raw"]!="")) & (~b43["venue_raw"].apply(is_excluded_aj_base_venue))].copy()

# Attach the resolved city-division key and its match score.
scheduled42 = add_city_key(scheduled42)
scheduled43 = add_city_key(scheduled43)

# Proposal targets: only term-43 header rows whose kind is AJ and whose
# city division (col C) and venue (col D) are both empty.
open43_AJ = b43[
    (b43["kind"]=="ＡＪ") &
    (b43["city_raw"]=="") &
    (b43["venue_raw"]=="")
].copy().sort_values(["week_pos","row_header"]).reset_index(drop=True)

# ====== 3) Pref code mapping & gap (pref/city) ======
# Prefecture codes live in column F of the quarter sheet (e.g. 07福島); the name
# part is extracted so it can be joined with the attraction rates.
# Only codes that occur in the term-43 sheet are collected here.
pref_code_set = set([str(x).strip() for x in q43[COL_PREF].dropna().astype(str).unique().tolist()])

# Map each city division's parent (prefecture) onto a pref_code
# (assumes the leading word of pref_parent equals the prefecture name).
pref_name_to_code = {}
for pc in pref_code_set:
    # Two leading digits followed by the prefecture name.
    m = re.match(r"^(\d{2})(.+)$", pc)
    if m:
        pref_name_to_code[m.group(2)] = pc

def parent_to_pref_code(pref_parent):
    """Translate a region-plan parent label into a prefecture code.

    The leading run of non-digit characters is taken as the prefecture name and
    looked up in ``pref_name_to_code``. ``None``, NaN, and unknown names all
    yield an empty string.
    """
    is_missing = pref_parent is None or (
        isinstance(pref_parent, float) and math.isnan(pref_parent)
    )
    if is_missing:
        return ""

    label = str(pref_parent).strip()
    # Take the prefecture name that precedes any digits; fall back to the
    # whole label when it starts with a digit.
    leading = re.match(r"^([^\d]+)", label)
    if leading is None:
        lookup_name = label
    else:
        lookup_name = leading.group(1).strip()
    return pref_name_to_code.get(lookup_name, "")

# Guess a prefecture code for every region-plan row ("" when the parent label is unknown).
region_master["pref_code_guess"] = region_master["pref_parent"].apply(parent_to_pref_code)

# gap: N events per year -> 52/N weeks (conversion is done by gap_weeks_from_count, defined elsewhere)
pref_plan_count = region_master.groupby("pref_code_guess")["plan_count"].sum().to_dict()
# Rows without a guessed prefecture code (empty key) are left out of pref_gap.
pref_gap   = {pc: gap_weeks_from_count(int(cnt)) for pc, cnt in pref_plan_count.items() if pc}
city_gap   = {ck: gap_weeks_from_count(int(cnt)) for ck, cnt in region_master.set_index("city_key")["plan_count"].to_dict().items()}

# ====== 4) Timeline (terms 42 -> 43 concatenated) & last positions ======
# Term-43 positions are shifted by the number of term-42 weeks so that both
# terms share one absolute week axis.
OFFSET_43 = len(week_order_42)
scheduled42["abs_pos"] = scheduled42["week_pos"]
scheduled43["abs_pos"] = scheduled43["week_pos"] + OFFSET_43
open43_AJ["abs_pos"]   = open43_AJ["week_pos"] + OFFSET_43
scheduled_all = pd.concat([scheduled42, scheduled43], ignore_index=True)

# Latest (maximum) absolute position seen per prefecture code / city key.
# NOTE(review): this is the max over all scheduled rows of both terms, so it can
# point at an event that lies later than a given open slot.
last_pos_pref, last_pos_city = {}, {}
for _, e in scheduled_all.dropna(subset=["abs_pos"]).iterrows():
    ap = int(e["abs_pos"])
    pc = e["pref_code"]
    ck = e["city_key"]
    if pc:
        last_pos_pref[pc] = max(last_pos_pref.get(pc, -999), ap)
    if ck:
        last_pos_city[ck] = max(last_pos_city.get(ck, -999), ap)

# Per-city lookups; for duplicate city keys the later region-plan row wins.
pref_by_city = region_master.set_index("city_key")["pref_code_guess"].to_dict()
plan_count_by_city = region_master.set_index("city_key")["plan_count"].to_dict()

# ====== 5) Current consumption in term 43 (baseline for unmet / over-plan) ======
scheduled_counts_43 = scheduled43.dropna(subset=["city_key"]).groupby("city_key").size().to_dict()
# rem_base = plan - already_scheduled_in_43
rem_base = {ck: plan_count_by_city.get(ck,0) - scheduled_counts_43.get(ck,0) for ck in city_keys}

# ====== 6) Attraction (集客率) ======
def load_attraction(path):
    """Load the attraction-rate table and index it by municipality and prefecture.

    Column detection runs in two stages: exact / keyword header matches first,
    then looser fallbacks (a non-text column whose name contains "率" for the
    rate; text columns whose name contains a prefecture or municipality
    character for the other two).

    Parameters
    ----------
    path : str
        Excel file; only the first sheet is read.

    Returns
    -------
    dict
        ``df`` (rows with a numeric rate), ``pref_col`` / ``muni_col`` /
        ``rate_col`` (detected column names), ``muni_rate_norm``
        (normalized municipality name -> rate) and ``pref_rate``
        (prefecture -> mean rate).

    Raises
    ------
    KeyError
        When the prefecture, municipality or rate column cannot be detected.
    """
    df = pd.read_excel(path, sheet_name=0)
    df.columns = [str(c).strip() for c in df.columns]

    pref_col = next((c for c in df.columns if c in ["県","都道府県","pref","prefecture"]), None)
    muni_col = next((c for c in df.columns if c in ["市区町村","市町村","自治体","municipality","市区分"]), None)
    rate_col = next((c for c in df.columns if "集客率" in c or "来場率" in c or "動員率" in c), None)

    if pref_col is None or muni_col is None or rate_col is None:
        text_cols = [c for c in df.columns if df[c].dtype == object]
        if rate_col is None:
            rate_col = next((c for c in df.columns if "率" in c and df[c].dtype != object), None)
        if pref_col is None:
            pref_col = next((c for c in text_cols if "県" in c or "都道府県" in c), None)
        if muni_col is None:
            muni_col = next(
                (c for c in text_cols if "市" in c or "町" in c or "村" in c or "区" in c),
                None,
            )

    # Fail with a readable message instead of the opaque "KeyError: None" that
    # indexing the frame with an undetected column would produce.
    undetected = [
        label
        for label, col in (("prefecture", pref_col), ("municipality", muni_col), ("rate", rate_col))
        if col is None
    ]
    if undetected:
        raise KeyError(
            f"load_attraction: could not detect the {', '.join(undetected)} column(s) "
            f"in {path!r}; available columns: {list(df.columns)}"
        )

    d = df.copy()
    d[pref_col] = d[pref_col].astype(str).str.strip()
    d[muni_col] = d[muni_col].astype(str).str.strip()
    d[rate_col] = pd.to_numeric(d[rate_col], errors="coerce")
    d = d.dropna(subset=[rate_col])

    # Keyed by normalized municipality name only: rows that normalize to the
    # same name overwrite each other and the last row wins.
    muni_rate_norm = {
        norm(name): float(rate) for name, rate in zip(d[muni_col], d[rate_col])
    }
    pref_rate = d.groupby(pref_col)[rate_col].mean().to_dict()

    return {"df": d, "pref_col": pref_col, "muni_col": muni_col, "rate_col": rate_col,
            "muni_rate_norm": muni_rate_norm, "pref_rate": {str(k).strip(): float(v) for k, v in pref_rate.items()}}

attr = load_attraction(ATTRACTION_XLSX)

# pref_code (e.g. 07福島) -> join with the attraction rate via the prefecture name part ("福島")
# Only prefectures that have a gap entry and a rate under exactly that name are kept.
pref_rate_by_code = {}
for pc in pref_gap.keys():
    m = re.match(r"^\d{2}(.+)$", pc)
    if m:
        name = m.group(1)
        if name in attr["pref_rate"]:
            pref_rate_by_code[pc] = attr["pref_rate"][name]

# city_key -> municipality rate (matched on the normalized name)
# City keys without an exact normalized match get no entry.
city_rate = {}
for ck in city_keys:
    cn = norm(ck)
    if cn in attr["muni_rate_norm"]:
        city_rate[ck] = attr["muni_rate_norm"][cn]

def percentile_need(values_dict):
    """Rank the values as percentiles and derive a "need" score from them.

    Entries whose value is ``None`` or a float NaN are ignored. Returns
    ``(pct_map, need_map)`` where ``pct_map[key]`` is the percentile rank
    (average method, smaller value -> smaller percentile) and
    ``need_map[key]`` is ``1 - pct``, so a lower rate means a higher need.
    Both maps are empty when no usable value exists.
    """
    def _usable(value):
        return value is not None and not (isinstance(value, float) and math.isnan(value))

    kept = {}
    if values_dict:
        kept = {key: value for key, value in values_dict.items() if _usable(value)}
    if not kept:
        return {}, {}

    ranks = pd.Series(list(kept.values())).rank(pct=True, method="average").tolist()
    pct_map = dict(zip(kept.keys(), ranks))
    need_map = {key: float(1 - rank) for key, rank in pct_map.items()}
    return pct_map, need_map

# Percentile rank and need score (1 - percentile) per prefecture code and per city key.
pref_pct, pref_need = percentile_need(pref_rate_by_code)
city_pct, city_need = percentile_need(city_rate)

# ====== 7) External blackout (future) ======
# (city_key, week_id) -> reason text; entries without a "reason" use the default label.
external_blackout = {(d.get("city_key"), d.get("week_id")): d.get("reason","外部イベント")
                     for d in CONFIG["EXTERNAL_EVENT_BLACKOUT"]}

def fmt_pct(p):
    """Format a fraction as a whole-number percentage string.

    Returns "NA" for ``None`` and for a float NaN (the same missing-value
    handling as ``fmt_rate``), otherwise ``p * 100`` rounded to zero decimals
    with a trailing "%".
    """
    if p is None or (isinstance(p, float) and math.isnan(p)):
        return "NA"
    return f"{p*100:.0f}%"

def fmt_rate(v):
    """Format an attraction rate for display.

    ``None`` and float NaN become "NA". Values up to and including 1.0 are
    shown as a percentage with two decimals; larger values are shown with
    four significant digits.
    """
    missing = v is None or (isinstance(v, float) and math.isnan(v))
    if missing:
        return "NA"
    return f"{v*100:.2f}%" if v <= 1.0 else f"{v:.4g}"

def build_reason(variant, relax_mode, need_gap_p, need_gap_c, gp, gc,
                 pref_rate, pref_p, city_rate_v, city_p,
                 snow_ng, fest_ng, rem_before, rem_after, score):
    """Compose the " / "-separated explanation text for one proposal.

    The text lists the variant, the relaxation mode (only when one was used),
    required vs. actual span for prefecture and city, both attraction rates
    with their percentile, the snow and festival flags, the remaining plan
    count before/after, and the score. The result is cut to 32000 characters.
    """
    if pref_rate is not None:
        pref_part = f"集客率(県)={fmt_rate(pref_rate)}(下位{fmt_pct(pref_p)})"
    else:
        pref_part = "集客率(県)=NA"

    if city_rate_v is not None:
        city_part = f"集客率(市区)={fmt_rate(city_rate_v)}(下位{fmt_pct(city_p)})"
    else:
        city_part = "集客率(市区)=NA"

    segments = [f"案{variant}"]
    if relax_mode:
        segments.append(f"制約緩和={relax_mode}")
    segments.extend([
        f"スパンB(県):必要{need_gap_p}w/実績{gp}w",
        f"スパンA(市区):必要{need_gap_c}w/実績{gc}w",
        pref_part,
        city_part,
        f"豪雪={'NG' if snow_ng else 'OK'}",
        f"祭り={'NG' if fest_ng else '未連携'}",
        f"計画残(前)={rem_before}→(後)={rem_after}",
        f"score={score:.2f}",
    ])
    joined = " / ".join(segments)
    return joined[:32000]

# ====== 8) Plan generation (A/B/C) ======
def make_plan(variant):
    """Build one proposal plan for the open term-43 AJ slots.

    Slots in ``open43_AJ`` are visited in order and each receives the
    best-scoring city key. Candidates are tried under progressively relaxed
    span rules, stopping at the first level that yields a candidate:

      * ``None``  : strict - prefecture span (B) and city span (A) both hold
      * "Aのみ"   : the prefecture span holds, the city span is waived
      * "B+A"     : both spans are waived (last resort so every slot is filled)

    Snow and external-event blackouts are never relaxed.

    Spans are measured as the distance in weeks to the *nearest* known event of
    the same prefecture / city on either side of the slot. Known events are the
    confirmed ones in ``scheduled_all`` (both terms, including events later
    than the slot) plus the proposals already made by this variant. A signed
    distance from only the latest position would reject every slot that lies
    before a confirmed event regardless of distance, and would forget that
    event once an earlier proposal overwrote it.

    Parameters
    ----------
    variant : str
        Key into ``CONFIG["SEEDS"]`` and ``CONFIG["WEIGHTS"]`` ("A", "B", "C").

    Returns
    -------
    pandas.DataFrame
        One row per filled slot. The columns are always present, even when no
        slot could be filled.
    """
    rng = random.Random(CONFIG["SEEDS"][variant])
    W = CONFIG["WEIGHTS"][variant]
    gap_floor = CONFIG["GAP_WEEKS_MIN"]

    # Extra consumption made by this variant, on top of rem_base.
    used = {ck: 0 for ck in city_keys}

    # Every known event position per prefecture code / city key. Proposals
    # made below are appended so spans chain within the variant.
    pos_p, pos_c = {}, {}
    for _, e in scheduled_all.dropna(subset=["abs_pos"]).iterrows():
        ap = int(e["abs_pos"])
        if e["pref_code"]:
            pos_p.setdefault(e["pref_code"], []).append(ap)
        if e["city_key"]:
            pos_c.setdefault(e["city_key"], []).append(ap)

    assigns = []

    def nearest_gap(positions, apos):
        # Weeks to the closest event before or after apos; 999 when there is none.
        return min((abs(apos - p) for p in positions), default=999)

    def score_candidate(ck, apos, week_id, relax_mode):
        pc = pref_by_city.get(ck, "")
        need_gap_p = pref_gap.get(pc, gap_floor) if pc else gap_floor
        need_gap_c = city_gap.get(ck, gap_floor)

        hist_p = pos_p.get(pc, ()) if pc else ()
        hist_c = pos_c.get(ck, ())

        gp = nearest_gap(hist_p, apos)
        gc = nearest_gap(hist_c, apos)

        # Span check: no history at all always passes.
        ok_p = (not hist_p) or (gp >= need_gap_p)
        ok_c = (not hist_c) or (gc >= need_gap_c)

        if relax_mode is None and not (ok_p and ok_c):
            return None
        if relax_mode == "Aのみ" and not ok_p:
            return None
        # "B+A": neither span is enforced.

        # Snow and external-event blackouts always exclude the candidate.
        if pc and is_snow_blackout(pc, ck, week_id):
            return None
        if (ck, week_id) in external_blackout:
            return None

        slack_p = gp - need_gap_p
        slack_c = gc - need_gap_c

        # Remaining plan count (term-43 baseline minus what this variant added).
        rem_before = rem_base.get(ck, 0) - used.get(ck, 0)
        unmet = max(rem_before, 0)       # bonus while under plan
        over  = max(-rem_before, 0)      # penalty once over plan

        # Attraction-rate need; a missing rate is neutral (0.5).
        pref_rate = pref_rate_by_code.get(pc, None)
        pref_need_score = pref_need.get(pc, 0.5)
        city_rate_v = city_rate.get(ck, None)
        city_need_score = city_need.get(ck, 0.5)

        relax_pen = 0.0
        if relax_mode == "Aのみ":
            relax_pen = W["relax_penalty"] * 0.6
        elif relax_mode == "B+A":
            relax_pen = W["relax_penalty"] * 1.0

        score = (
            slack_p * W["pref_slack"] +
            slack_c * W["city_slack"] +
            unmet   * W["unmet_bonus"] * 5.0 -
            over    * W["over_penalty"] * 5.0 +
            (pref_need_score * 10) * W["low_attr_pref"] +
            (city_need_score * 10) * W["low_attr_city"] -
            relax_pen +
            rng.uniform(-0.5, 0.5) * W["noise"]
        )
        return {
            "ck": ck, "pc": pc, "score": score,
            "need_gap_p": need_gap_p, "need_gap_c": need_gap_c,
            "gp": gp, "gc": gc,
            "pref_rate": pref_rate, "pref_p": pref_pct.get(pc, None),
            "city_rate": city_rate_v, "city_p": city_pct.get(ck, None),
            "rem_before": rem_before,
            "relax_mode": relax_mode
        }

    relax_ladder = (None, "Aのみ", "B+A")

    for _, slot in open43_AJ.iterrows():
        apos = int(slot["abs_pos"])
        week_id = slot["week_id"]

        best = None
        for mode in relax_ladder:
            for ck in city_keys:
                cand = score_candidate(ck, apos, week_id, relax_mode=mode)
                if cand is not None and (best is None or cand["score"] > best["score"]):
                    best = cand
            if best is not None:
                break

        if best is None:
            # Rare: every candidate was removed by the snow / external blackouts.
            continue

        ck = best["ck"]
        pc = best["pc"]

        # Record the proposal so later slots keep their distance from it.
        used[ck] += 1
        pos_c.setdefault(ck, []).append(apos)
        if pc:
            pos_p.setdefault(pc, []).append(apos)

        rem_after = best["rem_before"] - 1

        reason = build_reason(
            variant=variant,
            relax_mode=best["relax_mode"],
            need_gap_p=best["need_gap_p"],
            need_gap_c=best["need_gap_c"],
            gp=best["gp"],
            gc=best["gc"],
            pref_rate=best["pref_rate"],
            pref_p=best["pref_p"],
            city_rate_v=best["city_rate"],
            city_p=best["city_p"],
            snow_ng=False,
            fest_ng=False,
            rem_before=best["rem_before"],
            rem_after=rem_after,
            score=best["score"]
        )

        assigns.append({
            "variant": variant,
            "week_id_43": week_id,
            "week_pos_43": int(slot["week_pos"]),
            "abs_pos": apos,
            "row_header": int(slot["row_header"]),
            "row_detail": int(slot["row_detail"]),
            "assign_city_key": ck,
            "pref_code_guess": pc,
            "need_gap_pref": best["need_gap_p"],
            "need_gap_city": best["need_gap_c"],
            "actual_pref_gap": best["gp"],
            "actual_city_gap": best["gc"],
            "attr_pref_rate": best["pref_rate"],
            "attr_city_rate": best["city_rate"],
            "score": round(best["score"], 2),
            "reason_BT": reason
        })

    # Explicit columns so an empty plan can still be sorted and exported.
    plan_columns = [
        "variant", "week_id_43", "week_pos_43", "abs_pos",
        "row_header", "row_detail", "assign_city_key", "pref_code_guess",
        "need_gap_pref", "need_gap_city", "actual_pref_gap", "actual_city_gap",
        "attr_pref_rate", "attr_city_rate", "score", "reason_BT",
    ]
    return pd.DataFrame(assigns, columns=plan_columns)

# Generate the three plan variants; each call reads that variant's seed and
# weights from CONFIG["SEEDS"] / CONFIG["WEIGHTS"].
planA = make_plan("A")
planB = make_plan("B")
planC = make_plan("C")

# ====== 9) Write-back (43期マスタのみ) ======
def write_back(out_path, plan_df):
    """Write a plan into the term-43 master sheet and save it as *out_path*.

    The workbook is loaded from ``QUARTER_XLSX``. For every plan row the city
    key, the venue placeholder and the reason text are written to the slot's
    header row. A row is skipped when its city or venue cell already holds
    text, so existing entries are never overwritten.

    Note that ``COL_CITY`` / ``COL_VENUE`` get +1 to become 1-based column
    numbers, whereas ``COL_REASON_BT`` is passed to ``ws.cell`` as is.

    Returns ``(written, skipped)`` row counts.
    """
    def _has_text(value):
        return value is not None and str(value).strip() != ""

    wb = openpyxl.load_workbook(QUARTER_XLSX)
    ws = wb[sheet43]

    city_col = COL_CITY + 1
    venue_col = COL_VENUE + 1
    written = skipped = 0

    for _, proposal in plan_df.iterrows():
        excel_row = int(proposal["row_header"]) + 1  # 0-based -> 1-based

        city_cell = ws.cell(excel_row, city_col)
        venue_cell = ws.cell(excel_row, venue_col)

        # Overwrite guard: only fill rows whose city (C) and venue (D) are blank.
        if _has_text(city_cell.value) or _has_text(venue_cell.value):
            skipped += 1
            continue

        city_cell.value = str(proposal["assign_city_key"])
        venue_cell.value = VENUE_PLACEHOLDER
        ws.cell(excel_row, COL_REASON_BT).value = str(proposal["reason_BT"])
        written += 1

    wb.save(out_path)
    return written, skipped

# Write each plan into its own copy of the quarter workbook; keep the
# written / skipped counts for the summary below.
wA, sA = write_back(OUT_QUARTER_A, planA)
wB, sB = write_back(OUT_QUARTER_B, planB)
wC, sC = write_back(OUT_QUARTER_C, planC)

# ====== 10) Analysis workbook ======
with pd.ExcelWriter(OUT_ANALYSIS, engine="openpyxl") as w:
    # One sheet per plan variant, ordered by week and header row.
    planA.sort_values(["week_pos_43","row_header"]).to_excel(w, index=False, sheet_name="提案A")
    planB.sort_values(["week_pos_43","row_header"]).to_excel(w, index=False, sheet_name="提案B")
    planC.sort_values(["week_pos_43","row_header"]).to_excel(w, index=False, sheet_name="提案C")

    # Inputs: the open AJ slots and the already-scheduled events of both terms.
    open43_AJ.to_excel(w, index=False, sheet_name="空欄_AJヘッダ")
    scheduled42.sort_values(["week_pos","row_header"]).to_excel(w, index=False, sheet_name="既存_42")
    scheduled43.sort_values(["week_pos","row_header"]).to_excel(w, index=False, sheet_name="既存_43")

    # Make the remaining counts visible (term-43 current state, before any proposal)
    rem_df = pd.DataFrame([{
        "city_key": ck,
        "plan_count": plan_count_by_city.get(ck,0),
        "scheduled_43": scheduled_counts_43.get(ck,0),
        "rem_base(plan-scheduled43)": rem_base.get(ck,0)
    } for ck in city_keys]).sort_values(["rem_base(plan-scheduled43)","city_key"], ascending=[True, True])
    rem_df.to_excel(w, index=False, sheet_name="計画残_43現状")

    # Attraction rate and need score per prefecture code / per city key
    # (missing values are exported as empty cells).
    pd.DataFrame([{"pref_code":pc,"pref_attr_rate":pref_rate_by_code.get(pc), "need(低いほど高)":pref_need.get(pc)} for pc in sorted(pref_gap.keys())])\
      .to_excel(w, index=False, sheet_name="県_集客率")
    pd.DataFrame([{"city_key":ck,"city_attr_rate":city_rate.get(ck), "need(低いほど高)":city_need.get(ck)} for ck in sorted(city_keys)])\
      .to_excel(w, index=False, sheet_name="市区_集客率")

    # Run summary: sheet names, slot / proposal / write counts, detected
    # attraction columns, snow-exception sizes and the reason column.
    pd.DataFrame([
        {"info":"sheet42", "value": sheet42},
        {"info":"sheet43", "value": sheet43},
        {"info":"空欄_AJヘッダ数", "value": len(open43_AJ)},
        {"info":"提案A数", "value": len(planA)}, {"info":"提案A 書込", "value": wA}, {"info":"提案A スキップ", "value": sA},
        {"info":"提案B数", "value": len(planB)}, {"info":"提案B 書込", "value": wB}, {"info":"提案B スキップ", "value": sB},
        {"info":"提案C数", "value": len(planC)}, {"info":"提案C 書込", "value": wC}, {"info":"提案C スキップ", "value": sC},
        {"info":"集客率列(pref_col)", "value": attr["pref_col"]},
        {"info":"集客率列(muni_col)", "value": attr["muni_col"]},
        {"info":"集客率列(rate_col)", "value": attr["rate_col"]},
        {"info":"豪雪例外pref数", "value": len(SNOW_EXCEPT_PREF_CODES)},
        {"info":"豪雪例外city数", "value": len(SNOW_EXCEPT_CITY_KEYS)},
        {"info":"BT列index", "value": f"{COL_REASON_BT}({get_column_letter(COL_REASON_BT)})"},
    ]).to_excel(w, index=False, sheet_name="サマリ")

# Console report of the same counts and the output paths.
print("✅ 空欄(AJヘッダ)数:", len(open43_AJ))
print("✅ A:", f"提案{len(planA)} / 書込{wA} / スキップ{sA} -> {OUT_QUARTER_A}")
print("✅ B:", f"提案{len(planB)} / 書込{wB} / スキップ{sB} -> {OUT_QUARTER_B}")
print("✅ C:", f"提案{len(planC)} / 書込{wC} / スキップ{sC} -> {OUT_QUARTER_C}")
print("✅ 分析:", OUT_ANALYSIS)
print("✅ 豪雪例外:", SNOW_EXCEPT_XLSX)


✅ 空欄(AJヘッダ)数: 137
✅ A: 提案137 / 書込137 / スキップ0 -> SA+AJ+共有用_四半期表20240303_43期提案A_集客率考慮_理由BT書込.xlsx
✅ B: 提案137 / 書込137 / スキップ0 -> SA+AJ+共有用_四半期表20240303_43期提案B_集客率考慮_理由BT書込.xlsx
✅ C: 提案137 / 書込137 / スキップ0 -> SA+AJ+共有用_四半期表20240303_43期提案C_集客率考慮_理由BT書込.xlsx
✅ 分析: 43期_MVP_提案スケジュール案_ABC_集客率_理由BT.xlsx
✅ 豪雪例外: 豪雪例外リスト.xlsx
