<a href="https://colab.research.google.com/github/nanafish/ORS/blob/main/%E9%9B%BB%E8%BB%8A%E3%82%A2%E3%82%A4%E3%82%BD%E3%82%AF%E3%83%AD%E3%83%B3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===== 1セル：N02駅 × S12利用者数 → 30/60分“近似”圏CSV（保守設定＆路線名除外＆CP932対応） =====
!pip -q install geopandas shapely pyproj fiona

import os, math, csv, unicodedata, difflib, numpy as np, pandas as pd, geopandas as gpd
from shapely.geometry import Point
import os, logging, warnings
os.environ["SHAPE_ENCODING"] = "CP932"  # explicitly set the default encoding used for SHP files
logging.getLogger("fiona").setLevel(logging.ERROR)      # hide Fiona warnings
logging.getLogger("fiona.ogrext").setLevel(logging.ERROR)


# ==== Inputs (change the paths to wherever the files were uploaded) ====
N02_BASE = "/content/N02-23_Station"             # required: .shp/.shx/.dbf (+ .prj recommended)
S12_BASE = "/content/S12-23_NumberOfPassengers"  # optional: .shp/.shx/.dbf (+ .prj)
S12_GEOJSON = "/content/S12-23_NumberOfPassengers.geojson"  # optional: takes priority when present

# ==== Output tuning (conservative settings so the area does not spread too far) ====
TIME_BANDS = [30, 60]
BAND_SPEED_KMH = {30: 32.0, 60: 24.0}   # assumed speed per band (km/h)
SHRINK_RATIO   = {30: 0.92, 60: 0.88}   # multiplied into the radius (use e.g. 0.88 to tighten further)
NEAREST_JOIN_M = 300                    # distance [m] for the S12 nearest join (raise to 400-600 if too few matches)

# ==== Known station-name column (set it if known; leaving "" is fine when unknown) ====
NAME_FIELD_OVERRIDE_N02 = "N02_駅名"

def nrm(s):
    """Return ``str(s)`` NFKC-normalized with leading/trailing whitespace removed."""
    as_text = str(s)
    folded = unicodedata.normalize("NFKC", as_text)
    return folded.strip()

# ---------- Load N02 (stations) ----------
# Fail early when any of the three required shapefile parts is missing.
for ext in [".shp",".shx",".dbf"]:
    if not os.path.exists(N02_BASE+ext):
        raise FileNotFoundError(f"N02が不足: {ext} が見つかりません")

# Read with default settings first; on any failure retry with CP932 forced.
try: gdf = gpd.read_file(N02_BASE + ".shp")
except Exception: gdf = gpd.read_file(N02_BASE + ".shp", encoding="cp932")
if gdf.empty: raise RuntimeError("N02 Station.shp が空です")
# A layer without CRS information is treated as EPSG:4326.
if gdf.crs is None: gdf.set_crs(epsg=4326, inplace=True)
# Best effort: if reprojection fails, gdf is kept unchanged.
try: gdf = gdf.to_crs(epsg=4326)
except Exception: pass

# Geometry cleaning -> convert everything to Point
# Drop rows whose geometry is null or empty.
g = gdf[gdf.geometry.notnull() & (~gdf.geometry.is_empty)].copy()
# Split MultiPoint rows into one row per point.
if (g.geom_type == "MultiPoint").any():
    g = g.explode(index_parts=False).reset_index(drop=True)
if not (g.geom_type == "Point").all():
    # Derive the points in EPSG:3857 when possible, otherwise in the current CRS.
    try: w = g.to_crs(epsg=3857)
    except Exception: w = g.copy()
    # Collapse one geometry to a single point: Points pass through, polygons
    # use representative_point(), everything else uses the centroid; if that
    # raises, fall back to the centre of the bounding box.
    def to_point(geom):
        if geom.geom_type == "Point": return geom
        try:
            if geom.geom_type in ("Polygon","MultiPolygon"): return geom.representative_point()
            else: return geom.centroid
        except Exception:
            minx,miny,maxx,maxy = geom.bounds; return Point((minx+maxx)/2, (miny+maxy)/2)
    w["geometry"] = w.geometry.apply(to_point)
    # Back to EPSG:4326; on failure keep w as it is.
    try: g = w.to_crs(epsg=4326)
    except Exception: g = w

# 駅名列の決定
def pick_name_col(df):
    """Choose the column of *df* that holds station names.

    Order of preference:
      1. ``NAME_FIELD_OVERRIDE_N02`` when it is set and present in *df*;
      2. the first string-typed column whose name contains "駅";
      3. the string-typed column with the highest heuristic score
         (0.7 x share of values that are not digits-only, plus 0.3 when the
         mean value length exceeds 1).

    Returns the column label, or ``None`` when no column qualifies.
    """
    is_text = pd.api.types.is_string_dtype

    forced = NAME_FIELD_OVERRIDE_N02
    if forced and forced in df.columns:
        return forced

    for col in df.columns:
        if "駅" in str(col) and is_text(df[col]):
            return col

    top_col, top_score = None, -1
    for col in df.columns:
        if not is_text(df[col]):
            continue
        values = df[col].astype(str).fillna("")
        digits_only = values.str.fullmatch(r"\d+").fillna(False)
        share_not_numeric = digits_only.eq(False).mean()
        mean_len_over_one = values.str.len().mean() > 1
        col_score = share_not_numeric * 0.7 + mean_len_over_one * 0.3
        if col_score > top_score:
            top_col, top_score = col, col_score
    return top_col
# Resolve the station-name column; abort with the column list when none is found.
n02_name = pick_name_col(g)
if not n02_name: raise RuntimeError(f"N02: 駅名列が特定できません。列: {list(g.columns)}")

# Normalized station name, plus latitude/longitude read from the point geometry.
# NOTE(review): astype(str) may turn missing names into literal text such as
# "nan", which the dropna() below would then not remove — confirm.
g["Name"] = g[n02_name].astype(str).map(nrm)
g["Latitude"]  = g.geometry.y
g["Longitude"] = g.geometry.x
# Station table used by the rest of the script: rows with missing values are
# dropped and rows sharing the same name and coordinates are collapsed to one.
st = g[["Name","Latitude","Longitude","geometry"]].dropna().drop_duplicates(subset=["Name","Latitude","Longitude"]).reset_index(drop=True)

# ---------- S12（利用者数）読み込み（CP932対応＋fiona強制） ----------
def load_s12():
    """Load the S12 passenger layer as a point GeoDataFrame, or return None.

    ``S12_GEOJSON`` is read when that file exists. Otherwise the shapefile at
    ``S12_BASE`` is read when its .shp/.shx/.dbf parts all exist, decoding as
    CP932 and retrying as Shift_JIS on ``UnicodeDecodeError``. When neither
    input is available the function returns ``None``.

    The geopandas I/O engine is switched to "fiona" for the read and restored
    afterwards. The loaded layer is then put into EPSG:4326 (assumed when the
    layer has no CRS; a failing reprojection is ignored), rows with null or
    empty geometry are dropped, MultiPoints are exploded, and any remaining
    non-Point geometry is collapsed to a single point.
    """

    def _as_point(shape):
        # Points pass through; polygons use a representative point, other
        # types the centroid; on failure use the bounding-box centre.
        if shape.geom_type == "Point":
            return shape
        try:
            if shape.geom_type in ("Polygon", "MultiPolygon"):
                return shape.representative_point()
            return shape.centroid
        except Exception:
            west, south, east, north = shape.bounds
            return Point((west + east) / 2, (south + north) / 2)

    shp_path = S12_BASE + ".shp"
    required_parts = [S12_BASE + suffix for suffix in (".shp", ".shx", ".dbf")]

    previous_engine = gpd.options.io_engine
    # Original author's note: pyogrio can fail on CP932 data, so force fiona.
    gpd.options.io_engine = "fiona"
    try:
        if os.path.exists(S12_GEOJSON):
            layer = gpd.read_file(S12_GEOJSON)  # normally UTF-8
        elif all(os.path.exists(part) for part in required_parts):
            try:
                layer = gpd.read_file(shp_path, encoding="cp932")
            except UnicodeDecodeError:
                layer = gpd.read_file(shp_path, encoding="shift_jis")
        else:
            return None
    finally:
        gpd.options.io_engine = previous_engine

    if layer.crs is None:
        layer.set_crs(epsg=4326, inplace=True)
    try:
        layer = layer.to_crs(epsg=4326)
    except Exception:
        pass

    usable = layer.geometry.notnull() & (~layer.geometry.is_empty)
    layer = layer[usable].copy()
    if (layer.geom_type == "MultiPoint").any():
        layer = layer.explode(index_parts=False).reset_index(drop=True)
    if (layer.geom_type == "Point").all():
        return layer

    # Derive points in EPSG:3857 when possible, then go back to EPSG:4326.
    try:
        metric = layer.to_crs(epsg=3857)
    except Exception:
        metric = layer.copy()
    metric["geometry"] = metric.geometry.apply(_as_point)
    try:
        return metric.to_crs(epsg=4326)
    except Exception:
        return metric

s12 = load_s12()
pass_map = {}  # always initialised so later lookups never raise NameError

if s12 is not None and not s12.empty:
    # Candidate station-name columns (string columns whose label contains "駅",
    # otherwise any string column) and candidate numeric passenger columns.
    name_cands = [c for c in s12.columns if ("駅" in str(c)) and pd.api.types.is_string_dtype(s12[c])] \
                 or [c for c in s12.columns if pd.api.types.is_string_dtype(s12[c])]
    pnum_cands = [c for c in s12.columns if pd.api.types.is_numeric_dtype(s12[c])]
    s12["__name__"] = s12[name_cands[0]].astype(str).map(nrm) if name_cands else ""

    # Pick the numeric column with the largest (finite mean) x (share of
    # non-zero rows); a NaN score never wins the comparison.
    best_pcol, best_score = None, -1
    for c in pnum_cands:
        ser = s12[c]
        mean_val = ser.replace([np.inf, -np.inf], np.nan).dropna().mean()
        zero_rate = (ser == 0).mean()
        score = float(mean_val) * (1.0 - float(zero_rate))
        if score > best_score:
            best_pcol, best_score = c, score

    # Attach counts from the nearest S12 feature within NEAREST_JOIN_M.
    # NOTE(review): distances are measured in EPSG:3857, where lengths are
    # stretched by roughly 1/cos(latitude), so the effective ground distance
    # is shorter than NEAREST_JOIN_M.
    try:
        n02m = st.to_crs(epsg=3857)
        s12m = s12.to_crs(epsg=3857)
        joined = gpd.sjoin_nearest(n02m, s12m, how="left", max_distance=NEAREST_JOIN_M)
        if best_pcol is not None:
            tmp = joined[["Name", best_pcol]].dropna()
            # Several matches for one station name: keep the largest count.
            pass_map.update(tmp.groupby("Name")[best_pcol].max().to_dict())
    except Exception as exc:
        # Still best effort (the name-based fill below can recover part of
        # the data), but report the failure instead of hiding it.
        warnings.warn(f"S12最近傍結合に失敗しました（駅名一致のみで補完します）: {exc}")

    # Fill the remaining stations by name (exact, then without "駅").
    if name_cands and best_pcol is not None:
        sdict = s12.set_index("__name__")[best_pcol].to_dict()
        for nm in st["Name"].unique():
            if nm in pass_map:
                continue
            # Explicit None/NaN checks: a count of 0 is a valid value and
            # must not fall through to the second lookup or be discarded.
            v = sdict.get(nm)
            if v is None or pd.isna(v):
                v = sdict.get(nm.replace("駅", ""))
            if v is not None and not pd.isna(v):
                pass_map[nm] = v

def passengers_label(name):
    """Return the passenger-count label for station *name*, or ``None``.

    The count is looked up in the module-level ``pass_map`` under *name* and,
    when absent, under *name* with every "駅" removed.

    Returns:
        ``"<count with thousands separators>人/日"`` for a finite numeric
        value (a stored 0 yields ``"0人/日"``), ``str(value)`` when the value
        cannot be converted to a number, and ``None`` when there is no entry
        or the value is NaN/infinite.
    """
    value = pass_map.get(name)
    if value is None:
        # Compare against None explicitly so a stored 0 is not skipped.
        value = pass_map.get(name.replace("駅", ""))
    if value is None:
        return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return str(value)
    if not math.isfinite(number):
        # NaN/inf would otherwise surface as a "nan"/"inf" label.
        return None
    return f"{int(round(number)):,}人/日"

# ---------- Origin selection (station name or coordinates) ----------
# Show a hint, then read the query: either a station name or "lat,lon".
print("（ヒント）座標入力でもOK：例 35.003666960967934, 135.75990277504576")
query = input("出発駅名 or 'lat,lon'（例: 烏丸駅 / 35.0037,135.7599）: ").strip()

def hav_km(a_lat, a_lon, b_lat, b_lon):
    """Return the great-circle distance in kilometres between two points.

    Uses the haversine formula on a sphere of radius 6371.0088 km.

    Args:
        a_lat, a_lon: latitude/longitude of the first point, decimal degrees.
        b_lat, b_lon: latitude/longitude of the second point, decimal degrees.

    Returns:
        The distance in km as a float (0.0 for identical points).
    """
    R = 6371.0088
    phi_a = math.radians(a_lat)
    phi_b = math.radians(b_lat)
    dlat = math.radians(b_lat - a_lat)
    dlon = math.radians(b_lon - a_lon)
    h = math.sin(dlat / 2) ** 2 + math.cos(phi_a) * math.cos(phi_b) * math.sin(dlon / 2) ** 2
    # Rounding can push h marginally outside [0, 1] for (near-)antipodal
    # points, which would make sqrt/asin raise ValueError; clamp it.
    h = min(1.0, max(0.0, h))
    return 2 * R * math.asin(math.sqrt(h))

# Try to read the query as "lat,lon" and snap to the nearest station;
# origin stays None when the query is not a usable coordinate pair.
origin = None
# Normalize first so full-width digits/commas typed through an IME parse too.
coord_text = nrm(query)
if "," in coord_text:
    try:
        lat_s, lon_s = [float(v) for v in coord_text.split(",", 1)]
        # A degree of longitude is shorter than a degree of latitude by
        # cos(latitude); without this factor the "nearest" station is biased
        # towards stations at a similar longitude.
        lon_scale = math.cos(math.radians(lat_s))
        d2 = (st["Latitude"] - lat_s) ** 2 + ((st["Longitude"] - lon_s) * lon_scale) ** 2
        # idxmin() returns an index label, so look it up with .loc.
        origin = st.loc[d2.idxmin()]
    except (ValueError, KeyError, TypeError):
        # Not numeric, non-finite, or no stations: fall back to name search.
        origin = None

def pick_origin_by_name(df, q):
    """Interactively resolve the station-name query *q* to one row of *df*.

    The query is normalized with ``nrm`` and matched in this order:
      1. exact match on ``Name``;
      2. match after removing "駅" from both the query and the names;
      3. literal substring match, preferring names that do not end in a
         line-like suffix (線/方面/支線/ライン);
      4. names most similar to the query by ``difflib`` ratio (top 20).
    Steps 1 and 2 return the first hit immediately. Otherwise up to 20
    candidates are printed and the user chooses one by number; blank or
    invalid input selects the first candidate.

    Args:
        df: station table with ``Name``, ``Latitude`` and ``Longitude`` columns.
        q: raw query text entered by the user.

    Returns:
        The selected row of *df* as a Series.

    Raises:
        IndexError: if *df* offers no candidate at all.
    """
    qn = nrm(q)
    names = df["Name"]

    # Exact match, then match ignoring "駅".
    hit = df[names == qn]
    if not hit.empty:
        return hit.iloc[0]
    base = qn.replace("駅", "")
    hit = df[names.str.replace("駅", "", regex=False) == base]
    if not hit.empty:
        return hit.iloc[0]

    # Substring match. The query is user text, so match it literally
    # (regex=False): characters such as "(" or "+" must not be read as a
    # pattern. The suffix pattern uses a non-capturing group so pandas does
    # not warn about unused match groups.
    contains_base = names.str.contains(base, regex=False, na=False)
    line_like = names.str.contains(r"(?:線|方面|支線|ライン)$", na=False)
    part = df[contains_base & ~line_like]
    if part.empty:
        part = df[contains_base]
    if part.empty:
        # Fuzzy fallback: most similar names first, preferring names without
        # line-like keywords.
        scored = [(difflib.SequenceMatcher(None, qn, n).ratio(), n) for n in names.tolist()]
        scored.sort(reverse=True)
        cand_names = [n for _, n in scored if not pd.isna(n)]
        cand_names = [n for n in cand_names if not any(k in n for k in ["線","方面","支線","ライン"])] or [n for _, n in scored]
        part = df[names.isin(cand_names[:20])]
    if part.empty:
        raise IndexError(f"駅が見つかりません: {q!r}")

    # Show the candidates (names containing "駅" first, then by similarity)
    # and let the user choose.
    print("\n候補が複数あります。番号で選択（Enterで1）:")
    cand = part.assign(
        _sim=part["Name"].apply(lambda n: difflib.SequenceMatcher(None, qn, n).ratio()),
        _is_station=part["Name"].str.contains("駅", na=False).astype(int)
    ).sort_values(["_is_station","_sim","Name"], ascending=[False, False, True]).head(20).reset_index(drop=True)
    for i, row in cand.iterrows():
        print(f"{i+1:2d}: {row['Name']}  ({row['Latitude']:.5f}, {row['Longitude']:.5f})")
    sel = input("番号: ").strip()
    idx = 0
    try:
        # int() rather than str.isdigit(): isdigit() accepts characters such
        # as "²" that int() rejects.
        v = int(sel)
    except ValueError:
        v = 0
    if 1 <= v <= len(cand):
        idx = v - 1
    return cand.iloc[idx]

# No origin from coordinates: resolve the query as a station name instead.
if origin is None:
    origin = pick_origin_by_name(st, query)

# Origin coordinates as plain floats, then a short summary for the user
# (chosen station and how many names have a passenger count in pass_map).
olat, olon = float(origin["Latitude"]), float(origin["Longitude"])
print(f"\n出発駅: {origin['Name']} ({olat:.6f}, {olon:.6f})")
print(f"乗降人員ラベル付与駅数: {len(pass_map)}")

# ---------- 抽出（30/60分・保守設定を適用） ----------
def max_km_for_band(band):
    """Return the search radius in km for a time band of *band* minutes.

    The radius is speed x time x shrink factor, with the speed taken from
    ``BAND_SPEED_KMH`` (32.0 km/h when the band is not listed) and the factor
    from ``SHRINK_RATIO`` (1.0 when the band is not listed).
    """
    speed_kmh = BAND_SPEED_KMH.get(band, 32.0)
    shrink = SHRINK_RATIO.get(band, 1.0)
    return speed_kmh * band / 60.0 * shrink

# Radius per band, and the output rows collected for each band.
MAX_KM = {b: max_km_for_band(b) for b in TIME_BANDS}
rows = {b: [] for b in TIME_BANDS}

# Each station goes into the smallest band whose radius contains it, so the
# bands must be tried in ascending order regardless of how TIME_BANDS is
# written (e.g. [60, 30] would otherwise put everything into the 60 band).
bands_ascending = sorted(TIME_BANDS)
origin_name = origin["Name"]

# Iterate the three columns directly instead of building a Series per row.
for name, lat, lon in zip(st["Name"], st["Latitude"], st["Longitude"]):
    lat, lon = float(lat), float(lon)
    # Skip the origin station itself (same name and same coordinates).
    if name == origin_name and abs(lat - olat) < 1e-10 and abs(lon - olon) < 1e-10:
        continue
    km = hav_km(olat, olon, lat, lon)
    for b in bands_ascending:
        if km <= MAX_KM[b]:
            # Estimated minutes at the band's assumed speed.
            tmin = round(km / BAND_SPEED_KMH[b] * 60.0, 1)
            ppl = passengers_label(name)
            lbl = f"{name}（{ppl}）" if ppl else name
            rows[b].append({"Name": name, "Latitude": lat, "Longitude": lon,
                            "Time_min": tmin, "Band": f"<= {b}分", "Label": lbl})
            break

# ---------- 出力（My Maps用CSV：Labelに利用者数） ----------
def write_csv(path, items):
    """Write *items* to *path* as a UTF-8 CSV file.

    The header row is Name, Latitude, Longitude, Time_min, Band, Label; each
    item must be a mapping providing those six keys. An existing file at
    *path* is overwritten.
    """
    columns = ["Name", "Latitude", "Longitude", "Time_min", "Band", "Label"]
    with open(path, "w", newline="", encoding="utf-8") as handle:
        writer = csv.writer(handle)
        writer.writerow(columns)
        writer.writerows([record[col] for col in columns] for record in items)

# Write one CSV per time band under /content, named after the origin station,
# and print the path, row count, radius, assumed speed and shrink factor.
for b in TIME_BANDS:
    out = f"/content/{origin['Name']}_{b}min.csv"
    write_csv(out, rows[b])
    print("出力:", out, "件数:", len(rows[b]), " 半径(補正後km):", round(MAX_KM[b],2),
          " 速度[km/h]:", BAND_SPEED_KMH[b], " 縮小:", SHRINK_RATIO[b])

# Closing hints for the user: how to import the CSV into My Maps and which
# constants to adjust when the result is too wide or the S12 join is weak.
print("\nMy Maps：レイヤ→インポート→CSV / 位置=Latitude・Longitude / タイトル=Name / ラベル=Label（推奨）")
print("※ まだ広い場合：BAND_SPEED_KMH を下げる、または SHRINK_RATIO を 0.85 などに。S12結合が弱ければ NEAREST_JOIN_M を 400–600 に。")


（ヒント）座標入力でもOK：例 35.003666960967934, 135.75990277504576
