In [2]:
#日本海側、太平洋側をきっちり分ける領域
#陸上は気象庁の予報区分に準拠
#東北日本海側 青森県（津軽地方）、秋田県、山形県、福島県（会津地方）
#東北太平洋側 青森県（下北、三八上北地方）、岩手県、宮城県、福島県（中通り、浜通り地方）
#海上は下北半島周辺も考慮し、東経140.5度で区切る

In [None]:
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd

# === 1. Prefecture boundary data ===
pref_gdf = gpd.read_file("/mnt/jet12/makoto/module/japan_prefecture.geojson")

# === 2. Keep only the six Tohoku prefectures ===
tohoku_pref_names = ["青森県", "岩手県", "秋田県", "宮城県", "山形県", "福島県"]
is_tohoku_pref = pref_gdf["nam_ja"].isin(tohoku_pref_names)
tohoku_gdf = pref_gdf[is_tohoku_pref]

# === 3. Source polygons for the Sea-of-Japan / Pacific sides ===
#   Prefecture borders are not used directly here; the N03 file is read so
#   that rows can later be grouped by the name stored in "N03_004".
n03_gdf = gpd.read_file("/mnt/jet12/makoto/module/N03_001_2023.geojson")

# Restrict the N03 rows to the six Tohoku prefectures. The copy makes the
# subset independent of n03_gdf before new columns are added to it.
is_tohoku_row = n03_gdf["N03_001"].isin(tohoku_pref_names)
tohoku_n03 = n03_gdf[is_tohoku_row].copy()

# ---- Split Aomori Prefecture into forecast sub-regions ----
# Character variants that appear in Aomori municipality names. Both spellings
# are folded to the official form (鰺ヶ沢町, 六ヶ所村, 外ヶ浜町) before matching,
# so the lookup works whichever variant the input data uses.
_AOMORI_CHAR_VARIANTS = str.maketrans({"鯵": "鰺", "ケ": "ヶ"})

# Substrings identifying Tsugaru municipalities (Sea of Japan side).
_TSUGARU_KEYS = (
    "青森", "弘前", "黒石", "五所川原", "つがる", "平川", "平内", "今別",
    "蓬田", "外ヶ浜", "鰺ヶ沢", "深浦", "西目屋", "藤崎", "大鰐", "田舎館",
    "板柳", "鶴田", "中泊",
)

# Substrings identifying Sanpachi-Kamikita municipalities (Pacific side).
_SANPACHI_KAMIKITA_KEYS = (
    "八戸", "十和田", "三沢", "野辺地", "七戸", "六戸", "横浜", "東北",
    "六ヶ所", "おいらせ", "三戸", "五戸", "田子", "南部", "階上", "新郷",
)


def aomori_area(city):
    """Return the Aomori forecast sub-region for a municipality name.

    Matching is by substring, so "青森市" matches the key "青森". Tsugaru keys
    are tested first, then Sanpachi-Kamikita keys; any name matching neither
    list is treated as Shimokita.

    Args:
        city: Municipality name (str), e.g. "弘前市".

    Returns:
        "津軽地方", "三八上北地方" or "下北地方".
    """
    # Without this normalisation a name written 鰺ヶ沢町 would miss the key
    # 鯵ヶ沢 and silently fall through to the Shimokita (Pacific) default;
    # likewise 六ヶ所村 vs. the key 六ケ所.
    name = city.translate(_AOMORI_CHAR_VARIANTS)

    if any(key in name for key in _TSUGARU_KEYS):
        return "津軽地方"  # Sea of Japan side
    if any(key in name for key in _SANPACHI_KAMIKITA_KEYS):
        return "三八上北地方"  # Pacific side
    return "下北地方"  # Pacific side

def _aomori_subregion(row):
    """Return the Aomori sub-region for an Aomori row, otherwise None."""
    if row["N03_001"] != "青森県":
        return None
    return aomori_area(row["N03_004"])


# Rows outside Aomori get None here.
tohoku_n03["subregion"] = tohoku_n03.apply(_aomori_subregion, axis=1)

# Fukushima Prefecture sub-regions
def fukushima_area(city):
    """Return the Fukushima sub-region for a municipality name.

    The name is tested by substring against the Aizu keys first and the
    Nakadori keys second; a name matching neither is reported as Hamadori.

    Args:
        city: Municipality name (str), e.g. "郡山市".

    Returns:
        "会津地方", "中通り" or "浜通り".
    """
    aizu_keys = (
        "会津若松", "喜多方", "下郷", "檜枝岐", "只見", "南会津", "北塩原",
        "西会津", "磐梯", "猪苗代", "会津坂下", "湯川", "柳津", "三島", "金山",
        "昭和", "会津美里",
    )
    nakadori_keys = (
        "福島", "郡山", "白河", "須賀川", "二本松", "田村", "伊達", "本宮",
        "桑折", "国見", "川俣", "大玉", "鏡石", "天栄", "西郷", "泉崎", "中島",
        "矢吹", "棚倉", "矢祭", "塙町", "鮫川", "石川", "玉川", "平田", "浅川",
        "古殿", "三春", "小野",
    )

    # Order matters: Aizu is checked before Nakadori.
    for label, keys in (("会津地方", aizu_keys), ("中通り", nakadori_keys)):
        for key in keys:
            if key in city:
                return label
    return "浜通り"

def _with_fukushima_subregion(row):
    """Return the Fukushima sub-region for Fukushima rows; keep others as-is."""
    if row["N03_001"] == "福島県":
        return fukushima_area(row["N03_004"])
    return row["subregion"]


# Only Fukushima rows are overwritten; every other row keeps the value that
# is already stored in "subregion".
tohoku_n03["subregion"] = tohoku_n03.apply(_with_fukushima_subregion, axis=1)

# === 4. Select the areas that make up each side ===
japan_sea_areas = ["津軽地方", "秋田県", "山形県", "会津地方"]
pacific_areas = ["下北地方", "三八上北地方", "岩手県", "宮城県", "中通り", "浜通り"]

# Rows without a sub-region are labelled with their prefecture name, so whole
# prefectures and sub-regions can be selected through one column.
tohoku_n03["area_label"] = tohoku_n03.apply(
    lambda r: r["subregion"] if pd.notna(r["subregion"]) else r["N03_001"], axis=1
)


def _dissolve_side(area_labels):
    """Merge every row whose area_label is in *area_labels* into one geometry.

    Returns a GeoDataFrame (the result of dissolve()) reprojected to EPSG:4326.
    """
    members = tohoku_n03[tohoku_n03["area_label"].isin(area_labels)]
    return members.dissolve().to_crs("EPSG:4326")


# Each side is dissolved exactly once. A separate dissolve().buffer(0) pass
# would be wasted work if its result were then replaced by these assignments.
# NOTE(review): no topology repair is applied to the dissolved geometry; if
# invalid polygons turn out to be a problem, repair the geometry column here.
japan_sea_gdf = _dissolve_side(japan_sea_areas)
pacific_gdf = _dissolve_side(pacific_areas)

# Read the CSV; "dtst" and "dten" are kept as strings.
csvdir = '/mnt/jet12/makoto/extract_senjo/ext_sun_edd_a/csv/total_4-10_2000-2024_ratio2.0_allrain.csv'
df = pd.read_csv(csvdir, dtype={'dtst': str, 'dten': str})

# Mask by latitude/longitude box (bounds inclusive on both ends).
# NOTE(review): lat1, lat2, lon1 and lon2 are not defined in this part of the
# file; presumably they are set in an earlier cell - confirm before running.
in_lat_range = df['lat'].between(lat1, lat2)
in_lon_range = df['lon'].between(lon1, lon2)
dfRRA = df[in_lat_range & in_lon_range]


# === 5. Convert the observation rows to a GeoDataFrame ===
# dfRRA is a filtered selection of df; take an explicit copy so that adding a
# column does not write into a slice (pandas chained-assignment warning).
dfRRA = dfRRA.copy()

# Build all points in one vectorised call (x = longitude, y = latitude)
# instead of creating a shapely Point per row.
dfRRA["geometry"] = gpd.points_from_xy(dfRRA["lon"], dfRRA["lat"])
dfRRA_gdf = gpd.GeoDataFrame(dfRRA, geometry="geometry", crs="EPSG:4326")

# === 6. Mask the points by side (land) ===
# The land polygons are grown by about 1 km before the join. The buffer
# distance is only meaningful in a CRS whose units are true metres in the
# study area. EPSG:3857 (Web Mercator) stretches lengths by roughly
# 1 / cos(latitude), so 1000 units there is well under 1 km on the ground at
# Tohoku latitudes. UTM zone 54N (138E-144E) is used instead.
METRIC_CRS = "EPSG:32654"  # WGS 84 / UTM zone 54N
LAND_BUFFER_M = 1000       # buffer distance in metres

sea_buf = japan_sea_gdf.to_crs(METRIC_CRS).buffer(LAND_BUFFER_M).to_crs("EPSG:4326")
pac_buf = pacific_gdf.to_crs(METRIC_CRS).buffer(LAND_BUFFER_M).to_crs("EPSG:4326")

# buffer() returns a GeoSeries; sjoin needs GeoDataFrames.
sea_buf_gdf = gpd.GeoDataFrame(geometry=sea_buf, crs="EPSG:4326")
pac_buf_gdf = gpd.GeoDataFrame(geometry=pac_buf, crs="EPSG:4326")

# Spatial join (land): keep the points lying within each buffered polygon.
df_japan_sea = gpd.sjoin(dfRRA_gdf, sea_buf_gdf, predicate="within")
df_pacific   = gpd.sjoin(dfRRA_gdf, pac_buf_gdf, predicate="within")

# Remove duplicates, giving priority to the Sea of Japan side: the two
# buffers can overlap, so a point found in both is dropped from the Pacific set.
dup_index = df_japan_sea.index.intersection(df_pacific.index)
df_pacific = df_pacific[~df_pacific.index.isin(dup_index)]

# === 7. Extract the points that are not on land (offshore) ===
# A point is offshore when its index appears in neither land join result.
land_index = df_japan_sea.index.union(df_pacific.index)
is_offshore = ~dfRRA_gdf.index.isin(land_index)
df_offshore = dfRRA_gdf[is_offshore].copy()

# === 8. Classify offshore points by longitude ===
# Longitude separating the Sea of Japan side from the Pacific side for
# offshore points. The value in use is 140.5.
lon_boundary = 140.5


def classify_offshore(lon):
    """Return the offshore side label for a longitude.

    Longitudes strictly below lon_boundary are labelled Sea of Japan side;
    everything else, including the boundary value itself, is labelled
    Pacific side.
    """
    return "日本海側（海上）" if lon < lon_boundary else "太平洋側（海上）"

# Label every offshore point from its longitude, one value at a time.
df_offshore["side"] = df_offshore["lon"].map(classify_offshore)

# === 9. Convert to plain DataFrames ===
# Columns that exist only for the spatial join. errors="ignore" lets the same
# list be applied to every frame, whether or not it has "index_right".
cols_to_drop = ["geometry", "index_right"]


def _as_plain_frame(frame):
    """Return *frame* as a pandas DataFrame without the spatial-join columns."""
    return pd.DataFrame(frame.drop(columns=cols_to_drop, errors="ignore"))


df_japan_sea = _as_plain_frame(df_japan_sea)
df_pacific   = _as_plain_frame(df_pacific)

# Label the land rows of each side.
df_japan_sea["side"] = "日本海側"
df_pacific["side"] = "太平洋側"

# Merge land and offshore rows. The offshore frame is stripped of its geometry
# as well, so the combined table has the same columns for every row instead of
# a geometry column that is filled only for offshore points. df_offshore
# itself is left untouched.
df_combined = pd.concat(
    [df_japan_sea, df_pacific, _as_plain_frame(df_offshore)],
    ignore_index=True,
)

# Sea of Japan side (land + offshore)
df_all_sea = df_combined[df_combined["side"].isin(["日本海側", "日本海側（海上）"])]

# Pacific side (land + offshore)
df_all_pac = df_combined[df_combined["side"].isin(["太平洋側", "太平洋側（海上）"])]