# 国土数値情報の取得

## Library Import

In [1]:
# データの取り扱いに関するライブラリ
import numpy as np # 高速計算
import pandas as pd # 表データの扱い

# 可視化に関するライブラリ
import matplotlib.pyplot as plt
import japanize_matplotlib

import geopandas as gpd
from sklearn.neighbors import NearestNeighbors

import gc
from pathlib import Path
import re
from pyogrio import read_dataframe

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Point ROOT_DIR at the directory where you stored the input files.
ROOT_DIR = '../input/'
train_file_path = f'{ROOT_DIR}train.csv'
test_file_path = f'{ROOT_DIR}test.csv'
intermediate_path = '../output/intermediate_file/'
gis_path = f'{ROOT_DIR}GISデータ/'

# Key columns and raw coordinate columns read from the train/test CSVs.
pkey_cols = ['target_ym', 'building_id', 'unit_id']
geo_cols = ['lat', 'lon']

# Version number embedded in the exported parquet file names.
geo_ver = 1

## File Import

In [3]:
# Keep only the key columns and the raw coordinates from each CSV.
train_df_geo = pd.read_csv(train_file_path).loc[:, [*pkey_cols, *geo_cols]]
test_df_geo = pd.read_csv(test_file_path).loc[:, [*pkey_cols, *geo_cols]]

## データの変換

#### 対象年の抽出

In [4]:
def parse_year(date_input):
    """
    Extract the leading four-digit year from a year-month style value.

    Parameters
    ----------
    date_input : Any
        Value whose string form starts with the year, e.g. ``201901`` or
        ``'2019-01'``. Surrounding whitespace is ignored.

    Returns
    -------
    int or float
        The first four characters as an int, or ``np.nan`` when the value
        does not start with four digits (missing values, short strings,
        signed numbers, arbitrary text).
    """
    head = str(date_input).strip()[:4]

    # Require four digits explicitly: int() on its own would happily turn
    # "-123" into -123 or " 201" into 201, which are not years.
    if len(head) < 4 or not head.isdigit():
        return np.nan

    try:
        return int(head)
    except ValueError:
        # str.isdigit() is also true for characters such as superscript
        # digits that int() rejects; treat those as unparseable.
        return np.nan

In [5]:
# Derive the year of each record from its target_ym value.
train_df_geo['target_year'] = train_df_geo['target_ym'].map(parse_year)
test_df_geo['target_year'] = test_df_geo['target_ym'].map(parse_year)

#### 測地系の変換(EPSG:6668 → EPSG:4612)

In [6]:
def fill_jgd_from_wgs(df, lon_wgs_col='lon', lat_wgs_col='lat'):
    """
    Add EPSG:4612 (JGD2000) coordinates derived from EPSG:6668 (JGD2011) ones.

    The columns named by ``lon_wgs_col`` / ``lat_wgs_col`` are read as
    longitude / latitude in EPSG:6668 and reprojected to EPSG:4612.

    Note that ``df`` is modified in place: the columns ``lon_jgd`` and
    ``lat_jgd`` are written onto it, and the same object is returned.
    """
    # Build points in the source CRS and reproject them in one go.
    source_points = gpd.points_from_xy(df[lon_wgs_col], df[lat_wgs_col])
    reprojected = gpd.GeoDataFrame(
        df, geometry=source_points, crs='EPSG:6668'
    ).to_crs(epsg=4612)

    # Store the reprojected longitude / latitude next to the originals.
    geom = reprojected.geometry
    df['lon_jgd'] = geom.x.values
    df['lat_jgd'] = geom.y.values

    return df

In [7]:
# Append lon_jgd / lat_jgd to both frames, reading the raw 'lon' / 'lat' columns.
train_df_geo = fill_jgd_from_wgs(train_df_geo, lon_wgs_col='lon', lat_wgs_col='lat')
test_df_geo = fill_jgd_from_wgs(test_df_geo, lon_wgs_col='lon', lat_wgs_col='lat')

In [8]:
# Preview the frame, including the lon_jgd / lat_jgd columns added by fill_jgd_from_wgs.
train_df_geo

Unnamed: 0,target_ym,building_id,unit_id,lat,lon,target_year,lon_jgd,lat_jgd
0,201901,206271,262186,35.047688,136.637467,2019,136.637467,35.047688
1,201901,83315,35726,35.074625,136.639936,2019,136.639936,35.074625
2,201901,140201,116820,35.072248,136.644708,2019,136.644708,35.072248
3,201901,216551,281648,35.003174,136.875602,2019,136.875602,35.003174
4,201901,134968,85898,35.002403,136.875754,2019,136.875754,35.002403
...,...,...,...,...,...,...,...,...
363919,202207,182260,153084,35.066899,136.688936,2022,136.688936,35.066899
363920,202207,118825,175430,35.064552,136.685451,2022,136.685451,35.064552
363921,202207,92766,314093,35.060995,136.686671,2022,136.686671,35.060995
363922,202207,212417,170589,34.933236,136.851469,2022,136.851469,34.933236


## 地価

In [None]:
# Load the published land-price points and report their CRS.
land_gdf = read_dataframe(gis_path + '公示地価/L01-23_GML/L01-23.geojson')
print(land_gdf.crs)  # EPSG:4612 (JGD2000) according to the recorded output

EPSG:4612


In [11]:
# TODO: also use the year-over-year change rate as a feature
# Attribute codes for the 2018-2022 prices follow the pattern L01_{year - 1921}.
year_to_col = {
    year: 'L01_{:03d}'.format(year - 1921)
    for year in range(2018, 2023)
}
rename_price_cols = {
    code: f'land_price_{yr}'
    for yr, code in year_to_col.items()
}

# Rename the current-year attributes first, then the historical price columns.
land_gdf = land_gdf.rename(columns={
    'L01_005': 'land_year',            # survey year (e.g. 2023)
    'L01_006': 'land_price_2023',      # published land price [yen/m^2]
    'L01_007': 'land_price_yoy_pct',   # year-over-year change [%]
}).rename(columns=rename_price_cols)

In [12]:
def _distance_weighted_price(prices: np.ndarray, dist_m: np.ndarray) -> np.ndarray:
    """Row-wise inverse-distance weighted mean of neighbour prices.

    Both arguments have shape (n_rows, n_neighbours). For a row where at
    least one neighbour lies at distance 0, the result is the plain mean of
    the prices of those coincident neighbours (1/d is undefined there).
    A NaN price among the neighbours that take part makes the row NaN.
    """
    zero_mask = dist_m == 0
    has_zero = zero_mask.any(axis=1)

    # Rows with an exact hit are answered by the "coincident" branch below;
    # give them a dummy distance so that 1/d stays finite here.
    safe_dist = np.where(has_zero[:, None], 1.0, dist_m)
    weights = 1.0 / safe_dist
    weights = weights / weights.sum(axis=1, keepdims=True)
    idw_mean = (weights * prices).sum(axis=1)

    n_coincident = np.maximum(zero_mask.sum(axis=1), 1)
    coincident_mean = np.where(zero_mask, prices, 0.0).sum(axis=1) / n_coincident

    return np.where(has_zero, coincident_mean, idw_mean)


def add_land_price_features_haversine(
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
    land_gdf: gpd.GeoDataFrame,
    lat_col: str,
    lon_col: str,
    year_col: str,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Attach land-price features taken from the nearest land-price points.

    For every property the three nearest rows of ``land_gdf`` are found with
    the haversine (great-circle) metric. Neighbours are chosen purely by
    distance among all rows of ``land_gdf``; the prices are then read from
    the columns ``land_price_{year}`` and ``land_price_{year - 1}``, where
    ``year`` is the property's ``year_col`` value.

    Added columns:

    - nearest_land_price / nearest_land_price_prev: price of the nearest point
    - distance_to_landpoint_m: distance to the nearest point [m]
    - log_land_price: log1p(nearest_land_price)
    - weighted_land_price_3 / weighted_land_price_3_prev: inverse-distance
      weighted mean price of the three nearest points
    - log_weighted_land_price_3: log1p(weighted_land_price_3)
    - land_price_yoy_nearest / land_price_yoy_w3: relative change vs. the
      previous year
    - land_price_dlog_nearest / land_price_dlog_w3: log1p difference vs. the
      previous year

    Coordinates must be in degrees and share one geographic CRS between the
    properties and ``land_gdf``.

    Rows whose coordinates are missing or out of range get NaN for every
    feature. Rows whose year is missing keep the distance but get NaN for
    all price features.

    Raises
    ------
    KeyError
        If ``land_gdf`` lacks a ``land_price_{year}`` or
        ``land_price_{year - 1}`` column needed by some row.
    """
    earth_radius_m = 6_371_000.0

    # 1) Stack train and test so that both are processed in one pass.
    combined_df = pd.concat([train_df, test_df], ignore_index=True)
    n_all = len(combined_df)

    # 2) Only rows with usable coordinates take part in the search.
    lat = combined_df[lat_col]
    lon = combined_df[lon_col]
    valid_mask = (
        lat.notna() & lon.notna()
        & lat.between(-90, 90)
        & lon.between(-180, 180)
    ).to_numpy()
    # ignore_index=True above makes index labels equal to row positions.
    valid_pos = np.flatnonzero(valid_mask)

    nearest_all = np.full(n_all, np.nan, dtype=float)
    dist_all = np.full(n_all, np.nan, dtype=float)
    weighted_all = np.full(n_all, np.nan, dtype=float)
    nearest_prev_all = np.full(n_all, np.nan, dtype=float)
    weighted_prev_all = np.full(n_all, np.nan, dtype=float)

    # 3) Land-price points as (lat, lon) in radians, as the haversine metric expects.
    land_X = np.deg2rad(np.column_stack([
        land_gdf.geometry.y.to_numpy(),
        land_gdf.geometry.x.to_numpy(),
    ]))

    # 4) 3-nearest-neighbour index over the land-price points.
    nn = NearestNeighbors(n_neighbors=3, metric="haversine")
    nn.fit(land_X)

    if len(valid_pos) > 0:
        # 5) Years of the valid rows; unparseable / missing years become NaN.
        years = (
            pd.to_numeric(combined_df[year_col], errors='coerce')
            .astype(float)
            .to_numpy()[valid_pos]
        )
        year_known = np.isfinite(years)
        years_int = np.where(year_known, years, 0).astype(int)
        unique_years = [int(y) for y in np.unique(years_int[year_known])]

        # Fail fast, before the expensive neighbour query, if a price column is absent.
        required = {f'land_price_{y - lag}' for y in unique_years for lag in (0, 1)}
        missing = sorted(required.difference(land_gdf.columns))
        if missing:
            raise KeyError(f'land_gdf is missing price columns: {missing}')

        # 6) Neighbour query; kneighbors returns haversine distances in radians.
        prop_X = np.deg2rad(np.column_stack([
            lat.to_numpy()[valid_pos],
            lon.to_numpy()[valid_pos],
        ]))
        distances_rad, indices = nn.kneighbors(prop_X)  # (n_valid, 3)
        dist_m = distances_rad * earth_radius_m

        dist_all[valid_pos] = dist_m[:, 0]

        # 7) Look prices up one year at a time (vectorised over the rows of that year).
        for y in unique_years:
            rows = np.flatnonzero(year_known & (years_int == y))
            target = valid_pos[rows]
            neighbours = indices[rows]
            row_dist = dist_m[rows]

            prices = (
                pd.to_numeric(land_gdf[f'land_price_{y}'], errors='coerce')
                .astype(float).to_numpy()[neighbours]
            )
            prices_prev = (
                pd.to_numeric(land_gdf[f'land_price_{y - 1}'], errors='coerce')
                .astype(float).to_numpy()[neighbours]
            )

            nearest_all[target] = prices[:, 0]
            weighted_all[target] = _distance_weighted_price(prices, row_dist)
            nearest_prev_all[target] = prices_prev[:, 0]
            weighted_prev_all[target] = _distance_weighted_price(prices_prev, row_dist)

    # 8) Attach the raw and derived columns.
    combined_df['nearest_land_price'] = nearest_all
    combined_df['distance_to_landpoint_m'] = dist_all
    combined_df['log_land_price'] = np.log1p(combined_df['nearest_land_price'])
    combined_df['weighted_land_price_3'] = weighted_all
    combined_df['log_weighted_land_price_3'] = np.log1p(combined_df['weighted_land_price_3'])
    combined_df['nearest_land_price_prev'] = nearest_prev_all
    combined_df['weighted_land_price_3_prev'] = weighted_prev_all

    # Floor the denominators so that tiny previous-year prices do not blow up the ratio.
    eps = 1.0
    denom_n = np.maximum(combined_df['nearest_land_price_prev'], eps)
    denom_w = np.maximum(combined_df['weighted_land_price_3_prev'], eps)

    combined_df['land_price_yoy_nearest'] = (
        combined_df['nearest_land_price'] - combined_df['nearest_land_price_prev']
    ) / denom_n
    combined_df['land_price_yoy_w3'] = (
        combined_df['weighted_land_price_3'] - combined_df['weighted_land_price_3_prev']
    ) / denom_w

    # Change expressed as a log difference (the original author's recommended form).
    combined_df['land_price_dlog_nearest'] = (
        np.log1p(combined_df['nearest_land_price'])
        - np.log1p(combined_df['nearest_land_price_prev'])
    )
    combined_df['land_price_dlog_w3'] = (
        np.log1p(combined_df['weighted_land_price_3'])
        - np.log1p(combined_df['weighted_land_price_3_prev'])
    )

    # 9) Split back into train / test by position.
    n_train = len(train_df)
    train_out = combined_df.iloc[:n_train].reset_index(drop=True)
    test_out = combined_df.iloc[n_train:].reset_index(drop=True)

    return train_out, test_out


In [13]:
# Add the land-price features, using the EPSG:4612 coordinates and the extracted year.
train_df_geo, test_df_geo = add_land_price_features_haversine(
    train_df_geo,
    test_df_geo,
    land_gdf,
    lat_col='lat_jgd',
    lon_col='lon_jgd',
    year_col='target_year',
)

## 1kmメッシュ将来人口

In [14]:
# Location of the pre-built population-mesh parquet file.
fu_pop_path = f'{gis_path}人口メッシュ/1km_mesh_2024_GEOJSON/future_pop_1km.parquet'

In [15]:
# in_dir = Path(gis_path + '人口メッシュ/1km_mesh_2024_GEOJSON')
# files = sorted(in_dir.glob('1km_mesh_2024_*_GEOJSON/*.geojson'))
# output_path = gis_path + '人口メッシュ/1km_mesh_2024_GEOJSON'

# gdfs = []
# base_crs = None

# keep_cols = [
#     'MESH_ID',
#     'SHICODE',
#     'PTN_2020', # 2020年の総数人口
#     'PTN_2025', 'PTN_2030', 'PTN_2035', 'PTN_2040', 'PTN_2045', 'PTN_2050', # 将来人口
#     'PTA_2025', 'RTA_2025', # 2025年男女計0～14歳人口/比率
#     'PTB_2025', 'RTB_2025', # 2025年男女計15～64歳人口/比率
#     'PTC_2025', 'RTC_2025', # 2025年男女計65歳以上人口/比率
#     'PTD_2025', 'RTD_2025', # 2025年男女計75歳以上人口/比率
#     'PTE_2025', 'RTE_2025', # 2025年男女計80歳以上人口/比率
#     'geometry',
# ]

In [16]:
# for fp in files:
#     gdf = read_dataframe(fp) # 高速化しないとめちゃ時間かかる
#     gdf_filtered = gdf[keep_cols]

#     # CRS を統一（最初のファイルの CRS に合わせる）
#     if base_crs is None:
#         base_crs = gdf.crs
#     else:
#         if gdf.crs != base_crs:
#             gdf = gdf.to_crs(base_crs)

#     gdfs.append(gdf_filtered)

# # 結合（index を振り直す）
# fu_pop_df = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=base_crs)
# print(f"crs: {fu_pop_df.crs}")

# del gdfs
# gc.collect()

In [17]:
# # --- 1) 対象年の列を抽出（例：PTN_2025, PTN_2030, ..., PTN_2070）
# ptn_cols = sorted(
#     [c for c in fu_pop_df.columns if re.match(r'^PTN_20\d{2}$', c)]
# )

# years = np.array([int(c.split('_')[1]) for c in ptn_cols])

# # --- 2) 線形回帰で傾きを計算
# def _calc_slope(row):
#     y = row[ptn_cols].values.astype(float)
#     if np.isnan(y).sum() >= len(y) - 1:
#         return np.nan
#     slope, _ = np.polyfit(years, y, 1)
#     return slope

# fu_pop_df['pop_trend_slope'] = fu_pop_df.apply(_calc_slope, axis=1)

# # --- 3) 正規化（0 除算・欠損対策込み）
# fu_pop_df['pop_trend_rate'] = (
#     fu_pop_df['pop_trend_slope'] /
#     fu_pop_df['PTN_2025'].replace({0: np.nan})
# )

In [18]:
# # ① 平面系へ
# fu_pop_df_m = fu_pop_df.to_crs('EPSG:6677')

# # ② centroid で Point 化
# fu_pop_df_m['geometry'] = fu_pop_df_m.geometry.centroid

# # ③ 必要なら緯度経度へ戻す
# fu_pop_df_pt = fu_pop_df_m.to_crs('EPSG:4612')
# fu_pop_df_pt['lon_4612'] = fu_pop_df_pt.geometry.x
# fu_pop_df_pt['lat_4612'] = fu_pop_df_pt.geometry.y

In [19]:
# fu_pop_df_pt.to_parquet(output_path + '/future_pop_1km.parquet')

#### 学習データ・予測データに結合して特徴量作成

In [20]:
# Load the population-mesh table from the parquet file at fu_pop_path.
fu_pop_df = gpd.read_parquet(fu_pop_path)

In [21]:
# #NOTE: 歪みが気になるのであればclipを用いる
# display(fu_pop_df[['pop_trend_slope', 'pop_trend_rate']].describe())
# q_low, q_high = fu_pop_df['pop_trend_rate'].quantile([0.01, 0.99])

# fu_pop_df['pop_trend_rate_clip'] = (
#     fu_pop_df['pop_trend_rate']
#     .clip(lower=q_low, upper=q_high)
# )

# display(fu_pop_df[['pop_trend_slope', 'pop_trend_rate_clip']].describe())

In [22]:
def _ensure_point_gdf_from_lonlat(df: pd.DataFrame,
                                 lon_col: str,
                                 lat_col: str,
                                 crs: str) -> gpd.GeoDataFrame:
    """Return a copy of ``df`` as a GeoDataFrame of points built from ``lon_col`` / ``lat_col`` in ``crs``."""
    points = gpd.points_from_xy(df[lon_col], df[lat_col])
    return gpd.GeoDataFrame(df.copy(), geometry=points, crs=crs)

def _idw(values: np.ndarray, dists: np.ndarray, power: float = 1.0, eps: float = 1e-6) -> np.ndarray:
    # values: (n_samples, k), dists: (n_samples, k)
    w = 1.0 / np.maximum(dists, eps) ** power
    return np.sum(w * values, axis=1) / np.sum(w, axis=1)

def add_pop_knn_features(train_df_geo: pd.DataFrame,
                         test_df_geo: pd.DataFrame,
                         fu_pop_df: gpd.GeoDataFrame,
                         pop_cols: list[str],
                         lon_col: str = 'lon_jgd',
                         lat_col: str = 'lat_jgd',
                         in_crs: str = 'EPSG:4612',
                         work_crs: str = 'EPSG:6677',
                         k: int = 3,
                         idw_power: float = 1.0) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Attach population-mesh features to every train/test point.

    For each point the ``k`` nearest population-mesh points are searched with
    Euclidean distance in ``work_crs``, and for each column in ``pop_cols``
    two features are added:

    - ``{col}_nn``: value at the nearest mesh point
    - ``{col}_idw{k}``: inverse-distance weighted mean over the k neighbours
      (equal to the nearest value when ``k == 1``)

    ``pop_dist_nn_m`` holds the distance to the nearest mesh point in the
    units of ``work_crs``.

    Rows whose coordinates are missing or become non-finite after projection
    cannot be queried; they receive NaN for every added feature instead of
    aborting the whole computation.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError('k must be >= 1')

    # --- 1) Turn train/test into point GeoDataFrames in the input CRS.
    tr_gdf = _ensure_point_gdf_from_lonlat(train_df_geo, lon_col, lat_col, in_crs)
    te_gdf = _ensure_point_gdf_from_lonlat(test_df_geo, lon_col, lat_col, in_crs)

    # --- 2) Reduce the population geometries to points (centroid for non-points).
    #        The centroid is taken in the projected CRS, then converted back.
    pop_gdf = fu_pop_df.copy()
    if pop_gdf.geometry.iloc[0].geom_type != 'Point':
        pop_gdf = pop_gdf.to_crs(work_crs)
        pop_gdf['geometry'] = pop_gdf.geometry.centroid
        pop_gdf = pop_gdf.to_crs(in_crs)

    # --- 3) Project everything into the working CRS used for distances.
    tr_m = tr_gdf.to_crs(work_crs)
    te_m = te_gdf.to_crs(work_crs)
    pop_m = pop_gdf.to_crs(work_crs)

    # --- 4) Build the neighbour index on the projected mesh points.
    pop_xy = np.column_stack([pop_m.geometry.x.values, pop_m.geometry.y.values])
    nn = NearestNeighbors(n_neighbors=k, algorithm='auto', metric='euclidean')
    nn.fit(pop_xy)

    def _apply(gdf_m: gpd.GeoDataFrame, prefix: str) -> pd.DataFrame:
        """Compute the feature columns for one projected point frame."""
        q_xy = np.column_stack([gdf_m.geometry.x.values, gdf_m.geometry.y.values])
        n_rows = len(q_xy)

        # kneighbors rejects NaN / inf input, so query only the usable rows.
        usable = np.isfinite(q_xy).all(axis=1)
        all_usable = bool(usable.all())

        def _spread(values: np.ndarray) -> np.ndarray:
            """Expand per-usable-row values to all rows, NaN where unusable."""
            if all_usable:
                # Nothing to pad: keep the values (and their dtype) untouched.
                return values
            filled = np.full(n_rows, np.nan,
                             dtype=np.result_type(values.dtype, np.float32))
            filled[usable] = values
            return filled

        if usable.any():
            dists, idxs = nn.kneighbors(q_xy[usable], return_distance=True)  # (n_usable, k)
        else:
            dists = np.empty((0, k), dtype=float)
            idxs = np.empty((0, k), dtype=int)

        out = pd.DataFrame(index=gdf_m.index)

        # Distance to the nearest mesh point.
        out[f'{prefix}_dist_nn_m'] = _spread(dists[:, 0].astype(np.float32))

        # Nearest value and IDW mean for every requested column.
        for col in pop_cols:
            vals = pop_m[col].values[idxs]  # (n_usable, k)

            out[f'{col}_nn'] = _spread(vals[:, 0])

            if k >= 2:
                out[f'{col}_idw{k}'] = _spread(
                    _idw(vals.astype(float), dists.astype(float), power=idw_power)
                )
            else:
                out[f'{col}_idw{k}'] = _spread(vals[:, 0])

        return out

    tr_feat = _apply(tr_m, 'pop')
    te_feat = _apply(te_m, 'pop')

    train_out = train_df_geo.join(tr_feat)
    test_out = test_df_geo.join(te_feat)

    return train_out, test_out


In [23]:
# Population-mesh attributes to sample at each property location.
pop_cols = [
    'PTN_2020',        # total population in 2020
    'RTA_2025',        # 2025, both sexes, ages 0-14 (population / ratio)
    'RTB_2025',        # 2025, both sexes, ages 15-64 (population / ratio)
    'RTC_2025',        # 2025, both sexes, ages 65+ (population / ratio)
    'RTD_2025',        # 2025, both sexes, ages 75+ (population / ratio)
    'RTE_2025',        # 2025, both sexes, ages 80+ (population / ratio)
    'pop_trend_rate',
]

train_df_geo, test_df_geo = add_pop_knn_features(
    train_df_geo,
    test_df_geo,
    fu_pop_df,
    pop_cols,
    k=3,
    idw_power=1.0,
)

## 道路密度・道路延長メッシュ

#### データの結合（最初の1回のみ実行すればよい処理。※以下のセルはコメントアウトされていないため、Run All すると毎回実行される点に注意）

In [41]:
# The road-mesh shapefiles live one folder below this directory.
output_path = f'{gis_path}道路密度・道路延長メッシュ'
in_dir = Path(output_path)
files = sorted(in_dir.glob('N04-10_*-jgd_GML/*.shp'))

gdfs = []

# keep_cols = [
#     'MESH_ID',
#     'SHICODE',
#     'PTN_2020', # 2020年の総数人口
#     'PTN_2025', 'PTN_2030', 'PTN_2035', 'PTN_2040', 'PTN_2045', 'PTN_2050', # 将来人口
#     'PTA_2025', 'RTA_2025', # 2025年男女計0～14歳人口/比率
#     'PTB_2025', 'RTB_2025', # 2025年男女計15～64歳人口/比率
#     'PTC_2025', 'RTC_2025', # 2025年男女計65歳以上人口/比率
#     'PTD_2025', 'RTD_2025', # 2025年男女計75歳以上人口/比率
#     'PTE_2025', 'RTE_2025', # 2025年男女計80歳以上人口/比率
#     'geometry',
# ]

In [42]:
# Read every road-mesh shapefile and merge them into one GeoDataFrame.
# The accumulators are (re)initialised here so the cell can be re-run on its own.
gdfs = []
base_crs = None  # CRS of the first file; every later file is aligned to it

for fp in files:
    gdf = read_dataframe(fp)  # pyogrio reader; loading is very slow otherwise
    # gdf_filtered = gdf[keep_cols]

    # Files that carry no CRS metadata are tagged as EPSG:4612.
    if gdf.crs is None:
        gdf = gdf.set_crs('EPSG:4612')

    # Unify the CRS across files (use the first file's CRS as the reference).
    if base_crs is None:
        base_crs = gdf.crs
    elif gdf.crs != base_crs:
        gdf = gdf.to_crs(base_crs)

    gdfs.append(gdf)

if not gdfs:
    raise FileNotFoundError(f'no road-mesh shapefiles matched under {in_dir}')

# Concatenate with a fresh index.
road_df = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True), crs=base_crs)
print(f"crs: {road_df.crs}")

del gdfs
gc.collect()

crs: EPSG:4612


0

In [44]:
# Peek at the first rows of the merged road-mesh table.
road_df.head(3)

Unnamed: 0,N04_001,N04_002,N04_003,N04_004,N04_005,N04_006,N04_007,N04_008,N04_009,N04_010,...,N04_048,N04_049,N04_050,N04_051,N04_052,N04_053,N04_054,N04_055,N04_056,geometry
0,36225700,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,...,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,"POLYGON ((122.87500 24.41667, 122.87500 24.425..."
1,36225701,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,...,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,"POLYGON ((122.88750 24.41667, 122.88750 24.425..."
2,36225702,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,...,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,unknown,"POLYGON ((122.90000 24.41667, 122.90000 24.425..."


In [57]:
# Rows whose N04_003 is neither '0' nor 'unknown', restricted to N04_003..N04_008.
road_df.loc[
    (road_df['N04_003'] != '0') & (road_df['N04_003'] != 'unknown'),
    ['N04_003', 'N04_004', 'N04_005', 'N04_006', 'N04_007', 'N04_008'],
]

Unnamed: 0,N04_003,N04_004,N04_005,N04_006,N04_007,N04_008
21038,1,0,0,0,1055,961
69212,1,1,0,0,462,431
71479,1,1,0,0,39,36
71580,2,0,0,0,1391,1302
122674,2,0,2,0,2378,2235
...,...,...,...,...,...,...
386432,2,2,0,0,1361,1446
386443,2,2,0,0,943,1002
386463,2,0,0,2,1647,1751
386473,2,2,0,0,283,301


## 駅乗降者数

## 特徴量の追加・削除

In [25]:
# List the columns accumulated so far, to choose which ones to export.
train_df_geo.columns

Index(['target_ym', 'building_id', 'unit_id', 'lat', 'lon', 'target_year',
       'lon_jgd', 'lat_jgd', 'nearest_land_price', 'distance_to_landpoint_m',
       'log_land_price', 'weighted_land_price_3', 'log_weighted_land_price_3',
       'nearest_land_price_prev', 'weighted_land_price_3_prev',
       'land_price_yoy_nearest', 'land_price_yoy_w3',
       'land_price_dlog_nearest', 'land_price_dlog_w3', 'pop_dist_nn_m',
       'PTN_2020_nn', 'PTN_2020_idw3', 'RTA_2025_nn', 'RTA_2025_idw3',
       'RTB_2025_nn', 'RTB_2025_idw3', 'RTC_2025_nn', 'RTC_2025_idw3',
       'RTD_2025_nn', 'RTD_2025_idw3', 'RTE_2025_nn', 'RTE_2025_idw3',
       'pop_trend_rate_nn', 'pop_trend_rate_idw3'],
      dtype='object')

In [26]:
# Feature columns to export: land-price features first, then the
# nearest-neighbour / IDW pair for each population attribute.
add_cols = [
    'nearest_land_price',
    'weighted_land_price_3',
    'distance_to_landpoint_m',
    'log_land_price',
    'log_weighted_land_price_3',
    'land_price_yoy_nearest',
    'land_price_yoy_w3',
    'land_price_dlog_nearest',
    'land_price_dlog_w3',
] + [
    f'{base}_{suffix}'
    for base in (
        'PTN_2020', 'RTA_2025', 'RTB_2025', 'RTC_2025',
        'RTD_2025', 'RTE_2025', 'pop_trend_rate',
    )
    for suffix in ('nn', 'idw3')
]

## 出力

In [29]:
# Export the key columns plus the selected features for train and test.
# Create the output directory first: to_parquet does not create missing parent directories.
Path(intermediate_path).mkdir(parents=True, exist_ok=True)

train_df_geo[pkey_cols + add_cols].to_parquet(f'{intermediate_path}train_df_geo_v{geo_ver}.parquet')
test_df_geo[pkey_cols + add_cols].to_parquet(f'{intermediate_path}test_df_geo_v{geo_ver}.parquet')