In [None]:
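# Map bdong (legal dong) <-> hdong (administrative dong) wherever their polygon intersection ratio exceeds a threshold (intersection pct > n)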

In [None]:
from glob import glob
from collections import defaultdict

import pandas as pd
import geopandas as gpd

In [None]:
def read_gdf(file_path, encoding):
    """Read ``file_path`` with ``geopandas.read_file``, forwarding ``encoding``.

    Args:
        file_path: Path handed to ``gpd.read_file``.
        encoding: Text encoding passed through as the ``encoding`` keyword.

    Returns:
        Whatever ``gpd.read_file`` returns for the file.
    """
    return gpd.read_file(file_path, encoding=encoding)


def gdf2df(gdf):
    """Pass ``gdf`` through the ``pandas.DataFrame`` constructor and return the result."""
    plain_df = pd.DataFrame(gdf)
    return plain_df


def read_gdf_with_cp949(file_path):
    """Call ``read_gdf`` on ``file_path`` with the encoding fixed to ``"cp949"``."""
    return read_gdf(file_path, "cp949")


def read_shp(file_path):
    """Read ``file_path`` as cp949 via ``read_gdf_with_cp949`` and convert it with ``gdf2df``."""
    return gdf2df(read_gdf_with_cp949(file_path))

In [None]:
def read_excel(file_path, params):
    """Read an Excel file with ``pandas.read_excel``.

    Args:
        file_path: Path handed to ``pd.read_excel``.
        params: Mapping of keyword arguments expanded into the ``pd.read_excel`` call.

    Returns:
        The object returned by ``pd.read_excel``.
    """
    sheet_df = pd.read_excel(file_path, **params)
    return sheet_df


# Keyword arguments for pd.read_excel: no index column, header taken from row
# index 1, and the three code columns converted with ``str`` so they stay text.
read_excel_params = {
    "index_col": None,
    "header": 1,
    "converters": {"시도코드": str, "시군구코드": str, "읍면동코드": str},
}


def read_excel_with_params(file_path):
    """Read ``file_path`` through ``read_excel`` using ``read_excel_params``."""
    return read_excel(file_path, read_excel_params)

In [None]:
def read_csv(file_path, params):
    """Read a CSV with ``pandas.read_csv``.

    Args:
        file_path: Path or file-like object handed to ``pd.read_csv``.
        params: Mapping of keyword arguments expanded into the ``pd.read_csv`` call.

    Returns:
        The DataFrame returned by ``pd.read_csv``.
    """
    csv_df = pd.read_csv(file_path, **params)
    return csv_df

# Keyword arguments for pd.read_csv: the three code columns are forced to ``str``
# so they are not parsed as numbers.
read_csv_params = {
    "dtype": {"시도코드": str, "시군구코드": str, "법정동코드": str},
}


def read_csv_with_params(file_path):
    """Read ``file_path`` through ``read_csv`` using ``read_csv_params``."""
    return read_csv(file_path, read_csv_params)

In [None]:
# Read hdong (administrative dong) data

In [None]:
## params
# Column names given to both the hdong and the bdong frame after preprocessing.
preproc_cols = [
    "detail_name",
    "detail_code",
    "rough_name",
    "geometry",
]

# hdong inputs: the region-code workbook and the boundary shapefile.
hdong_excel_file_path = "./data/raw_data/행정동_좌표/센서스 공간정보 지역 코드.xlsx"
hdong_shp_file_path = "data/raw_data/행정동_좌표/BND_ADM_DONG_PG/BND_ADM_DONG_PG.shp"

In [None]:
# Region-code workbook (code/name columns) and the hdong boundary shapes.
hdong_shp_meta_df = read_excel_with_params(hdong_excel_file_path)
hdong_shp_df = read_shp(hdong_shp_file_path)

In [None]:
# Join key for the shapefile: the sido, sigungu and eup/myeon/dong code strings concatenated.
hdong_shp_meta_df["ADM_CD"] = (
    hdong_shp_meta_df["시도코드"] + hdong_shp_meta_df["시군구코드"] + hdong_shp_meta_df["읍면동코드"]
)

# rough_name = "<sido name>_<sigungu name>". A missing sigungu name is filled with ""
# and the dangling "_" is stripped. The vectorised .str.strip leaves NaN rows
# (e.g. a missing sido name) as NaN instead of raising like x.strip() on a float.
hdong_shp_meta_df["rough_name"] = (
    hdong_shp_meta_df["시도명칭"] + "_" + hdong_shp_meta_df["시군구명칭"].fillna("")
).str.strip("_")

In [None]:
# Attach rough_name to each hdong shape by ADM_CD; the left join keeps every shape row.
preproc_hdong_shp_df = hdong_shp_df.merge(
    hdong_shp_meta_df[["ADM_CD", "rough_name"]],
    how="left",
    on="ADM_CD",
)

# Keep the four columns of interest and rename them to the shared schema.
preproc_hdong_shp_df = preproc_hdong_shp_df[["ADM_NM", "ADM_CD", "rough_name", "geometry"]]
preproc_hdong_shp_df.columns = preproc_cols

In [None]:
# Read bdong (legal dong) data

In [None]:
# Params
# glob() returns matches in arbitrary filesystem order; sort them so the
# concatenated bdong frame (and everything derived from it) is reproducible.
bdong_shp_paths = sorted(glob("./data/raw_data/법정동_좌표/*/*.shp"))
bdong_csv_path = "./data/preproc_data/bdong_raw.csv"

In [None]:
# Fail with a clear message instead of pandas' generic "No objects to concatenate".
if not bdong_shp_paths:
    raise FileNotFoundError("No bdong shapefiles found: bdong_shp_paths is empty")

# Stack every bdong shapefile into one frame; ignore_index avoids the repeated
# 0..n labels that each per-file frame would otherwise contribute.
bdong_shp_df = pd.concat(
    [read_shp(shp_path) for shp_path in bdong_shp_paths],
    ignore_index=True,
)
bdong_base = read_csv_with_params(bdong_csv_path)

In [None]:
# rough_name = "<sido name>_<sigungu name>". A missing sigungu name is filled with ""
# and the dangling "_" is stripped. The vectorised .str.strip leaves NaN rows as NaN
# instead of raising like x.strip() on a float.
bdong_base["rough_name"] = (
    bdong_base["시도명"] + "_" + bdong_base["시군구명"].fillna("")
).str.strip("_")

# Lookup: sigungu code -> rough_name (for a repeated code the last row wins).
rough_code_name_dict = bdong_base.set_index("시군구코드")["rough_name"].to_dict()

In [None]:
# Translate each shape's COL_ADM_SE code (as text) to its rough_name;
# codes missing from the lookup become NaN.
bdong_shp_df["rough_name"] = (
    bdong_shp_df["COL_ADM_SE"]
    .astype(str)
    .map(rough_code_name_dict)
)

# Keep the four columns of interest and rename them to the shared schema.
preproc_bdong_shp_df = bdong_shp_df[["EMD_NM", "EMD_CD", "rough_name", "geometry"]]
preproc_bdong_shp_df.columns = preproc_cols

In [None]:
# check data

In [None]:
# From here on the *_shp_df names refer to copies of the preprocessed frames
# (shared column schema), not to the raw shapefile frames loaded earlier.
hdong_shp_df = preproc_hdong_shp_df.copy()
bdong_shp_df = preproc_bdong_shp_df.copy()

In [None]:
# Compare the rough_name vocabularies of both sources before any normalisation.
bdong_rough_names = set(bdong_shp_df["rough_name"])
hdong_rough_names = set(hdong_shp_df["rough_name"])

# (names only in bdong, names only in hdong)
(bdong_rough_names.difference(hdong_rough_names), hdong_rough_names.difference(bdong_rough_names))

In [None]:
# Attempt 1: cut each hdong rough_name down to its first whitespace-delimited
# token, then compare again. (preproc_1 is redefined identically in later cells.)
preproc_1 = lambda x: x.split()[0]

bdong_rough_names = set(bdong_shp_df["rough_name"])
hdong_rough_names = {preproc_1(name) for name in hdong_shp_df["rough_name"]}

# (names only in bdong, names only in hdong)
(bdong_rough_names.difference(hdong_rough_names), hdong_rough_names.difference(bdong_rough_names))

In [None]:
# Attempt 2: on top of the first-token cut, rewrite "전라북도" to "전북특별자치도"
# in the hdong names, then compare again.
preproc_1 = lambda x: x.split()[0]
preproc_2 = lambda x: x.replace("전라북도", "전북특별자치도")
preproc = lambda x: preproc_2(preproc_1(x))

bdong_rough_names = set(bdong_shp_df["rough_name"])
hdong_rough_names = {preproc(name) for name in hdong_shp_df["rough_name"]}

# (names only in bdong, names only in hdong)
(bdong_rough_names.difference(hdong_rough_names), hdong_rough_names.difference(bdong_rough_names))

In [None]:
def preproc_1(x):
    """Return the first whitespace-delimited token of ``x``."""
    return x.split()[0]


def preproc_2(x):
    """Replace every "전라북도" in ``x`` with "전북특별자치도"."""
    return x.replace("전라북도", "전북특별자치도")


def preproc_3(x):
    """Map the exact value "세종특별자치시_세종시" to "세종특별자치시"; pass anything else through."""
    return "세종특별자치시" if x == "세종특별자치시_세종시" else x


def preproc(x):
    """Normalise a rough_name by applying ``preproc_1``, ``preproc_2`` and ``preproc_3`` in that order."""
    return preproc_3(preproc_2(preproc_1(x)))

# Final comparison: hdong names after the full preproc pipeline vs. raw bdong names.
bdong_rough_names = set(bdong_shp_df["rough_name"])
hdong_rough_names = {preproc(name) for name in hdong_shp_df["rough_name"]}

# (names only in bdong, names only in hdong)
(bdong_rough_names.difference(hdong_rough_names), hdong_rough_names.difference(bdong_rough_names))

In [None]:
# Unify rough_name: run the same preproc normalisation over both frames so that
# their rough_name values can be compared for equality.
bdong_shp_df["rough_name"] = bdong_shp_df["rough_name"].map(preproc)
hdong_shp_df["rough_name"] = hdong_shp_df["rough_name"].map(preproc)

In [None]:
# Preprocessing done: show five random rows of each frame (hdong first, then bdong).
display(
    hdong_shp_df.sample(5),
    bdong_shp_df.sample(5),
)

In [None]:
def calc_intersection_pct(polygon_a, polygon_b):
    """Return the overlap of two geometries relative to the smaller one.

    The result is ``intersection.area / min(a.area, b.area)``, i.e. a ratio
    (1.0 means the smaller geometry lies completely inside the other), not a
    value on a 0-100 scale.

    Args:
        polygon_a: Geometry exposing ``.area`` and ``.intersection(other)``, or ``None``.
        polygon_b: Geometry exposing ``.area`` and ``.intersection(other)``, or ``None``.

    Returns:
        The overlap ratio as a float. ``0.0`` is returned when either geometry
        is ``None`` or when the smaller area is not positive, because the
        ratio is undefined there and the plain division would raise.
    """
    if polygon_a is None or polygon_b is None:
        return 0.0

    base_area = min(polygon_a.area, polygon_b.area)
    # "not > 0" also covers NaN areas, which would otherwise propagate as NaN.
    if not base_area > 0:
        return 0.0

    return polygon_a.intersection(polygon_b).area / base_area


def get_a2b_dict(a_df, b_df, pct):
    """Map every ``detail_code`` of ``a_df`` to the overlapping codes of ``b_df``.

    A row of ``b_df`` is a candidate for a row of ``a_df`` only when both carry
    the same ``rough_name``; it is recorded as a match when
    ``calc_intersection_pct`` of the two geometries is strictly greater than
    ``pct``.

    Args:
        a_df: DataFrame with ``rough_name``, ``detail_code`` and ``geometry`` columns.
        b_df: DataFrame with the same three columns.
        pct: Overlap-ratio threshold that must be exceeded.

    Returns:
        ``defaultdict(list)`` keyed by the ``detail_code`` values of ``a_df`` in
        row order. Each value lists the matching ``b_df`` codes, or is
        ``["None"]`` (the string) when the code has no match at all.
    """
    # Index the b rows by rough_name once instead of re-filtering b_df for every a row.
    # Missing names are skipped: they never compared equal in an ``==`` filter either.
    b_by_rough_name = defaultdict(list)
    for b_name, b_code, b_geom in zip(b_df["rough_name"], b_df["detail_code"], b_df["geometry"]):
        if pd.isna(b_name):
            continue
        b_by_rough_name[b_name].append((b_code, b_geom))

    a2b_dict = defaultdict(list)
    for a_name, a_code, a_geom in zip(a_df["rough_name"], a_df["detail_code"], a_df["geometry"]):
        matches = a2b_dict[a_code]  # registers the key even when nothing matches
        candidates = () if pd.isna(a_name) else b_by_rough_name.get(a_name, ())
        for b_code, b_geom in candidates:
            if calc_intersection_pct(a_geom, b_geom) > pct:
                matches.append(b_code)

    # Add the "None" placeholder only after all rows are processed, so a code that
    # appears on several rows never ends up with both "None" and real matches.
    for matches in a2b_dict.values():
        if not matches:
            matches.append("None")
    return a2b_dict

In [None]:
# Overlap-ratio threshold that a pair must strictly exceed to count as a match.
pct = 0.3

# Build the mapping in both directions with the same threshold.
hdong2bdong_dict = get_a2b_dict(hdong_shp_df, bdong_shp_df, pct)
bdong2hdong_dict = get_a2b_dict(bdong_shp_df, hdong_shp_df, pct)

In [None]:
# Row count of the hdong frame vs. number of keys in the hdong -> bdong mapping;
# the two are equal when every hdong detail_code is unique.
len(hdong_shp_df), len(hdong2bdong_dict)

In [None]:
# Row count of the bdong frame vs. number of keys in the bdong -> hdong mapping;
# the two are equal when every bdong detail_code is unique.
len(bdong_shp_df), len(bdong2hdong_dict)

In [None]:
def _mapping_to_long_df(mapping, key_col, value_col):
    """Turn ``{key: [value, ...]}`` into a long frame with one row per (key, value) pair.

    Args:
        mapping: Dict whose values are lists.
        key_col: Column name for the dict keys.
        value_col: Column name for the exploded list items.

    Returns:
        DataFrame with columns ``[key_col, value_col]``; rows that came from the
        same key share an index label.
    """
    # Build the two columns directly rather than creating a one-row frame with
    # one column per key and transposing it.
    wide_df = pd.DataFrame(
        {key_col: list(mapping.keys()), value_col: list(mapping.values())}
    )
    return wide_df.explode(value_col)


bdong2hdong_df = _mapping_to_long_df(bdong2hdong_dict, "bdong", "hdong")
hdong2bdong_df = _mapping_to_long_df(hdong2bdong_dict, "hdong", "bdong")

In [None]:
# Spot-check a single hdong: gather the bdong codes mapped to it and the shapes
# used by the plot further down.
# The code is pinned; for a random one use hdong2bdong_df["hdong"].sample().iloc[0].
hdong_code = "11010530"
bdong_codes = set(hdong2bdong_df.loc[hdong2bdong_df["hdong"] == hdong_code, "bdong"])

# Row(s) of the selected hdong, and the rough_name it belongs to.
hdong_tmp = hdong_shp_df[hdong_shp_df["detail_code"] == hdong_code]
rough_name = hdong_tmp["rough_name"].iloc[0]

# bdong shapes mapped to the hdong, and every hdong sharing its rough_name (background).
bdong_tmp = bdong_shp_df[bdong_shp_df["detail_code"].isin(bdong_codes)]
base_tmp = hdong_shp_df[hdong_shp_df["rough_name"] == rough_name]
bdong_tmp

In [None]:
# EDA (Check result)

In [None]:
import matplotlib.pyplot as plt

In [None]:
fig, ax = plt.subplots(figsize=(6, 6))

# Drawn back to front: every hdong with the same rough_name (grey), the bdong
# shapes mapped to the selected hdong (blue), then the selected hdong itself (red).
layers = [
    (base_tmp, "grey", "region"),
    (bdong_tmp, "blue", "included"),
    (hdong_tmp, "red", "main"),
]
for layer_df, layer_color, layer_label in layers:
    gdf = gpd.GeoDataFrame(layer_df, geometry=layer_df["geometry"])
    gdf.plot(ax=ax, color=layer_color, label=layer_label)

ax.set_title(f"hdong {hdong_code}: region (grey), mapped bdong (blue), hdong (red)")
fig.tight_layout()

In [None]:
# save
import json

In [None]:
# Write both code mappings as JSON. The encoding is pinned to UTF-8 because
# ensure_ascii=False writes non-ASCII characters verbatim and open() would
# otherwise fall back to the platform's locale encoding.
with open(
    "./data/preproc_data/행정동_법정동_매퍼/by_intersection_n_pct/bdong2hdong_dict.json",
    "w",
    encoding="utf-8",
) as f:
    json.dump(bdong2hdong_dict, f, indent=4, ensure_ascii=False)

with open(
    "./data/preproc_data/행정동_법정동_매퍼/by_intersection_n_pct/hdong2bdong_dict.json",
    "w",
    encoding="utf-8",
) as f:
    json.dump(hdong2bdong_dict, f, indent=4, ensure_ascii=False)

In [None]:
# Write the long-format mappings as CSV. index=False keeps the (repeated)
# row labels out of the file, so no index reset is needed first.
hdong2bdong_df.to_csv(
    "./data/preproc_data/행정동_법정동_매퍼/by_intersection_n_pct/hdong2bdong_df.csv",
    index=False,
)
bdong2hdong_df.to_csv(
    "./data/preproc_data/행정동_법정동_매퍼/by_intersection_n_pct/bdong2hdong_df.csv",
    index=False,
)

In [None]:
# Work on copies so the total_name column added next does not touch the *_shp_df frames.
hdong = hdong_shp_df.copy()
bdong = bdong_shp_df.copy()

In [None]:
# total_name = "<rough_name>_<detail_name>"; then a lookup detail_code -> total_name
# (for a repeated code the last row wins).
bdong["total_name"] = bdong["rough_name"].str.cat(bdong["detail_name"], sep="_")
bdong_code_name_dict = dict(zip(bdong["detail_code"], bdong["total_name"]))

In [None]:
# total_name = "<rough_name>_<detail_name>"; then a lookup detail_code -> total_name
# (for a repeated code the last row wins).
hdong["total_name"] = hdong["rough_name"].str.cat(hdong["detail_name"], sep="_")
hdong_code_name_dict = dict(zip(hdong["detail_code"], hdong["total_name"]))

In [None]:
# Write the code -> name lookups as JSON. The names are non-ASCII and
# ensure_ascii=False writes them verbatim, so the encoding is pinned to UTF-8
# instead of relying on the platform's locale encoding.
with open(
    "./data/preproc_data/행정동_법정동_매퍼/bdong_code_name_dict.json",
    "w",
    encoding="utf-8",
) as f:
    json.dump(bdong_code_name_dict, f, indent=4, ensure_ascii=False)

with open(
    "./data/preproc_data/행정동_법정동_매퍼/hdong_code_name_dict.json",
    "w",
    encoding="utf-8",
) as f:
    json.dump(hdong_code_name_dict, f, indent=4, ensure_ascii=False)

In [None]:
# Add readable names next to the codes; codes missing from the lookups
# (such as the "None" placeholder) map to NaN.
bdong2hdong_df = bdong2hdong_df.assign(
    bdong_name=bdong2hdong_df["bdong"].map(bdong_code_name_dict),
    hdong_name=bdong2hdong_df["hdong"].map(hdong_code_name_dict),
)
hdong2bdong_df = hdong2bdong_df.assign(
    hdong_name=hdong2bdong_df["hdong"].map(hdong_code_name_dict),
    bdong_name=hdong2bdong_df["bdong"].map(bdong_code_name_dict),
)

In [None]:
# Show five random rows of each mapping table (hdong -> bdong first).
display(
    hdong2bdong_df.sample(5),
    bdong2hdong_df.sample(5),
)