In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
import re
from functools import partial
from time import sleep
from datetime import datetime

import sys
import os
sys.path.append(os.path.abspath(".."))
from opt_emergency.api import NaverMap, PublicDataPortal, NearestEr

## community_center_df

In [None]:
# Read the raw community-center listing (cp949 CSV), discard the "연번" column
# and rename the oddly spaced address header to "주소".
community_center_df = (
    pd.read_csv("../data/raw/행정안전부_읍면동 하부행정기관 현황_20240731.csv", encoding="cp949")
    .drop(columns="연번")
    .rename(columns={"주    소": "주소"})
)

# Rows whose 시도 is "세종" get "세종특별자치시" as their 시군구.
is_sejong = community_center_df["시도"] == "세종"
community_center_df.loc[is_sejong, "시군구"] = "세종특별자치시"

def fix_spacing(name: str) -> str:
    """Remove spaces from a region name unless it starts with a "…시…구" shape.

    Args:
        name: Region name to normalise.

    Returns:
        ``name`` untouched when it has no space or when its beginning matches
        the pattern ``.+시+.+구``; otherwise ``name`` with every space removed.
    """
    if " " not in name:
        return name
    # re.match anchors at the start only, so trailing text after "구" is allowed.
    if re.match(r".+시+.+구", name) is not None:
        return name
    return name.replace(" ", "")
# Normalise spacing in the three region-name columns.
for col in ["시도", "시군구", "읍면동"]:
    community_center_df[col] = community_center_df[col].map(fix_spacing)

# Typo correction: entries ending in "주민센토" are rewritten to "주민센터".
typo_idx = community_center_df[community_center_df["읍면동"].map(lambda x: x[-4:]) == "주민센토"].index
community_center_df.loc[typo_idx, "읍면동"] = community_center_df.loc[typo_idx, "읍면동"].str.replace("주민센토", "주민센터")

# Drop the "(임시청사)" marker and turn the middle dot "ㆍ" into a period.
community_center_df["읍면동"] = community_center_df["읍면동"].map(lambda x: x.replace("ㆍ", ".").replace("(임시청사)", ""))

# Keep the full centre name in 센터명, then reduce 읍면동 to the bare district
# name by deleting the centre-type words.
community_center_df["센터명"] = community_center_df["읍면동"]
center_name = ["행정복지센터", "행정주민센터", "주민센터", "사무소"]
community_center_df["읍면동"] = community_center_df["읍면동"].map(lambda x: re.sub("|".join(center_name), "", x))

# Namyangju-si name handling.
rename_dict = {
    "와부조안": "와부읍",
    "진접오남": "진접읍",
    "화도수동": "화도읍",
    "진건퇴계원": "진건읍",
    "호평평내": "호평동",
    "금곡양정": "금곡동",
    "다산": "다산1동",
    "별내": "별내동",
}
# Whole-value replacement limited to the 읍면동 column: DataFrame.replace would
# rewrite any cell equal to one of these keys in every column.
community_center_df["읍면동"] = community_center_df["읍면동"].replace(rename_dict)

# Remove the Gunwi duplicates: drop rows listed under 경북 / 군위군.
# .copy() makes the result an independent frame so the .loc assignments below
# do not write into a slice of the previous frame.
community_center_df = community_center_df[
    ~((community_center_df["시도"] == "경북") & (community_center_df["시군구"] == "군위군"))
].copy()

# Typo fixes and name updates, applied to exact 읍면동 matches.
typo_mapping = {
    "곤지암": "곤지암읍",
    "호령면": "효령면",
    "벡아면": "백아면",
    "반원중앙동": "반월중앙동",
    "금수면": "금수강산면",
    "봉명제2.송정동": "봉명2.송정동",
    "성내충인동": "성내.충인동",
    "호암직동": "호암.직동",
    "칠금금릉동": "칠금.금릉동",
    "목행용탄동": "목행.용탄동",
}
for key, val in typo_mapping.items():
    community_center_df.loc[community_center_df["읍면동"] == key, "읍면동"] = val

# Fix the wrong district for Changwon 용지동.
# NOTE(review): the mask matches on 읍면동 alone — confirm no other 용지동 exists.
community_center_df.loc[community_center_df["읍면동"] == "용지동", "시군구"] = "창원시 성산구"

# Append the district (구) to 시군구 for Anyang-si and Jeonju-si.
def include_gu(row):
    """Return the row's 시군구, extended with its 구 for 안양시 and 전주시.

    Args:
        row: Mapping-like row exposing "시군구" and "주소".

    Returns:
        ``"<시군구> <third address token>"`` when 시군구 is "안양시" or "전주시",
        otherwise the 시군구 value unchanged.

    Raises:
        IndexError: If such a row's address has fewer than three tokens.
    """
    if row["시군구"] not in ("안양시", "전주시"):
        return row["시군구"]
    # split() without an argument collapses repeated/leading whitespace, so a
    # doubled space in the address cannot shift the token positions.
    gu = row["주소"].split()[2]
    return row["시군구"] + " " + gu

# Row-wise pass: include_gu yields the (possibly district-qualified) 시군구.
sigungu_with_gu = community_center_df.apply(include_gu, axis="columns")
community_center_df["시군구"] = sigungu_with_gu

# Bring dong names into a common format.
def fix_dong_name(name: str) -> str:
    """Normalise the spelling of an administrative-dong name.

    Steps, in order:
      1. Replace the interpunct "·" with ".".
      2. Remove the ordinal marker "제" that directly precedes a trailing
         ``<digits>[.<digits>]동``. Only that one character is removed, so a
         "제" that belongs to the place name itself is kept.
      3. If the name contains a run of two or more digits whose value is at
         least 10, insert "." between every pair of adjacent digits.

    Args:
        name: Dong name to normalise.

    Returns:
        The normalised name.
    """
    name = name.replace("·", ".")

    # The lookahead restricts the removal to the "제" right before the trailing
    # number; other "제" characters in the name are left alone.
    name = re.sub(r"제(?=\d+(?:\.\d+)?동$)", "", name)

    # NOTE(review): a genuine two-digit ordinal (e.g. "...10동") also matches
    # this rule and is split into "1.0" — confirm that is intended.
    search_num = re.search(r"\d{2,}", name)
    if search_num and int(search_num.group()) >= 10:
        name = re.sub(r"(\d)(?=\d)", r"\1.", name)

    return name
community_center_df["읍면동"] = community_center_df["읍면동"].map(fix_dong_name)

# Reflect that 의정부3동 and 회천4동 (Yangju) no longer exist.
community_center_df = community_center_df[~community_center_df["읍면동"].isin(["의정부3동", "회천4동"])]

# Community centres missing from the raw file. Each entry follows the current
# column order of community_center_df (the frame's columns are reused below).
center_add = [
    ["경기", "과천시", "중앙동", "13803", "경기 과천시 관문로 136", "중앙동행정복지센터"],
    ["경기", "파주시", "장단면", "10800", "경기 파주시 군내면 통일촌길 220", "장단면행정복지센터"],
    ["경기", "파주시", "운정4동", "10909", "경기 파주시 하우3길 77", "운정4동행정복지센터"],
    ["경기", "파주시", "운정5동", "10884", "경기 파주시 책향기로 283 1층", "운정5동행정복지센터"],
    ["경기", "파주시", "운정6동", "10956", "경기 파주시 청석로 115 5층", "운정6동행정복지센터"],
    # Address now carries the "경기 " prefix like every other entry in this list.
    ["경기", "화성시", "동탄9동", "18492", "경기 화성시 동탄신리천로9길 76", "동탄9동행정복지센터"],
    ["경기", "양주시", "옥정1동", "11465", "경기 양주시 옥정로 397-7", "옥정1동행정복지센터"],
    ["경기", "양주시", "옥정2동", "11473", "경기 양주시 옥정동로7가길 4", "옥정2동행정복지센터"]
]
center_add_df = pd.DataFrame(center_add, columns=community_center_df.columns)
community_center_df = pd.concat([community_center_df, center_add_df])

# Order by region and renumber the index from zero.
community_center_df = (
    community_center_df
    .sort_values(["시도", "시군구", "읍면동"])
    .reset_index(drop=True)
)

In [None]:
nm = NaverMap()

# Geocode every address in row order, collecting longitude and latitude separately.
lon_list, lat_list = [], []
for address in tqdm(community_center_df["주소"]):
    lon, lat = nm.get_lon_lat(address)
    lon_list.append(lon)
    lat_list.append(lat)

community_center_df["경도"] = lon_list
community_center_df["위도"] = lat_list

In [None]:
nm = NaverMap()

# Second pass for rows still lacking a longitude: look the centre up by
# "<시군구> <센터명>" and geocode the address that comes back.
for row in community_center_df[community_center_df["경도"].isna()].itertuples():
    name = getattr(row, "시군구") + " " + getattr(row, "센터명")
    new_address = nm.get_address(name=name)

    # Leave the row untouched when the lookup yields nothing; previously the
    # existing address was overwritten with the empty result.
    if not new_address:
        continue

    lon, lat = nm.get_lon_lat(new_address)
    community_center_df.loc[row.Index, "주소"] = new_address
    community_center_df.loc[row.Index, "경도"] = lon
    community_center_df.loc[row.Index, "위도"] = lat

# Manual corrections: each matching row is replaced as a whole.
# NOTE(review): the first mask matches on 읍면동 alone — confirm "상리면" is
# unique in the data, otherwise every match receives these values.
community_center_df.loc[community_center_df["읍면동"] == "상리면"] = \
    ["경남", "고성군", "상리면", "52951", "경남 고성군 척번정1길 105", "상리면사무소", 128.1864013, 34.9997999]
community_center_df.loc[(community_center_df["시군구"] == "경산시") & (community_center_df["읍면동"] == "남부동")] = \
    ["경북", "경산시", "남부동", "38645", "경북 경산시 경안로 68 (신교동)", "남부동행정복지센터", 128.7391133, 35.8137117]

community_center_df.to_csv("../data/processed/community_center_df.csv", encoding="UTF-8", index=False)

In [None]:
# Reload the processed community-center table from its CSV and display it.
community_center_df = pd.read_csv("../data/processed/community_center_df.csv", encoding="UTF-8")
community_center_df

## population_df

In [5]:
# Read the monthly population CSV (cp949) and keep only its first two columns.
population_df = pd.read_csv("../data/raw/202409_202409_연령별인구현황_월간.csv", encoding="cp949")
population_df = population_df.drop(columns=population_df.columns[2:])

# Keep rows whose 10-digit code (the characters before the closing ")") is not
# a multiple of 100000. .copy() detaches the result so the column assignment
# below does not write into a slice of the original frame.
is_kept = population_df["행정구역"].map(lambda x: int(x[-11:-1])) % 100000 != 0
population_df = population_df[is_kept].copy()

population_df = population_df.rename(columns={"2024년09월_계_총인구수": "인구수"})
# Population values contain thousands separators; strip them before casting.
population_df["인구수"] = population_df["인구수"].map(lambda x: int(x.replace(",", "")))

def district_info_seperate(info: str, typ: str) -> str:
    """Extract one component from a district string.

    The last 12 characters of ``info`` are cut off to obtain the name; the 10
    characters before the final character are taken as the code. The name is
    split on single spaces into either three parts (sido, sigungu, dong) or
    four parts (sido, si, gu, dong — si and gu are rejoined with a space).

    Args:
        info: District string as described above.
        typ: One of "sido", "sigungu", "eupmyeondong", "code".

    Returns:
        The requested component.

    Raises:
        ValueError: If the name does not contain exactly two or three spaces.
        AssertionError: If ``typ`` is not one of the accepted values.
    """
    name, code = info[:-12], info[-11:-1]
    parts = name.split(" ")

    if len(parts) == 3:
        sido, sigungu, eupmyeongdong = parts
    elif len(parts) == 4:
        sido, eupmyeongdong = parts[0], parts[3]
        sigungu = parts[1] + " " + parts[2]
    else:
        raise ValueError

    components = {
        "sido": sido,
        "sigungu": sigungu,
        "eupmyeondong": eupmyeongdong,
        "code": code,
    }
    assert typ in tuple(components)
    return components[typ]

# Derive the four region columns from 행정구역, one parser mode per column.
for column, typ in [
    ("시도", "sido"),
    ("시군구", "sigungu"),
    ("읍면동", "eupmyeondong"),
    ("행정구역코드", "code"),
]:
    population_df[column] = population_df["행정구역"].map(partial(district_info_seperate, typ=typ))

# Drop the source column, then move 인구수 (now first) to the end.
population_df = population_df.drop(columns="행정구역")
population_df = population_df[[*population_df.columns[1:], "인구수"]]

def sigungu_name_abbr(name: str) -> str:
    """Shorten a region name to two characters.

    Names starting with "충청", "전라" or "경상" become their first and third
    characters; every other name becomes its first two characters.
    """
    head = name[:2]
    if head in ("충청", "전라", "경상"):
        return name[0] + name[2]
    return head
# Abbreviate 시도, then give every "세종" row the 시군구 "세종특별자치시".
population_df = population_df.assign(시도=population_df["시도"].map(sigungu_name_abbr))

is_sejong = population_df["시도"] == "세종"
population_df.loc[is_sejong, "시군구"] = "세종특별자치시"

# Add each branch office's (출장소) population to the non-branch rows of the
# same region whose 읍면동 shares its first two characters.
is_chuljangso = population_df["읍면동"].str.contains("출장소")
population_df_chuljangso = population_df[is_chuljangso]
# .copy() so the in-place additions below modify an independent frame.
population_df_not_chuljangso = population_df[~is_chuljangso].copy()

# The two-character prefixes do not change while populations are summed.
name_prefix = population_df_not_chuljangso["읍면동"].str[:2]

for row in population_df_chuljangso.itertuples():
    # 시도 is part of the match as well: 시군구 names such as "중구" occur in
    # several provinces, and a branch office must only feed its own province.
    same_region = (
        (population_df_not_chuljangso["시도"] == getattr(row, "시도"))
        & (population_df_not_chuljangso["시군구"] == getattr(row, "시군구"))
    )
    same_prefix = name_prefix == getattr(row, "읍면동")[:2]
    population_df_not_chuljangso.loc[same_region & same_prefix, "인구수"] += getattr(row, "인구수")

population_df = population_df_not_chuljangso.reset_index(drop=True)

population_df = population_df.assign(읍면동=population_df["읍면동"].map(fix_dong_name))

# Cheorwon: fold 근북면 into 김화읍, then drop the 근북면 row.
is_geunbuk = population_df["읍면동"] == "근북면"
geunbuk_pop = population_df.loc[is_geunbuk, "인구수"].iloc[0]
population_df.loc[population_df["읍면동"] == "김화읍", "인구수"] += geunbuk_pop
population_df = population_df[~is_geunbuk].reset_index(drop=True)

population_df.to_csv("../data/processed/population_df.csv", encoding="UTF-8", index=False)

In [None]:
# Reload the processed population table from its CSV and display it.
population_df = pd.read_csv("../data/processed/population_df.csv", encoding="UTF-8")
population_df

## community_population_df

In [None]:
# Left join: every community-center row is kept; population columns are
# attached where 시도 / 시군구 / 읍면동 all match.
joined = community_center_df.merge(
    population_df,
    how="left",
    on=["시도", "시군구", "읍면동"],
)
joined.to_csv("../data/processed/community_population_df.csv", index=False)

In [None]:
# Read the joined community/population CSV back and display it.
comm_pop_df = pd.read_csv("../data/processed/community_population_df.csv")
comm_pop_df

## emergency_df

In [2]:
# Two source workbooks; the second one has its header on row index 5.
emergency_info1 = pd.read_excel("../data/raw/1. 응급의료기관 현황('24.09.30.)_e-gen.xlsx")
emergency_info1 = emergency_info1.drop(columns="번호")

emergency_info2 = pd.read_excel("../data/raw/2. 응급의료기관 외의 의료기관(응급의료시설)('24.09.30.)_e-gen.xlsx", header=5)
emergency_info2 = emergency_info2.drop(columns="번호")

# Stack both sheets and rename the region column to 시도.
emergency_df = pd.concat([emergency_info1, emergency_info2]).rename(columns={"지역": "시도"})

# Remove all spaces from the district and institution names.
emergency_df["시군구"] = emergency_df["시군구"].map(lambda x: x.replace(" ", ""))
emergency_df["기관명"] = emergency_df["기관명"].map(lambda x: x.replace(" ", ""))

In [None]:
er_api_list = []
districts = (
    emergency_df[["시도", "시군구"]]
    .drop_duplicates()
    .sort_values(["시도", "시군구"])
    .reset_index(drop=True)
)
pdp = PublicDataPortal()

# One request per distinct (시도, 시군구) pair, tagging each result with its
# region and pausing 0.1 s between calls.
for sido, sigungu in tqdm(districts.itertuples(index=False, name=None), total=len(districts)):
    er_df = pd.DataFrame(pdp.get_er_list_at_region(sido, sigungu))
    er_df["시도"] = sido
    er_df["시군구"] = sigungu
    er_api_list.append(er_df)
    sleep(0.1)

emergency_from_api = pd.concat(er_api_list).reset_index(drop=True)
# Strip spaces from dutyName, as was done for 기관명 in emergency_df.
emergency_from_api["dutyName"] = emergency_from_api["dutyName"].map(lambda x: x.replace(" ", ""))
emergency_from_api

In [5]:
# Left join of the spreadsheet rows with the API rows on region and name
# (기관명 on the left, dutyName on the right).
emergency_df_joined = emergency_df.merge(
    emergency_from_api,
    how="left",
    left_on=["시도", "시군구", "기관명"],
    right_on=["시도", "시군구", "dutyName"],
)

In [None]:
# Single-request run of the detail lookup: the unconditional `break` ends the
# loop after the first hpid, so at most one frame is appended.
# NOTE(review): the following cell repeats this loop without the `break` and
# re-creates the list — this cell looks like a leftover trial run; confirm.
pdp = PublicDataPortal()
er_additional_info_list = list()

for hpid in tqdm(emergency_df_joined["hpid"]):
    er_info = pdp.get_er_info(er_hpid=hpid)
    er_additional_info_list.append(pd.DataFrame([er_info]))
    sleep(0.1)
    break

In [None]:
pdp = PublicDataPortal()
er_additional_info_list = []

# One detail request per row of the joined table, pausing 0.1 s between calls;
# each response becomes a one-row frame.
for hpid in tqdm(emergency_df_joined["hpid"]):
    er_additional_info_list.append(pd.DataFrame([pdp.get_er_info(er_hpid=hpid)]))
    sleep(0.1)

# Stack the one-row frames, renumber from zero and attach the hpid column
# taken from the joined table.
er_additional_info_df = pd.concat(er_additional_info_list).reset_index(drop=True)
er_additional_info_df["hpid"] = emergency_df_joined["hpid"]

In [11]:
# er_additional_info_df holds one row per row of emergency_df_joined, so a
# repeated (or missing) hpid appears several times on both sides and the merge
# would multiply those rows. Keep one detail row per hpid before joining.
er_additional_info_unique = er_additional_info_df.drop_duplicates(subset="hpid")
emergency_df_joined_2 = pd.merge(emergency_df_joined, er_additional_info_unique, on="hpid")

# Overlapping columns come back with "_x" (left) / "_y" (right) suffixes: keep
# the left copy under its original name and discard the right copy. Matching
# on the suffix position avoids touching names that merely contain "_x"/"_y".
_x_include_col = [col for col in emergency_df_joined_2.columns if col.endswith("_x")]
_y_include_col = [col for col in emergency_df_joined_2.columns if col.endswith("_y")]
emergency_df_joined_2 = (
    emergency_df_joined_2
    .drop(columns=_y_include_col)
    .rename(columns={col: col[:-2] for col in _x_include_col})
)

In [None]:
# Show the column labels of the merged frame.
emergency_df_joined_2.columns

In [13]:
# Drop dutyAddr and give the address / coordinate columns their Korean names.
emergency_df_joined_2 = emergency_df_joined_2.drop(columns="dutyAddr").rename(
    columns={
        "기관주소(도로명)": "주소",
        "wgs84Lon": "경도",
        "wgs84Lat": "위도",
    }
)

def sigungu_name_abbr(name: str) -> str:
    """Shorten a region name to two characters.

    Names starting with "충청", "전라" or "경상" become their first and third
    characters; every other name becomes its first two characters.
    """
    # NOTE(review): a function with this name is also defined in the
    # population_df section; this definition shadows it when both cells run.
    head = name[:2]
    if head in ("충청", "전라", "경상"):
        return name[0] + name[2]
    return head
# Replace 시도 with its two-character abbreviation.
emergency_df_joined_2 = emergency_df_joined_2.assign(
    시도=emergency_df_joined_2["시도"].map(sigungu_name_abbr)
)

In [14]:
# Write the merged emergency-facility table to CSV (UTF-8, index column omitted).
emergency_df_joined_2.to_csv("../data/processed/emergency_df.csv", encoding="UTF-8", index=False)

In [None]:
# Read the processed emergency-facility CSV back (rebinding emergency_df) and display it.
emergency_df = pd.read_csv("../data/processed/emergency_df.csv")
emergency_df