In [1]:
# from pycaret.time_series import *

import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import category_encoders as ce

# Visualization
import matplotlib.pylab as plt
from matplotlib import font_manager, rc
import matplotlib
import seaborn as sns
import plotly.express as px
%matplotlib inline
matplotlib.rcParams['font.family'] = 'Malgun Gothic' # 한글 패치
# Preprocessing & Feature Engineering
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.impute import SimpleImputer 
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PowerTransformer
from sklearn.feature_selection import SelectPercentile

# Hyperparameter Optimization
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Modeling
# from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.neural_network import MLPClassifier
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import ExtraTreesClassifier
# from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier, XGBRegressor, XGBRFRegressor
from lightgbm import LGBMClassifier, LGBMRegressor
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier, VotingRegressor 
from sklearn.ensemble import StackingClassifier, StackingRegressor
# from sklearn.base import ClassifierMixin

# CatBoost
from catboost import CatBoostRegressor

# PyTorch
# import torch
# from torch.utils.data import Dataset, DataLoader, TensorDataset
# import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
# from torch.autograd import Variable
# from torch.nn import Parameter
# from torch import Tensor
# from torch.utils.data import DataLoader

# Evaluation
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import log_loss,mean_squared_error

# Utility
import os
import time
import datetime # ⚠️2019년 12월30일과 31일의 week of year가 1인 오류가 있음
import random
import warnings; warnings.filterwarnings("ignore")
from IPython.display import Image
import pickle
from tqdm import tqdm
import platform
from itertools import combinations
from scipy.stats.mstats import gmean
import holidays

# from bayes_opt import BayesianOptimization
# from num2words import num2words
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import OLSInfluence

In [2]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(2023)

import warnings
warnings.filterwarnings('ignore')

path = '../data/daegu/'
train_org = pd.read_csv(path+'train.csv')
test_org = pd.read_csv(path+'test.csv')
submit = pd.read_csv(path+'sample_submission.csv')

In [3]:
# Baseline code 2
### train, test의 시간정보와 공간정보를 분류하고
### 공간정보를 기준으로 위에서 만든 데이터셋과 merge

train_df = train_org.copy()
test_df = test_org.copy()

time_pattern = r'(\d{4})-(\d{1,2})-(\d{1,2}) (\d{1,2})'

train_df[['연', '월', '일', '시간']] = train_org['사고일시'].str.extract(time_pattern)
train_df[['연', '월', '일', '시간']] = train_df[['연', '월', '일', '시간']].apply(pd.to_numeric) # 추출된 문자열을 수치화해줍니다
# train_df = train_df.drop(columns=['사고일시']) # 정보 추출이 완료된 '사고일시' 컬럼은 제거합니다

# 해당 과정을 test_x에 대해서도 반복해줍니다
test_df[['연', '월', '일', '시간']] = test_org['사고일시'].str.extract(time_pattern)
test_df[['연', '월', '일', '시간']] = test_df[['연', '월', '일', '시간']].apply(pd.to_numeric)
# test_df = test_df.drop(columns=['사고일시'])

train_df['사고일시'] = pd.to_datetime(train_df['사고일시'])
test_df['사고일시'] = pd.to_datetime(test_df['사고일시'])

##################################### 🔻피처 추가
# import holidays
# def make_holi(x):
#     kr_holi = holidays.KR()
#     if x in kr_holi:
#         # 공휴일vs비공휴일 eclo가 큰 차이를 보이지 않기 때문에 0.5만
#         return 0.5
#     else:
#         return 0

# for df in [train_df,test_df]:
#     # 제주감귤 공휴일 추출 코드 참고
#     # 공휴일 변수 추가
#     df["공휴일"] = df["사고일시"].map(lambda x : make_holi(x))

#     # 주말 컬럼 추가
#     # 주말vs평일 eclo가 큰 차이를 보이지 않기 때문에 0.5만
#     df['주말'] = df['요일'].map(lambda x:0.5 if x in ['토요일','일요일'] else 0)
    
#     # 시야감소 컬럼 추가 (시간이 20시~04시 사이인 경우)
#     # 이 시간대와 이 시간대 외의 eclo가 0.5~1.4 차이가 나므로 1값을 준다.
#     df['시야감소'] = df['시간'].map(lambda x:1 if (x>=20)|(x<=4) else 0)

In [4]:
################################################# 🔻외부 데이터
location_pattern = r'(\S+) (\S+) (\S+)'

train_df[['도시', '구', '동']] = train_org['시군구'].str.extract(location_pattern)
# train_df = train_df.drop(columns=['시군구'])

test_df[['도시', '구', '동']] = test_org['시군구'].str.extract(location_pattern)
# test_df = test_df.drop(columns=['시군구'])

# 도로형태 나누지 말자.
# road_pattern = r'(.+) - (.+)'

# train_df[['도로형태1', '도로형태2']] = train_org['도로형태'].str.extract(road_pattern)
# train_df = train_df.drop(columns=['도로형태'])

# test_df[['도로형태1', '도로형태2']] = test_org['도로형태'].str.extract(road_pattern)
# test_df = test_df.drop(columns=['도로형태'])

# Baseline code 1
### 보안등, 어린이 보호구역, 주차장 정보를 주소 기준으로 분류
light_df = pd.read_csv(path+'external_open/대구 보안등 정보.csv', encoding='cp949')[['설치개수', '소재지지번주소']]
location_pattern = r'(\S+) (\S+) (\S+) (\S+)'
light_df[['도시', '구', '동', '번지']] = light_df['소재지지번주소'].str.extract(location_pattern)
light_df = light_df.drop(columns=['소재지지번주소', '번지'])
light_df = light_df.groupby(['도시', '구', '동']).sum().reset_index()
light_df.reset_index(inplace=True, drop=True)
light_df = light_df.rename({'설치개수':'보안등개수'})
train_df = pd.merge(train_df, light_df, how='left', on=['도시', '구', '동'])
test_df = pd.merge(test_df, light_df, how='left', on=['도시', '구', '동'])

child_area_df = pd.read_csv(path+'external_open/대구 어린이 보호 구역 정보.csv', encoding='cp949').drop_duplicates()[['소재지지번주소']]
child_area_df['cnt'] = 1
location_pattern = r'(\S+) (\S+) (\S+) (\S+)'
child_area_df[['도시', '구', '동', '번지']] = child_area_df['소재지지번주소'].str.extract(location_pattern)
child_area_df = child_area_df.drop(columns=['소재지지번주소', '번지'])
child_area_df = child_area_df.groupby(['도시', '구', '동']).sum().reset_index()
child_area_df.reset_index(inplace=True, drop=True)
train_df = pd.merge(train_df, child_area_df, how='left', on=['도시', '구', '동'])
test_df = pd.merge(test_df, child_area_df, how='left', on=['도시', '구', '동'])
### ??? 주차장 정보가 필요할까? 
# 가설 : 주차장이 많을수록 도로변 "불법주정차" 수가 줄어 
# 시야 확보에 도움이 될 수 있다. -> ECLO가 낮을 것 <<기각...>>
parking_df = pd.read_csv(path+'external_open/대구 주차장 정보.csv', encoding='cp949')[['소재지지번주소', '급지구분']]
parking_df = pd.get_dummies(parking_df, columns=['급지구분'])
location_pattern = r'(\S+) (\S+) (\S+) (\S+)'
parking_df[['도시', '구', '동', '번지']] = parking_df['소재지지번주소'].str.extract(location_pattern)
parking_df = parking_df.drop(columns=['소재지지번주소', '번지'])
parking_df = parking_df.groupby(['도시', '구', '동']).sum().reset_index()
parking_df.reset_index(inplace=True, drop=True)
train_df = pd.merge(train_df, parking_df, how='left', on=['도시', '구', '동'])
test_df = pd.merge(test_df, parking_df, how='left', on=['도시', '구', '동'])

---
## 지도 시각화

In [5]:
import folium
from geopy.geocoders import Nominatim
import time
import geopandas as gpd
import fiona
from shapely.geometry import Polygon, Point, MultiPolygon
def get_coordinates(address):
    geolocator = Nominatim(user_agent = "South Korea")
    location = geolocator.geocode(address)

    if location:
        return location.latitude, location.longitude
    else:
        return 0
external = path+'external_open/대구 빅데이터 마트 데이터/'

In [6]:
map_daegu = folium.Map(location=[35.8714, 128.6014], zoom_start=12)
# map_daegu

In [7]:
# 대구 행정동
daegu_dong = pd.read_csv(path+'reverse_geocoding/동별면적_경계좌표.csv',encoding='cp949')
daegu_dong.head(3)

Unnamed: 0.1,Unnamed: 0,도시,구,동,centroid,geometry,area,geometry_list
0,0,대구광역시,달성군,구지면,POINT (128.35510984588092 35.70522573235479),POLYGON ((128.35510984588103 35.70522573235479...,0.01144,[128.35510985 35.70522573 128.35510985 35.70...
1,1,대구광역시,달성군,현풍읍,POINT (128.37631448416624 35.69660751684695),POLYGON ((128.37631448416636 35.69660751684695...,0.007064,[128.37631448 35.69660752 128.37631448 35.69...
2,2,대구광역시,달성군,하빈면,POINT (128.3902710589798 35.87207813737406),POLYGON ((128.39027105897992 35.87207813737406...,0.011088,[128.39027106 35.87207814 128.39027106 35.87...


In [10]:
dd = daegu_dong[['구','동','geometry_list']]
test=dd.geometry_list[0]

In [11]:
from shapely import Point


def trans_geo_strlist_to_pointslist(str_list):
    dl = str_list.split()
    dl[0] = dl[0][1:]
    map(lambda x : x.replace(',',''),dl)
    dl[-1] = dl[-1][:-1]
    if len(dl[-1]) < 2:
        del dl[-1]
    dl = list(np.array(dl,dtype='float64'))
    points_list = []
    while len(dl)!=0:
        x = dl.pop()
        y = dl.pop()
#         p = Point(y,x)
        p = (y,x)
        points_list.append(p)
    return tuple(points_list)

In [12]:
dd['points_list'] = dd.geometry_list.apply(trans_geo_strlist_to_pointslist)
dd.head(2)

Unnamed: 0,구,동,geometry_list,points_list
0,달성군,구지면,[128.35510985 35.70522573 128.35510985 35.70...,"((128.35510985, 35.70522573), (128.35510985, 3..."
1,달성군,현풍읍,[128.37631448 35.69660752 128.37631448 35.69...,"((128.37631448, 35.69660752), (128.37631448, 3..."


In [13]:
old1 = gpd.read_file(path+'external_open/대구 빅데이터 마트 데이터/안전/2. 보행노인사고 다발지역/보행노인사고 다발지역_100.gpkg')
old2 = gpd.read_file(path+'external_open/대구 빅데이터 마트 데이터/안전/2. 보행노인사고 다발지역/보행노인사고 다발지역_300.gpkg')
old3 = gpd.read_file(path+'external_open/대구 빅데이터 마트 데이터/안전/2. 보행노인사고 다발지역/보행노인사고 다발지역_500.gpkg')

In [14]:
old = pd.concat([old1,old2,old3],axis=0)
old = gpd.GeoDataFrame(data=old,geometry='geometry')
old = old.to_crs(epsg=4236)
old.head(2)

Unnamed: 0,id,count,occrrnc_cnt_sum,occrrnc_cnt_mean,caslt_cnt_sum,caslt_cnt_mean,dth_dnv_cnt_sum,dth_dnv_cnt_mean,se_dnv_cnt_sum,se_dnv_cnt_mean,sl_dnv_cnt_sum,sl_dnv_cnt_mean,wnd_dnv_cnt_sum,wnd_dnv_cnt_mean,geometry
0,349,,,,,,,,,,,,,,"MULTIPOLYGON (((128.35230 35.70449, 128.35264 ..."
1,350,,,,,,,,,,,,,,"MULTIPOLYGON (((128.35180 35.70402, 128.35225 ..."


In [15]:
from folium import GeoJson
for index, row in old.iterrows():
    if index ==50:
        break
    geojson_data = row['geometry'].__geo_interface__
    print((geojson_data))
    GeoJson(geojson_data).add_to(map_daegu)
map_daegu

{'type': 'MultiPolygon', 'coordinates': [(((128.35230067145747, 35.704492361785356), (128.35263826379298, 35.70480552722058), (128.35263438485669, 35.70444126001363), (128.35224953470637, 35.704443985768684), (128.35230067145747, 35.704492361785356)),)]}
{'type': 'MultiPolygon', 'coordinates': [(((128.3517967449111, 35.70401563730713), (128.35224953470637, 35.704443985768684), (128.35263438485669, 35.70444126001363), (128.35262478502952, 35.70353972120524), (128.35176160139545, 35.703545832975784), (128.3517967449111, 35.70401563730713)),)]}
{'type': 'MultiPolygon', 'coordinates': [(((128.35172842201646, 35.703102279744115), (128.35176160139545, 35.703545832975784), (128.35262478502952, 35.70353972120524), (128.35261518562865, 35.702638182259285), (128.35169786773233, 35.70264467697981), (128.35172842201646, 35.703102279744115)),)]}
{'type': 'MultiPolygon', 'coordinates': [(((128.35169786773233, 35.70264467697981), (128.35261518562865, 35.702638182259285), (128.35260558665414, 35.70173

In [17]:
passenger1 = gpd.read_file(path+'external_open/대구 빅데이터 마트 데이터/안전/11. 보행자사고 다발지역/보행자사고 다발지역_100.gpkg')
passenger2 = gpd.read_file(path+'external_open/대구 빅데이터 마트 데이터/안전/11. 보행자사고 다발지역/보행자사고 다발지역_300.gpkg')
passenger3 = gpd.read_file(path+'external_open/대구 빅데이터 마트 데이터/안전/11. 보행자사고 다발지역/보행자사고 다발지역_500.gpkg')

passenger = pd.concat([passenger1,passenger2,passenger3],axis=0)
passenger = gpd.GeoDataFrame(data=passenger,geometry='geometry')
passenger = passenger.to_crs(epsg=4236)
passenger.head(2)

Unnamed: 0,id,count,occrrnc_cnt_sum,occrrnc_cnt_mean,caslt_cnt_sum,caslt_cnt_mean,dth_dnv_cnt_sum,dth_dnv_cnt_mean,se_dnv_cnt_sum,se_dnv_cnt_mean,sl_dnv_cnt_sum,sl_dnv_cnt_mean,wnd_dnv_cnt_sum,wnd_dnv_cnt_mean,geometry
0,349,,,,,,,,,,,,,,"MULTIPOLYGON (((128.35230 35.70449, 128.35264 ..."
1,350,,,,,,,,,,,,,,"MULTIPOLYGON (((128.35180 35.70402, 128.35225 ..."


---

In [7]:
import requests

def get_address(latlong):
    # NCP 콘솔에서 복사한 클라이언트ID와 클라이언트Secret 값
    client_id = ""
    client_secret = ""

    # 좌표 (경도, 위도)
    coords = "128.35675,35.70806"
    output = "json"
    orders = 'addr'
    endpoint = "https://naveropenapi.apigw.ntruss.com/map-reversegeocode/v2/gc"
    url = f"{endpoint}?coords={coords}&output={output}&orders={orders}"

    # 헤더
    headers = {
        "X-NCP-APIGW-API-KEY-ID": client_id,
        "X-NCP-APIGW-API-KEY": client_secret,
    }

    # 요청
    res = requests.get(url, headers=headers)
    j = res.json()
    
    if j['status']['name'] == 'ok':
        si = j['results'][0]['region']['area1']['name']
        gu = j['results'][0]['region']['area2']['name']
        dong = j['results'][0]['region']['area3']['name']
        
    else:
        si = np.nan
        gu = np.nan
        dong = np.nan
    
    return [si,gu,dong]

In [21]:
# v world api
import requests

def get_latlong(query):
    # NCP 콘솔에서 복사한 클라이언트ID와 클라이언트Secret 값
    search = ''
    request = search
    key = ''
    query = query
    type_ = 'DISTRICT'
    category = 'L2'
    
    
    url = f"https://api.vworld.kr/req/search?service={search}&request=search&version=2.0&crs=EPSG:4236&bbox=&size=1&page=1&query={query}&type={type_}&category={category}&format=json&errorformat=json&key={key}"
    
    # 헤더
#     headers = {
#         "X-NCP-APIGW-API-KEY-ID": client_id,
#         "X-NCP-APIGW-API-KEY": client_secret,
#     }

    # 요청
    res = requests.get(url)
    j = res.json()
    
#     if j['status']['name'] == 'ok':
#         si = j['results'][0]['region']['area1']['name']
#         gu = j['results'][0]['region']['area2']['name']
#         dong = j['results'][0]['region']['area3']['name']
        
#     else:
#         si = np.nan
#         gu = np.nan
#         dong = np.nan
    
    return j

In [22]:
train_df['동'].unique()[:10]

array(['대신동', '감삼동', '두산동', '복현동', '신암동', '지산동', '상인동', '태전동', '지묘동',
       '평리동'], dtype=object)

In [23]:
j = get_latlong('대신동')
j

{'response': {'service': {'name': 'get_latlong',
   'version': '2.0',
   'operation': 'search',
   'time': '2(ms)'},
  'status': 'ERROR',
  'error': {'level': '1',
   'code': 'INVALID_RANGE',
   'text': 'service 파라미터의 값이 유효한 범위를 넘었습니다. 유효한 파라미터 값의 범위 : [search], 입력한 파라미터 값 : get_latlong'}}}

In [24]:
bnd_adm_dong = gpd.read_file(path+'external_open/BND_ADM_DONG_PG.shp')

In [25]:
bnd_adm_dong.head(3)

Unnamed: 0,BASE_DATE,ADM_CD,ADM_NM,geometry
0,20220630,11010530,사직동,"POLYGON ((953553.932 1953335.741, 953555.211 1..."
1,20220630,11010540,삼청동,"POLYGON ((954025.242 1953916.389, 954026.972 1..."
2,20220630,11010550,부암동,"POLYGON ((952490.380 1956548.821, 952497.594 1..."


In [26]:
train_df['동'].unique()[:10]

array(['대신동', '감삼동', '두산동', '복현동', '신암동', '지산동', '상인동', '태전동', '지묘동',
       '평리동'], dtype=object)

In [27]:
bnd_adm_dong[bnd_adm_dong['ADM_NM']=='대신동']

Unnamed: 0,BASE_DATE,ADM_CD,ADM_NM,geometry
635,20220630,22010620,대신동,"POLYGON ((1097660.608 1764636.082, 1097666.419..."
2902,20220630,37030560,대신동,"POLYGON ((1052423.590 1797355.375, 1052457.302..."


In [32]:
bnd_adm_dong = gpd.GeoDataFrame(data=bnd_adm_dong,geometry='geometry',crs='epsg:5179')
bnd_adm_dong = bnd_adm_dong.to_crs(epsg = '4236')
bnd_adm_dong = bnd_adm_dong.rename(columns={'ADM_NM':'동'})

In [33]:
bnd_adm_dong.head(3)

Unnamed: 0,BASE_DATE,ADM_CD,동,geometry
0,20220630,11010530,사직동,"POLYGON ((126.97399 37.57823, 126.97400 37.578..."
1,20220630,11010540,삼청동,"POLYGON ((126.97929 37.58349, 126.97931 37.583..."
2,20220630,11010550,부암동,"POLYGON ((126.96173 37.60714, 126.96182 37.607..."


In [51]:
daegu_dongs = train_df['동'].unique()
print(len(daegu_dongs))
bnd_adm_dong.loc[bnd_adm_dong['동'].isin(daegu_dongs),:]

196


Unnamed: 0,BASE_DATE,ADM_CD,동,geometry
27,20220630,11020690,신당동,"POLYGON ((127.01774 37.56976, 127.01775 37.569..."
60,20220630,11040690,송정동,"POLYGON ((127.07230 37.55688, 127.07209 37.556..."
207,20220630,11140600,대흥동,"POLYGON ((126.94654 37.55434, 126.94646 37.554..."
536,20220630,21090560,송정동,"MULTIPOLYGON (((129.20540 35.17889, 129.20539 ..."
577,20220630,21110680,남산동,"POLYGON ((129.08518 35.27542, 129.08518 35.275..."
...,...,...,...,...
3221,20220630,38060570,향촌동,"MULTIPOLYGON (((128.08947 34.91848, 128.08947 ..."
3255,20220630,38080530,교동,"POLYGON ((128.76760 35.51998, 128.76774 35.519..."
3311,20220630,38113540,문화동,"MULTIPOLYGON (((128.56893 35.19124, 128.56936 ..."
3338,20220630,38115610,덕산동,"MULTIPOLYGON (((128.68240 35.13115, 128.68240 ..."


In [82]:
daegu_geo = gpd.read_file(path+'external_open/LARD_ADM_SECT_SGG_27_202311.shp',encoding='cp949')
daegu_geo = gpd.GeoDataFrame(daegu_geo,crs='epsg:5174',geometry='geometry')

In [83]:
daegu_geo.head(3)

Unnamed: 0,ADM_SECT_C,SGG_NM,SGG_OID,COL_ADM_SE,geometry
0,27110,중구,,27110,"POLYGON ((128.600 35.875, 128.600 35.875, 128...."
1,27140,동구,,27140,"POLYGON ((128.696 36.016, 128.697 36.015, 128...."
2,27170,서구,,27170,"POLYGON ((128.582 35.885, 128.581 35.883, 128...."


In [84]:
daegu_gu = daegu_geo.rename(columns = {'SGG_NM':'구'})[['구','geometry']]
daegu_gu.head(3)

Unnamed: 0,구,geometry
0,중구,"POLYGON ((128.600 35.875, 128.600 35.875, 128...."
1,동구,"POLYGON ((128.696 36.016, 128.697 36.015, 128...."
2,서구,"POLYGON ((128.582 35.885, 128.581 35.883, 128...."


In [85]:
daegu_gu['centroid'] = daegu_gu['geometry'].centroid
daegu_gu['area'] = daegu_gu['geometry'].area
daegu_gu.head(3)

Unnamed: 0,구,geometry,centroid,area
0,중구,"POLYGON ((128.600 35.875, 128.600 35.875, 128....",POINT (128.594 35.867),0.000707
1,동구,"POLYGON ((128.696 36.016, 128.697 36.015, 128....",POINT (128.686 35.934),0.018132
2,서구,"POLYGON ((128.582 35.885, 128.581 35.883, 128....",POINT (128.550 35.875),0.001729


In [86]:
daegu_gu['centroid1'] = daegu_gu['centroid'].y
daegu_gu['centroid2'] = daegu_gu['centroid'].x
daegu_gu.drop(columns='centroid',inplace=True)

In [87]:
daegu_gu.head(3)

Unnamed: 0,구,geometry,area,centroid1,centroid2
0,중구,"POLYGON ((128.600 35.875, 128.600 35.875, 128....",0.000707,35.866535,128.593606
1,동구,"POLYGON ((128.696 36.016, 128.697 36.015, 128....",0.018132,35.934444,128.68565
2,서구,"POLYGON ((128.582 35.885, 128.581 35.883, 128....",0.001729,35.875002,128.549698


In [18]:
daegu_gu.to_file(path+'automl/대구_구별_geo.shp',encoding='utf-8',index=False)

NameError: name 'daegu_gu' is not defined

In [19]:
daegu_gu_geo=gpd.read_file(path+'automl/대구_구별_geo.shp',encoding='utf-8')
daegu_gu_geo

Unnamed: 0,구,area,centroid1,centroid2,geometry
0,중구,0.000707,35.866535,128.593606,"POLYGON ((128.600 35.875, 128.600 35.875, 128...."
1,동구,0.018132,35.934444,128.68565,"POLYGON ((128.696 36.016, 128.697 36.015, 128...."
2,서구,0.001729,35.875002,128.549698,"POLYGON ((128.582 35.885, 128.581 35.883, 128...."
3,남구,0.00177,35.835178,128.58533,"POLYGON ((128.579 35.858, 128.583 35.856, 128...."
4,북구,0.009409,35.928928,128.577206,"POLYGON ((128.615 35.980, 128.615 35.978, 128...."
5,수성구,0.007642,35.83385,128.661273,"POLYGON ((128.592 35.809, 128.592 35.810, 128...."
6,달서구,0.00627,35.827486,128.529204,"POLYGON ((128.519 35.869, 128.521 35.867, 128...."
7,달성군,0.04223,35.759754,128.498224,"MULTIPOLYGON (((128.498 35.818, 128.499 35.818..."
8,군위군,0.061443,36.170117,128.648119,"POLYGON ((128.455 36.327, 128.455 36.327, 128...."


In [20]:
train_df['구'].unique()

array(['중구', '달서구', '수성구', '북구', '동구', '서구', '달성군', '남구'], dtype=object)

In [21]:
from folium import GeoJson
from folium import Marker

for index, row in daegu_gu_geo.iterrows():
    geojson_data = row['geometry'].__geo_interface__
#     print((geojson_data))
    GeoJson(geojson_data).add_to(map_daegu)
    marker = Marker(location=(row[['centroid1','centroid2']]),
                   popup=f"{row['구']}")
    marker.add_to(map_daegu)
map_daegu