In [1]:
# from pycaret.time_series import *

import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import category_encoders as ce

# Visualization
import matplotlib.pylab as plt
from matplotlib import font_manager, rc
import matplotlib
import seaborn as sns
import plotly.express as px
%matplotlib inline
matplotlib.rcParams['font.family'] = 'Malgun Gothic' # 한글 패치
# Preprocessing & Feature Engineering
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.impute import SimpleImputer 
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import PowerTransformer
from sklearn.feature_selection import SelectPercentile

# Hyperparameter Optimization
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Modeling
# from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.neural_network import MLPClassifier
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import ExtraTreesClassifier
# from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier, XGBRegressor, XGBRFRegressor
from lightgbm import LGBMClassifier, LGBMRegressor
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier, VotingRegressor 
from sklearn.ensemble import StackingClassifier, StackingRegressor
# from sklearn.base import ClassifierMixin

# CatBoost
from catboost import CatBoostRegressor

# PyTorch
# import torch
# from torch.utils.data import Dataset, DataLoader, TensorDataset
# import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
# from torch.autograd import Variable
# from torch.nn import Parameter
# from torch import Tensor
# from torch.utils.data import DataLoader

# Evaluation
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import log_loss,mean_squared_error

# Utility
import os
import time
import datetime # ⚠️2019년 12월30일과 31일의 week of year가 1인 오류가 있음
import random
import warnings; warnings.filterwarnings("ignore")
from IPython.display import Image
import pickle
from tqdm import tqdm
import platform
from itertools import combinations
from scipy.stats.mstats import gmean
import holidays

# from bayes_opt import BayesianOptimization
# from num2words import num2words
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.outliers_influence import OLSInfluence

import folium
from geopy.geocoders import Nominatim
import time
import geopandas as gpd
import fiona
def get_coordinates(address):
    geolocator = Nominatim(user_agent = "South Korea")
    location = geolocator.geocode(address)

    if location:
        return location.latitude, location.longitude
    else:
        return 0
path = '../data/daegu/'
external = path+'external_open/대구 빅데이터 마트 데이터/'

In [2]:
# 대구광역시 행정동
daegu_geo1 = gpd.read_file(external+'0. Base/대구광역시_100.gpkg',encoding='cp949')
daegu_geo2 = gpd.read_file(external+'0. Base/대구광역시_300.gpkg',encoding='cp949')
daegu_geo3 = gpd.read_file(external+'0. Base/대구광역시_500.gpkg',encoding='cp949')

daegu_geo = pd.concat([daegu_geo1,daegu_geo2,daegu_geo3],axis=0)
daegu_geo = daegu_geo.to_crs(epsg='4326') # crs 정보 위경도 좌표계로 변경
daegu_geo.set_index('id',inplace=True)
daegu_geo.sort_index(inplace=True)
daegu_geo.reset_index(inplace=True)
daegu_geo.head(2)

Unnamed: 0,id,left,top,right,bottom,geometry
0,69,1077033.0,1746587.0,1077533.0,1746087.0,"MULTIPOLYGON (((128.35675 35.70806, 128.35710 ..."
1,70,1077033.0,1746087.0,1077533.0,1745587.0,"MULTIPOLYGON (((128.35180 35.70402, 128.35230 ..."


In [3]:
daegu_geo['centroid'] = daegu_geo['geometry'].centroid
daegu_geo['area'] = daegu_geo['geometry'].area

In [4]:
daegu_geo.head(2)

Unnamed: 0,id,left,top,right,bottom,geometry,centroid,area
0,69,1077033.0,1746587.0,1077533.0,1746087.0,"MULTIPOLYGON (((128.35675 35.70806, 128.35710 ...",POINT (128.35695 35.70808),4.02746e-08
1,70,1077033.0,1746087.0,1077533.0,1745587.0,"MULTIPOLYGON (((128.35180 35.70402, 128.35230 ...",POINT (128.35511 35.70523),1.49245e-05


In [5]:
daegu_geo['centroid'][0].x, daegu_geo['centroid'][0].y

(128.35695178788671, 35.708079581069356)

In [6]:
import requests

#### 횡단보도 동별로 개수 구하기
def get_sigudong_from_centroid(point):
    latlong = [point.x,point.y]
    
    # NCP 콘솔에서 복사한 클라이언트ID와 클라이언트Secret 값
    client_id = "y9ig6cz3sg"
    client_secret = "1YHOy21rqo1f1gmJkxkUGD8jTDmIZ7Z81IdtLfcY"

    # 좌표 (경도, 위도)
    coords = f"{latlong[0]},{latlong[1]}"
    output = "json"
    orders = 'addr'
    endpoint = "https://naveropenapi.apigw.ntruss.com/map-reversegeocode/v2/gc"
    url = f"{endpoint}?coords={coords}&output={output}&orders={orders}"

    # 헤더
    headers = {
        "X-NCP-APIGW-API-KEY-ID": client_id,
        "X-NCP-APIGW-API-KEY": client_secret,
    }

    # 요청
    res = requests.get(url, headers=headers)
    j = res.json()
    
    if j['status']['name'] == 'ok':
        si = j['results'][0]['region']['area1']['name']
        gu = j['results'][0]['region']['area2']['name']
        dong = j['results'][0]['region']['area3']['name']
        
    else:
        si = ''
        gu = ''
        dong = ''
    
    return [si,gu,dong]   

In [7]:
result=daegu_geo['centroid'].apply(get_sigudong_from_centroid)

In [23]:
si=pd.DataFrame(result.map(lambda x:x[0]),columns=['도시'])
gu=pd.DataFrame(result.map(lambda x:x[1]),columns=['구'])
dong=pd.DataFrame(result.map(lambda x:x[2]),columns=['동'])

sigudong = pd.concat([si,gu,dong],axis=1)
sigudong

Unnamed: 0,도시,구,동
0,경상북도,고령군,개진면
1,대구광역시,달성군,구지면
2,대구광역시,달성군,구지면
3,대구광역시,달성군,구지면
4,대구광역시,달성군,구지면
...,...,...,...
103291,대구광역시,동구,내곡동
103292,대구광역시,동구,내곡동
103293,대구광역시,동구,내곡동
103294,대구광역시,동구,숙천동


In [25]:
daegu_total = pd.concat([daegu_geo,sigudong],axis=1)
daegu_total

Unnamed: 0,id,left,top,right,bottom,geometry,centroid,area,도시,구,동
0,69,1.077033e+06,1.746587e+06,1.077533e+06,1.746087e+06,"MULTIPOLYGON (((128.35675 35.70806, 128.35710 ...",POINT (128.35695 35.70808),4.027460e-08,경상북도,고령군,개진면
1,70,1.077033e+06,1.746087e+06,1.077533e+06,1.745587e+06,"MULTIPOLYGON (((128.35180 35.70402, 128.35230 ...",POINT (128.35511 35.70523),1.492450e-05,대구광역시,달성군,구지면
2,71,1.077033e+06,1.745587e+06,1.077533e+06,1.745087e+06,"MULTIPOLYGON (((128.35148 35.69943, 128.35148 ...",POINT (128.35431 35.70126),2.440129e-05,대구광역시,달성군,구지면
3,72,1.077033e+06,1.745087e+06,1.077533e+06,1.744587e+06,"MULTIPOLYGON (((128.35205 35.69522, 128.35192 ...",POINT (128.35436 35.69682),2.353216e-05,대구광역시,달성군,구지면
4,73,1.077033e+06,1.744587e+06,1.077533e+06,1.744087e+06,"MULTIPOLYGON (((128.35344 35.69057, 128.35330 ...",POINT (128.35489 35.69239),1.817696e-05,대구광역시,달성군,구지면
...,...,...,...,...,...,...,...,...,...,...,...
103291,168686,1.113833e+06,1.766487e+06,1.113933e+06,1.766387e+06,"MULTIPOLYGON (((128.76201 35.88836, 128.76185 ...",POINT (128.76146 35.88809),3.124031e-07,대구광역시,동구,내곡동
103292,168687,1.113833e+06,1.766387e+06,1.113933e+06,1.766287e+06,"MULTIPOLYGON (((128.76136 35.88747, 128.76143 ...",POINT (128.76128 35.88702),1.647823e-07,대구광역시,동구,내곡동
103293,168688,1.113833e+06,1.766287e+06,1.113933e+06,1.766187e+06,"MULTIPOLYGON (((128.76140 35.88655, 128.76134 ...",POINT (128.76125 35.88648),3.250217e-08,대구광역시,동구,내곡동
103294,168701,1.113833e+06,1.764987e+06,1.113933e+06,1.764887e+06,"MULTIPOLYGON (((128.76110 35.87474, 128.76116 ...",POINT (128.76108 35.87443),1.191190e-07,대구광역시,동구,숙천동


In [26]:
daegu_total = daegu_total[daegu_total['도시']=='대구광역시']

In [35]:
dongs = daegu_total['동'].unique()

In [38]:
centroids = daegu_total[['도시','구','동']].drop_duplicates()
def get_centroid(dong):
    return daegu_total[daegu_total['동']==dong].iloc[0,-5]
centroids['centroid'] = daegu_total['동'].map(get_centroid)
centroids

Unnamed: 0,도시,구,동,centroid
1,대구광역시,달성군,구지면,POINT (128.35510984588092 35.70522573235479)
86,대구광역시,달성군,현풍읍,POINT (128.37631448416624 35.69660751684695)
134,대구광역시,달성군,하빈면,POINT (128.3902710589798 35.87207813737406)
144,대구광역시,달성군,논공읍,POINT (128.38855634501547 35.76393834259423)
349,대구광역시,달성군,옥포읍,POINT (128.41137472309822 35.79954734750882)
...,...,...,...,...
65024,대구광역시,중구,사일동,POINT (128.59423656308468 35.871532301808955)
65305,대구광역시,중구,동성로2가,POINT (128.5953441041262 35.87152220747088)
65582,대구광역시,중구,용덕동,POINT (128.5964889099538 35.874216446882706)
65583,대구광역시,중구,교동,POINT (128.59647648762308 35.87331499903597)


In [46]:
c = centroids.copy()
c['geometry'] = c['centroid'].apply(lambda x:x.buffer(0.0000000000001))
c

Unnamed: 0,도시,구,동,centroid,geometry
1,대구광역시,달성군,구지면,POINT (128.35510984588092 35.70522573235479),POLYGON ((128.35510984588103 35.70522573235479...
86,대구광역시,달성군,현풍읍,POINT (128.37631448416624 35.69660751684695),POLYGON ((128.37631448416636 35.69660751684695...
134,대구광역시,달성군,하빈면,POINT (128.3902710589798 35.87207813737406),POLYGON ((128.39027105897992 35.87207813737406...
144,대구광역시,달성군,논공읍,POINT (128.38855634501547 35.76393834259423),"POLYGON ((128.3885563450156 35.76393834259423,..."
349,대구광역시,달성군,옥포읍,POINT (128.41137472309822 35.79954734750882),POLYGON ((128.41137472309833 35.79954734750882...
...,...,...,...,...,...
65024,대구광역시,중구,사일동,POINT (128.59423656308468 35.871532301808955),POLYGON ((128.5942365630848 35.871532301808955...
65305,대구광역시,중구,동성로2가,POINT (128.5953441041262 35.87152220747088),POLYGON ((128.59534410412633 35.87152220747088...
65582,대구광역시,중구,용덕동,POINT (128.5964889099538 35.874216446882706),POLYGON ((128.5964889099539 35.874216446882706...
65583,대구광역시,중구,교동,POINT (128.59647648762308 35.87331499903597),"POLYGON ((128.5964764876232 35.87331499903597,..."


In [49]:
area_df = pd.DataFrame(daegu_total.groupby('동')['area'].sum()).reset_index()
merged = pd.merge(c,area_df,how='left',on=['동'])

In [51]:
gpd.GeoDataFrame(merged,geometry='geometry').to_csv(path+'daegu_geo.csv',encoding='cp949')