In [1]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
from scipy.spatial import cKDTree
import numpy as np

### 건물 데이터셋, 지하철역 데이터셋 불러오기

In [73]:
address = pd.read_csv("C:/ZIPcoding/address_with_hangang.csv", low_memory=False)
address.head()

Unnamed: 0,자치구코드,법정동코드,본번,부번,주소,위도,경도,geometry,한강변여부
0,11110,10100,1,0,서울특별시 종로구 청운동 1,37.591286,126.968196,POINT (126.968196 37.591286),0
1,11110,10100,3,0,서울특별시 종로구 청운동 3,37.591066,126.967546,POINT (126.9675462 37.5910657),0
2,11110,10100,3,150,서울특별시 종로구 청운동 3-150,37.590601,126.967965,POINT (126.9679652 37.5906011),0
3,11110,10100,4,3,서울특별시 종로구 청운동 4-3,37.590015,126.967734,POINT (126.9677341 37.5900153),0
4,11110,10100,8,15,서울특별시 종로구 청운동 8-15,37.588491,126.967931,POINT (126.9679306 37.588491),0


In [74]:
subway = pd.read_csv("C:/ZIPcoding/seoul_subway.csv", low_memory=False)
subway.head()

Unnamed: 0,역번호,역사명,노선번호,노선명,영문역사명,한자역사명,환승역구분,환승노선번호,환승노선명,역위도,역경도,운영기관명,역사도로명주소,역사전화번호,데이터기준일자
0,529,공덕,S1105,5호선,Gongdeok,孔德,도시철도 환승역,S1106,수도권 도시철도 6호선,37.544431,126.951372,서울교통공사,서울특별시 마포구 마포대로 지하100(공덕동),02-6311-5291,2024-12-31 00:00:00
1,1014,청량리역,I41K4,경의중앙선,Cheongnyangni,淸凉里,환승역,"S1101, I41K2, I4105","1호선, 경춘선, 분당선",37.580543,127.046516,한국철도공사,서울시 동대문구 왕산로 214,1544-7788,2024-12-31 00:00:00
2,1015,회기역,I4102,경원선,Hoegi,回 基,환승역,"I41K4, I41K2","경의중앙선, 경춘선",37.589802,127.057936,한국철도공사,서울시 동대문구 회기로196(휘경동 317-101),1544-7788,2024-12-31 00:00:00
3,205,동대문역사문화공원(DDP),S1102,2호선,Dongdaemun History & Culture Park,東大門歷史文化公園(DDP),도시철도 환승역,I1104+S1105,수도권 광역철도 4호선+수도권 도시철도 5호선,37.565613,127.005353,서울교통공사,서울특별시 중구 을지로 지하279(을지로7가),02-6110-2051,2024-12-31 00:00:00
4,512,김포공항,S1105,5호선,Gimpo Int'l Airport,金浦空港,도시철도 환승역,S11S1+I28A1+L41G1+I41WS,수도권 도시철도 9호선+수도권 광역철도 공항+김포도시철도+서해선,37.562384,126.801292,서울교통공사,서울특별시 강서구 하늘길 지하77(방화동),02-6311-5121,2024-12-31 00:00:00


### 건물별 최근접 지하철역과 거리 컬럼 생성

In [78]:
# GeoDataFrame 변환 (WGS84 좌표계 사용)
gdf_address = gpd.GeoDataFrame(address, geometry=gpd.points_from_xy(address["경도"], address["위도"]), crs="EPSG:4326")
gdf_subway = gpd.GeoDataFrame(subway, geometry=gpd.points_from_xy(subway["역경도"], subway["역위도"]), crs="EPSG:4326")

In [80]:
# 최근접 지하철역 찾기 (KDTree 활용)
tree = cKDTree(np.vstack([gdf_subway.geometry.x, gdf_subway.geometry.y]).T)
distances, indices = tree.query(np.vstack([gdf_address.geometry.x, gdf_address.geometry.y]).T, k=1)

In [81]:
# 결과 데이터 추가
gdf_address["최근접역"] = gdf_subway.iloc[indices]["역사명"].values
gdf_address["최근접역_최단거리(km)"] = distances * 111  # 대략적인 거리 변환 (1도 ≈ 111km)

In [84]:
gdf_address

Unnamed: 0,자치구코드,법정동코드,본번,부번,주소,위도,경도,geometry,한강변여부,최근접역,최근접역_최단거리(km)
0,11110,10100,1,0,서울특별시 종로구 청운동 1,37.591286,126.968196,POINT (126.9682 37.59129),0,경복궁(정부서울청사),1.822044
1,11110,10100,3,0,서울특별시 종로구 청운동 3,37.591066,126.967546,POINT (126.96755 37.59107),0,경복궁(정부서울청사),1.823947
2,11110,10100,3,150,서울특별시 종로구 청운동 3-150,37.590601,126.967965,POINT (126.96797 37.5906),0,경복궁(정부서울청사),1.759152
3,11110,10100,4,3,서울특별시 종로구 청운동 4-3,37.590015,126.967734,POINT (126.96773 37.59002),0,경복궁(정부서울청사),1.707918
4,11110,10100,8,15,서울특별시 종로구 청운동 8-15,37.588491,126.967931,POINT (126.96793 37.58849),0,경복궁(정부서울청사),1.543582
...,...,...,...,...,...,...,...,...,...,...,...
68385,11740,11000,695,0,서울특별시 강동구 강일동 695,37.559969,127.180761,POINT (127.18076 37.55997),0,강일,0.602751
68386,11740,11000,699,0,서울특별시 강동구 강일동 699,37.559850,127.178826,POINT (127.17883 37.55985),0,강일,0.414660
68387,11740,11000,701,0,서울특별시 강동구 강일동 701,37.561524,127.176878,POINT (127.17688 37.56152),1,강일,0.459942
68388,11740,11000,707,0,서울특별시 강동구 강일동 707,37.561180,127.171665,POINT (127.17166 37.56118),1,강일,0.626046


In [86]:
# 추가 정보 컬럼 추가
gdf_address["최근접역_위도"] = gdf_subway.iloc[indices]["역위도"].values
gdf_address["최근접역_경도"] = gdf_subway.iloc[indices]["역경도"].values
gdf_address["최근접역_노선명"] = gdf_subway.iloc[indices]["노선명"].values
gdf_address["최근접역_환승역구분"] = gdf_subway.iloc[indices]["환승역구분"].values
gdf_address["최근접역_geometry"] = gdf_subway.iloc[indices]["geometry"].values
gdf_address["최근접역_환승노선명"] = gdf_subway.iloc[indices]["환승노선명"].values


In [87]:
gdf_address.head()

Unnamed: 0,자치구코드,법정동코드,본번,부번,주소,위도,경도,geometry,한강변여부,최근접역,최근접역_최단거리(km),최근접역_위도,최근접역_경도,최근접역_노선명,최근접역_환승역구분,최근접역_geometry,최근접역_환승노선명
0,11110,10100,1,0,서울특별시 종로구 청운동 1,37.591286,126.968196,POINT (126.9682 37.59129),0,경복궁(정부서울청사),1.822044,37.575762,126.97353,3호선,도시철도 일반역,POINT (126.97353 37.57576),
1,11110,10100,3,0,서울특별시 종로구 청운동 3,37.591066,126.967546,POINT (126.96755 37.59107),0,경복궁(정부서울청사),1.823947,37.575762,126.97353,3호선,도시철도 일반역,POINT (126.97353 37.57576),
2,11110,10100,3,150,서울특별시 종로구 청운동 3-150,37.590601,126.967965,POINT (126.96797 37.5906),0,경복궁(정부서울청사),1.759152,37.575762,126.97353,3호선,도시철도 일반역,POINT (126.97353 37.57576),
3,11110,10100,4,3,서울특별시 종로구 청운동 4-3,37.590015,126.967734,POINT (126.96773 37.59002),0,경복궁(정부서울청사),1.707918,37.575762,126.97353,3호선,도시철도 일반역,POINT (126.97353 37.57576),
4,11110,10100,8,15,서울특별시 종로구 청운동 8-15,37.588491,126.967931,POINT (126.96793 37.58849),0,경복궁(정부서울청사),1.543582,37.575762,126.97353,3호선,도시철도 일반역,POINT (126.97353 37.57576),


In [90]:
gdf_address.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 68390 entries, 0 to 68389
Data columns (total 17 columns):
 #   Column         Non-Null Count  Dtype   
---  ------         --------------  -----   
 0   자치구코드          68390 non-null  int64   
 1   법정동코드          68390 non-null  int64   
 2   본번             68390 non-null  int64   
 3   부번             68390 non-null  int64   
 4   주소             68390 non-null  object  
 5   위도             68390 non-null  float64 
 6   경도             68390 non-null  float64 
 7   geometry       68390 non-null  geometry
 8   한강변여부          68390 non-null  int64   
 9   최근접역           68390 non-null  object  
 10  최근접역_최단거리(km)  68390 non-null  float64 
 11  최근접역_위도        68390 non-null  float64 
 12  최근접역_경도        68390 non-null  float64 
 13  최근접역_노선명       68390 non-null  object  
 14  최근접역_환승역구분     68390 non-null  object  
 15  최근접역_geometry  68390 non-null  geometry
 16  최근접역_환승노선명     13304 non-null  object  
dtypes: float64(5), geometry

In [97]:
gdf_address['최근접역_환승역구분'].value_counts()

최근접역_환승역구분
도시철도 일반역    49206
도시철도 환승역     9676
일반역          6107
환승역          3401
Name: count, dtype: int64

In [99]:
# '최근접역_환승역구분' 값 변경
gdf_address["최근접역_환승역구분"] = gdf_address["최근접역_환승역구분"].replace({
    "도시철도 일반역": "일반역",
    "도시철도 환승역": "환승역"
})

In [101]:
gdf_address['최근접역_환승역구분'].value_counts()

최근접역_환승역구분
일반역    55313
환승역    13077
Name: count, dtype: int64

In [109]:
gdf_address.to_csv("C:/ZIPcoding/address_with_hangang_subway.csv", index = False)

### 도로 기준 거리 계산

#### Google API 테스트

In [210]:
import json
import pandas as pd
import requests

In [242]:
# google map api
google_map_api_key="AIzaSyALb9xCqRsh5cRQETDegprabjRMaC9KLSI"
how_to_go="driving" #options : walking, driving, bicycling, transit => transit : public transportation

# gps coordinates
origin = "13.76515,100.53904"
destination = "13.7329,100.52898"

origin = '37.59060,126.96796'
destination = "37.57576,126.97353"

In [244]:
origin

'37.59060,126.96796'

In [246]:
url="https://maps.googleapis.com/maps/api/directions/json?"\
f"&origin={origin}"\
f"&destination={destination}"\
f"&mode={how_to_go}"\
f"&key={google_map_api_key}"
url

'https://maps.googleapis.com/maps/api/directions/json?&origin=37.59060,126.96796&destination=37.57576,126.97353&mode=driving&key=AIzaSyALb9xCqRsh5cRQETDegprabjRMaC9KLSI'

In [248]:
response = requests.get(url)
response = response.json()
response

{'available_travel_modes': ['TRANSIT'],
 'geocoded_waypoints': [{}, {}],
 'routes': [],
 'status': 'ZERO_RESULTS'}

In [218]:
print(response['routes'][0]['legs'][0]['distance'])
print(response['routes'][0]['legs'][0]['duration'])

{'text': '4.2 km', 'value': 4183}
{'text': '1 hour 1 min', 'value': 3669}


In [206]:
data=pd.json_normalize(
    data=response,
    record_path=['routes',['legs']],
    meta=['status']
)
data

In [169]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   end_address          1 non-null      object 
 1   start_address        1 non-null      object 
 2   steps                1 non-null      object 
 3   traffic_speed_entry  1 non-null      object 
 4   via_waypoint         1 non-null      object 
 5   distance.text        1 non-null      object 
 6   distance.value       1 non-null      int64  
 7   duration.text        1 non-null      object 
 8   duration.value       1 non-null      int64  
 9   end_location.lat     1 non-null      float64
 10  end_location.lng     1 non-null      float64
 11  start_location.lat   1 non-null      float64
 12  start_location.lng   1 non-null      float64
 13  status               1 non-null      object 
dtypes: float64(4), int64(2), object(8)
memory usage: 244.0+ bytes


In [194]:
data.columns

Index(['end_address', 'start_address', 'steps', 'traffic_speed_entry',
       'via_waypoint', 'distance.text', 'distance.value', 'duration.text',
       'duration.value', 'end_location.lat', 'end_location.lng',
       'start_location.lat', 'start_location.lng', 'status'],
      dtype='object')

#### 내 데이터에 적용

In [196]:
df = pd.DataFrame(columns=['end_address', 'start_address', 'steps', 'traffic_speed_entry',
       'via_waypoint', 'distance.text', 'distance.value', 'duration.text',
       'duration.value', 'end_location.lat', 'end_location.lng',
       'start_location.lat', 'start_location.lng', 'status'])

In [208]:
# google map api
google_map_api_key="AIzaSyALb9xCqRsh5cRQETDegprabjRMaC9KLSI"
how_to_go="walking" #options : walking, driving, bicycling, transit => transit : public transportation

In [222]:
cnt = 0
for row in range(gdf_address.shape[0]):
    origin = str(gdf_address.iloc[row,:]['위도']) + "," + str(gdf_address.iloc[row,:]['경도'])
    destination = str(gdf_address.iloc[row,:]['최근접역_위도']) + "," + str(gdf_address.iloc[row,:]['최근접역_경도'])

    print(origin, destination)

    url="https://maps.googleapis.com/maps/api/directions/json?"\
        f"&origin={origin}"\
        f"&destination={destination}"\
        f"&mode={how_to_go}"\
        f"&key={google_map_api_key}"

    response = requests.get(url)
    response = response.json()

    temp=pd.json_normalize(
            data=response,
            record_path=['routes',['legs']],
            meta=['status']
                    )

    df = pd.concat([df, temp], ignore_index = True)

    cnt += 1

    if cnt == 3:
        break
            

37.591286,126.968196 37.575762,126.97353
37.5910657,126.9675462 37.575762,126.97353
37.5906011,126.9679652 37.575762,126.97353


In [200]:
df

Unnamed: 0,end_address,start_address,steps,traffic_speed_entry,via_waypoint,distance.text,distance.value,duration.text,duration.value,end_location.lat,end_location.lng,start_location.lat,start_location.lng,status


In [202]:
temp

### 카카오 API 테스트

In [266]:
import requests

KAKAO_API_KEY = "929ad4c932b9aedfb9592f91777ffa6a"
origin = "127.027621,37.497942"  # 출발지 (위도, 경도 순서)
destination = "126.970833,37.554722"  # 도착지

url = f"https://apis-navi.kakaomobility.com/v1/directions?origin={origin}&destination={destination}"
headers = {"Authorization": f"KakaoAK {KAKAO_API_KEY}"}

response = requests.get(url, headers=headers)
data = response.json()
print(data)


{'trans_id': '01951e3953047c72a970783d9e5749ac', 'routes': [{'result_code': 0, 'result_msg': '길찾기 성공', 'summary': {'origin': {'name': '', 'x': 127.02761332711667, 'y': 37.49794111240558}, 'destination': {'name': '', 'x': 126.97083018862388, 'y': 37.55472158988695}, 'waypoints': [], 'priority': 'RECOMMEND', 'bound': {'min_x': 126.96728547249788, 'min_y': 37.49743813303574, 'max_x': 127.02692641973515, 'max_y': 37.55519723903204}, 'fare': {'taxi': 14600, 'toll': 0}, 'distance': 10647, 'duration': 1718}, 'sections': [{'distance': 10647, 'duration': 1718, 'bound': {'min_x': 126.96914488629311, 'min_y': 37.497949935830974, 'max_x': 127.02766823190767, 'max_y': 37.554712608923886}, 'roads': [{'name': '강남대로', 'distance': 774, 'duration': 316, 'traffic_speed': 8.0, 'traffic_state': 1, 'vertexes': [127.02759059417166, 37.497949935830974, 127.02766823190767, 37.49806771206318, 127.02749382020153, 37.498435708965225, 127.02697034078221, 37.49955771701745, 127.02659736426885, 37.50041064630369, 12

In [268]:
data["routes"][0]["summary"]["distance"] # 미터
# data["routes"][0]["summary"]["duration"] # 초

10647

### 내 데이터에 적용

In [11]:
gdf_address = pd.read_csv("C:/ZIPcoding/address_with_hangang_subway.csv", low_memory=False)

In [13]:
import requests
import time

KAKAO_API_KEY = "929ad4c932b9aedfb9592f91777ffa6a"

In [None]:
meter = []

for row in range(gdf_address.shape[0]):
    origin = str(gdf_address.iloc[row,:]['경도']) + "," + str(gdf_address.iloc[row,:]['위도'])
    destination = str(gdf_address.iloc[row,:]['최근접역_경도']) + "," + str(gdf_address.iloc[row,:]['최근접역_위도'])

    url = f"https://apis-navi.kakaomobility.com/v1/directions?origin={origin}&destination={destination}"
    headers = {"Authorization": f"KakaoAK {KAKAO_API_KEY}"}

    response = requests.get(url, headers=headers)
    data = response.json()

    try:
        temp_meter = data["routes"][0]["summary"]["distance"]
    
        if temp_meter:
            meter.append(temp_meter)
        else:
            meter.append(-1)
    except:
        meter.append(-1)       

In [17]:
meter

[1886,
 1884,
 1780,
 1791,
 1641,
 1768,
 1393,
 1316,
 1421,
 1452,
 1656,
 1602,
 1561,
 1578,
 1820,
 1547,
 1722,
 1698,
 1272,
 1652,
 1254,
 1299,
 1252,
 1082,
 1278,
 1268,
 1235,
 1139,
 1774,
 1409,
 1603,
 1807,
 1206,
 1258,
 1260]

In [297]:
len(meter)

71