In [None]:
import pandas as pd
import requests
import time
import os

# Kakao REST API key (sent as "KakaoAK <key>" in the Authorization header)
api_key = ""

# Source CSV and the CSV that results are written to
input_file_path = "tn_visit_area_info_방문지정보_depth_cleaned_score.csv"  # local input file
output_file_path = "tn_visit_area_info_방문지정보2nd.csv"  # file the results are saved to

# Load data: if the output file already exists, continue from it; otherwise
# start from the input file.
if os.path.exists(output_file_path):
    _source_path = output_file_path
    print(f"Resuming from saved file: {output_file_path}")
else:
    _source_path = input_file_path
    print(f"Starting fresh with input file: {input_file_path}")
activity_consume_df = pd.read_csv(_source_path, encoding='utf-8-sig')

# Drop rows in which all four location fields are missing
_location_columns = ['ROAD_NM_ADDR', 'LOTNO_ADDR', 'X_COORD', 'Y_COORD']
print(f"Initial data size: {activity_consume_df.shape}")
activity_consume_df = activity_consume_df.dropna(how='all', subset=_location_columns)
print(f"Data size after removing rows with all 4 values missing: {activity_consume_df.shape}")

# In-memory cache of geocoding results, keyed by the queried address string
geocode_cache = {}

# Geocoding helper
def get_geocode(address, api_key, timeout=5):
    """Geocode an address string with the Kakao Local address-search API.

    Successful lookups are stored in the module-level ``geocode_cache`` so
    the same address is only requested once; failed lookups are not cached
    and will be retried on the next call.

    Args:
        address: Address text to search for.
        api_key: Kakao REST API key.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        A tuple ``(address_name, x, y)`` taken from the first result
        document, or ``(None, None, None)`` when the address is blank or not
        a string, nothing was found, or the request failed. ``x`` and ``y``
        are returned exactly as the API delivers them.
    """
    # A blank or non-string address cannot produce a meaningful query; bail
    # out before touching the cache or the network.
    if not isinstance(address, str) or not address.strip():
        print(f"Skipping geocode for empty address: {address!r}")
        return None, None, None

    if address in geocode_cache:
        print(f"Cache hit for address: {address}")
        return geocode_cache[address]

    url = "https://dapi.kakao.com/v2/local/search/address.json"
    headers = {"Authorization": f"KakaoAK {api_key}"}
    print(f"Attempting to geocode address: {address}")
    try:
        # Pass the query through `params` so requests URL-encodes it; raw
        # interpolation breaks on characters such as '&' or '#'.
        response = requests.get(
            url, headers=headers, params={"query": address}, timeout=timeout
        )
        if response.status_code == 200:
            result = response.json()
            if result.get('documents'):
                document = result['documents'][0]
                address_name = document.get('address_name')
                x_coord = document.get('x')
                y_coord = document.get('y')
                geocode_cache[address] = (address_name, x_coord, y_coord)
                print(f"Geocode success for address: {address}")
                return address_name, x_coord, y_coord
            else:
                print(f"No results found for address: {address}")
        else:
            print(f"API error for address: {address}, Status code: {response.status_code}, Response: {response.text}")
    except requests.exceptions.Timeout:
        print(f"Timeout error for address: {address}")
    except requests.exceptions.RequestException as e:
        print(f"Request error for address: {address}, Error: {e}")
    return None, None, None

# Fill ROAD_NM_ADDR from X_COORD / Y_COORD
def fill_road_nm_from_coords(row, timeout=5):
    """Reverse-geocode a row's coordinates into an address string.

    Calls the Kakao Local ``coord2address`` endpoint with the row's
    ``X_COORD`` / ``Y_COORD`` values, using the module-level ``api_key``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) exposing ``X_COORD``
            and ``Y_COORD``.
        timeout: Seconds to wait for the HTTP response. Without a timeout a
            stalled connection would block the whole run.

    Returns:
        The ``address_name`` of the first result's ``road_address`` entry,
        falling back to its ``address`` entry when no road address is
        present; ``None`` when a coordinate is missing, nothing was found,
        or the request failed.
    """
    x_coord = row['X_COORD']
    y_coord = row['Y_COORD']
    if pd.isna(x_coord) or pd.isna(y_coord):
        return None

    print(f"Attempting to fill ROAD_NM_ADDR using coordinates: ({x_coord}, {y_coord})")
    reverse_url = "https://dapi.kakao.com/v2/local/geo/coord2address.json"
    headers = {"Authorization": f"KakaoAK {api_key}"}
    try:
        response = requests.get(
            reverse_url,
            headers=headers,
            params={"x": x_coord, "y": y_coord},
            timeout=timeout,
        )
        if response.status_code == 200:
            documents = response.json().get('documents')
            if documents:
                # The value feeds a road-name column, so prefer the road
                # address and only fall back to the lot-number address.
                for key in ('road_address', 'address'):
                    entry = documents[0].get(key)
                    if entry and entry.get('address_name'):
                        print(f"ROAD_NM_ADDR found: {entry.get('address_name')}")
                        return entry.get('address_name')
        else:
            print(f"API error for coordinates ({x_coord}, {y_coord}), Status code: {response.status_code}, Response: {response.text}")
    except requests.exceptions.Timeout:
        print(f"Timeout error for coordinates ({x_coord}, {y_coord})")
    except requests.exceptions.RequestException as e:
        print(f"Request error for coordinates ({x_coord}, {y_coord}), Error: {e}")
    return None

# Fill ROAD_NM_ADDR from LOTNO_ADDR
def fill_road_nm_from_lotno(lotno_addr):
    """Resolve an address string from a lot-number address.

    Delegates to ``get_geocode`` with the module-level ``api_key`` and
    returns the first element of its result.

    Args:
        lotno_addr: Lot-number address text; may be missing (None/NaN).

    Returns:
        The address name reported by ``get_geocode``, or ``None`` when the
        input is missing or the lookup yields nothing.
    """
    if pd.isna(lotno_addr):
        return None

    print(f"Attempting to fill ROAD_NM_ADDR using LOTNO_ADDR: {lotno_addr}")
    resolved, _x, _y = get_geocode(lotno_addr, api_key)
    if resolved:
        print(f"ROAD_NM_ADDR found from LOTNO_ADDR: {resolved}")
    return resolved

# Look up SIDO_NM, SGG_NM, DONG_NM, RI_NM for an address
def get_region_info(address, timeout=5):
    """Fetch the administrative region names for an address.

    Issues a single Kakao Local address-search request (using the
    module-level ``api_key``) and reads the region fields from the first
    result's ``address`` entry.

    Args:
        address: Address text to look up; may be missing (None/NaN).
        timeout: Seconds to wait for the HTTP response.

    Returns:
        A 4-tuple ``(region_1depth_name, region_2depth_name,
        region_3depth_name, region_4depth_name)``, i.e. the values used for
        SIDO_NM, SGG_NM, DONG_NM and RI_NM. ``(None, None, None, None)`` is
        returned when the address is missing, no usable result exists, or
        the request failed.
    """
    if pd.isna(address):
        return None, None, None, None

    print(f"Attempting to retrieve region info for ROAD_NM_ADDR: {address}")
    url = "https://dapi.kakao.com/v2/local/search/address.json"
    headers = {"Authorization": f"KakaoAK {api_key}"}
    try:
        # One request is enough: the response that proves the address exists
        # is the same one that carries the region fields.
        response = requests.get(
            url, headers=headers, params={"query": address}, timeout=timeout
        )
        if response.status_code == 200:
            documents = response.json().get('documents')
            if documents and documents[0].get('address'):
                addr = documents[0]['address']
                print(f"Region info found: {addr}")
                return (
                    addr.get('region_1depth_name'),  # SIDO_NM
                    addr.get('region_2depth_name'),  # SGG_NM
                    addr.get('region_3depth_name'),  # DONG_NM
                    addr.get('region_4depth_name'),  # RI_NM
                )
        else:
            print(f"API error for region info ({address}), Status code: {response.status_code}, Response: {response.text}")
    except requests.exceptions.Timeout:
        print(f"Timeout error for region info ({address})")
    except requests.exceptions.RequestException as e:
        print(f"Request error for region info ({address}), Error: {e}")
    return None, None, None, None

# Data processing
# Pass 1 fills missing ROAD_NM_ADDR / X_COORD / Y_COORD / LOTNO_ADDR values;
# pass 2 fills the region columns and removes rows whose region cannot be
# resolved. Whatever has been done so far is always written to the output
# file, even when the run is interrupted or fails.
_completed = False
_rows_to_drop = []  # index labels collected in pass 2, dropped once at the end
try:
    print("Processing ROAD_NM_ADDR, X_COORD, Y_COORD...")
    for index, row in activity_consume_df.iterrows():
        # `row` is a snapshot taken by iterrows(); keep the current road
        # address in a local so a value filled in below is actually used for
        # the coordinate / lot-number lookups of the same row.
        road_nm = row['ROAD_NM_ADDR']
        if pd.isna(road_nm):
            road_nm = fill_road_nm_from_coords(row)
            if road_nm is None:
                road_nm = fill_road_nm_from_lotno(row['LOTNO_ADDR'])
            if road_nm is None:
                continue  # nothing to geocode with for this row
            activity_consume_df.at[index, 'ROAD_NM_ADDR'] = road_nm

        if pd.isna(row['X_COORD']) or pd.isna(row['Y_COORD']):
            _, x_coord, y_coord = get_geocode(road_nm, api_key)
            if x_coord is not None and y_coord is not None:
                # get_geocode passes the API values through unchanged;
                # convert them to numbers so they match the numeric
                # coordinate columns instead of triggering dtype warnings.
                activity_consume_df.at[index, 'X_COORD'] = pd.to_numeric(x_coord, errors='coerce')
                activity_consume_df.at[index, 'Y_COORD'] = pd.to_numeric(y_coord, errors='coerce')
        if pd.isna(row['LOTNO_ADDR']):
            road_addr, _, _ = get_geocode(road_nm, api_key)
            if road_addr is not None:
                activity_consume_df.at[index, 'LOTNO_ADDR'] = road_addr

    print("Processing SIDO_NM, SGG_NM, DONG_NM, RI_NM...")
    for index, row in activity_consume_df.iterrows():
        if all(pd.isna(row[col]) for col in ['SIDO_NM', 'SGG_NM', 'DONG_NM']):
            region_info = get_region_info(row['ROAD_NM_ADDR'])
            # NOTE(review): get_region_info also returns all-None on API
            # errors and timeouts, so a transient failure removes the row.
            if all(val is None for val in region_info):
                print(f"Removing row {index} due to missing region info")
                # Defer the drop: removing rows one by one while iterating
                # mutates the frame mid-loop and costs O(n) per row.
                _rows_to_drop.append(index)
            else:
                print(f"Region info updated for row {index}")
                activity_consume_df.loc[index, ['SIDO_NM', 'SGG_NM', 'DONG_NM', 'RI_NM']] = region_info

    _completed = True

except KeyboardInterrupt:
    print("Processing interrupted. Saving current progress...")
finally:
    # Apply the removals decided so far, then persist the current state.
    if _rows_to_drop:
        activity_consume_df.drop(index=_rows_to_drop, inplace=True)
    activity_consume_df.to_csv(output_file_path, index=False, encoding='utf-8-sig')
    if not _completed:
        print(f"Partial results saved to: {output_file_path}")

# Only claim a final result when both passes ran to the end.
if _completed:
    print(f"Final results saved to: {output_file_path}")


Resuming from saved file: tn_visit_area_info_방문지정보2nd.csv
Initial data size: (100761, 32)
Data size after removing rows with all 4 values missing: (100761, 32)
Processing ROAD_NM_ADDR, X_COORD, Y_COORD...
Attempting to geocode address: 서울 마포구 상암동 495-72
Geocode success for address: 서울 마포구 상암동 495-72
Attempting to geocode address: 인천 중구 북성동1가 98-580
Geocode success for address: 인천 중구 북성동1가 98-580
Attempting to geocode address: 서울 영등포구 여의도동 85
Geocode success for address: 서울 영등포구 여의도동 85
Attempting to geocode address: 서울 종로구 동숭동 50-111
Geocode success for address: 서울 종로구 동숭동 50-111
Cache hit for address: 서울 종로구 동숭동 50-111
Attempting to geocode address: 경기 성남시 분당구 율동 318
Geocode success for address: 경기 성남시 분당구 율동 318
Cache hit for address: 경기 성남시 분당구 율동 318
Attempting to geocode address: 경기 수원시 팔달구 인계동 1132-4
Geocode success for address: 경기 수원시 팔달구 인계동 1132-4
Attempting to geocode address: 경기 수원시 팔달구 신풍동 64-1
Geocode success for address: 경기 수원시 팔달구 신풍동 64-1
Attempting to geocode address: 

  activity_consume_df.at[index, 'X_COORD'] = x_coord
  activity_consume_df.at[index, 'Y_COORD'] = y_coord


Attempting to geocode address: 경기 시흥시 조남동 190-1
Geocode success for address: 경기 시흥시 조남동 190-1
Attempting to geocode address: 서울 서초구 방배동 2529
Geocode success for address: 서울 서초구 방배동 2529
Attempting to geocode address: 경기 오산시 양산동 산 19-3
Geocode success for address: 경기 오산시 양산동 산 19-3
Attempting to geocode address: 경기 안양시 만안구 석수동 825-1
Geocode success for address: 경기 안양시 만안구 석수동 825-1
Attempting to geocode address: 서울 영등포구 영등포동 618-496
Geocode success for address: 서울 영등포구 영등포동 618-496
Attempting to geocode address: 서울 성동구 성수동1가 668-11
Geocode success for address: 서울 성동구 성수동1가 668-11
Attempting to geocode address: 서울 서초구 반포동
Geocode success for address: 서울 서초구 반포동
Attempting to geocode address: 경기 파주시 광탄면 기산리 481-1
Geocode success for address: 경기 파주시 광탄면 기산리 481-1
Attempting to geocode address: 경기 연천군 중면 횡산리 산 109
Geocode success for address: 경기 연천군 중면 횡산리 산 109
Attempting to geocode address: 경기 연천군 연천읍 부곡리 193
Geocode success for address: 경기 연천군 연천읍 부곡리 193
Attempting to geocode address: 인