In [23]:
"""국토교통부 아파트매매 실거래 데이터 추출 예시
- 링크: https://www.data.go.kr/tcs/dss/selectApiDataDetailView.do?publicDataPk=15057511
"""
from dotenv import load_dotenv
import os
import requests
import pandas as pd
import io
import xmltodict
import json

# import xml.etree.ElementTree as et


# load .env
load_dotenv()
getRTMSDataSvcAptTradeDev_en = os.environ.get("getRTMSDataSvcAptTradeDev_en")
getRTMSDataSvcAptTradeDev_de = os.environ.get("getRTMSDataSvcAptTradeDev_de")

In [24]:
start_date = pd.to_datetime('2019-01-01')
end_date = pd.to_datetime('2022-09-30')
dates = pd.date_range(start_date, end_date, freq="M")

In [25]:
MONTH_LIST = [date.strftime("%Y%m") for date in dates]
RES_ROWS = 10000
SIGUNGU_CODE = 11470

In [26]:
url = "http://openapi.molit.go.kr/OpenAPI_ToolInstallPackage/service/rest/RTMSOBJSvc/getRTMSDataSvcAptTradeDev"

In [27]:
for month in MONTH_LIST:
    params = {
        "serviceKey": getRTMSDataSvcAptTradeDev_de,
        "pageNo": "1",
        "numOfRows": RES_ROWS,
        "LAWD_CD": SIGUNGU_CODE,  # Input 1: 시군구코드 5자리, ex) 11680 = 서울특별시 강남구
        "DEAL_YMD": month,  # Input 2: 년월(YYYYMM), ex) 202208 = 2022년 8월
    }

    response = requests.get(url, params=params)
    dict_content = xmltodict.parse(response.content)["response"]["body"]["items"]["item"]
    df = pd.DataFrame(dict_content)

    df.to_csv(f"data/서울특별시_양천구_아파트매매/서울특별시_양천구_아파트매매_{month}.csv",
              encoding="utf8",
              index=False
    )

ValueError: If using all scalar values, you must pass an index

In [28]:
file_list = os.listdir("data/서울특별시_양천구_아파트매매")
file_list.sort()

In [31]:
file_list

['서울특별시_양천구_아파트매매_201901.csv',
 '서울특별시_양천구_아파트매매_201902.csv',
 '서울특별시_양천구_아파트매매_201903.csv',
 '서울특별시_양천구_아파트매매_201904.csv',
 '서울특별시_양천구_아파트매매_201905.csv',
 '서울특별시_양천구_아파트매매_201906.csv',
 '서울특별시_양천구_아파트매매_201907.csv',
 '서울특별시_양천구_아파트매매_201908.csv',
 '서울특별시_양천구_아파트매매_201909.csv',
 '서울특별시_양천구_아파트매매_201910.csv',
 '서울특별시_양천구_아파트매매_201911.csv',
 '서울특별시_양천구_아파트매매_201912.csv',
 '서울특별시_양천구_아파트매매_202001.csv',
 '서울특별시_양천구_아파트매매_202002.csv',
 '서울특별시_양천구_아파트매매_202003.csv',
 '서울특별시_양천구_아파트매매_202004.csv',
 '서울특별시_양천구_아파트매매_202005.csv',
 '서울특별시_양천구_아파트매매_202006.csv',
 '서울특별시_양천구_아파트매매_202007.csv',
 '서울특별시_양천구_아파트매매_202008.csv',
 '서울특별시_양천구_아파트매매_202009.csv',
 '서울특별시_양천구_아파트매매_202010.csv',
 '서울특별시_양천구_아파트매매_202011.csv',
 '서울특별시_양천구_아파트매매_202012.csv',
 '서울특별시_양천구_아파트매매_202101.csv',
 '서울특별시_양천구_아파트매매_202102.csv',
 '서울특별시_양천구_아파트매매_202103.csv',
 '서울특별시_양천구_아파트매매_202104.csv',
 '서울특별시_양천구_아파트매매_202105.csv',
 '서울특별시_양천구_아파트매매_202106.csv',
 '서울특별시_양천구_아파트매매_202107.csv',
 '서울특별시_양천구_아파트매매_202108.csv',
 '서울특별시_

In [34]:
df_all = pd.DataFrame()
for file in file_list:
    df= pd.read_csv("data/서울특별시_양천구_아파트매매/" + file, encoding='utf-8')
    df_all = pd.concat([df_all, df])

In [38]:
df_all.to_csv("data/서울특별시_양천구_아파트매매(2019_202208).csv", index=False)

In [39]:
df_all.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9575 entries, 0 to 17
Data columns (total 28 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   거래금액        9575 non-null   object 
 1   거래유형        433 non-null    object 
 2   건축년도        9575 non-null   int64  
 3   년           9575 non-null   int64  
 4   도로명         9568 non-null   object 
 5   도로명건물본번호코드  9568 non-null   float64
 6   도로명건물부번호코드  9568 non-null   float64
 7   도로명시군구코드    9568 non-null   float64
 8   도로명일련번호코드   9575 non-null   int64  
 9   도로명지상지하코드   9110 non-null   float64
 10  도로명코드       9568 non-null   float64
 11  법정동         9575 non-null   object 
 12  법정동본번코드     9575 non-null   int64  
 13  법정동부번코드     9575 non-null   int64  
 14  법정동시군구코드    9575 non-null   int64  
 15  법정동읍면동코드    9575 non-null   int64  
 16  법정동지번코드     9575 non-null   int64  
 17  아파트         9575 non-null   object 
 18  월           9575 non-null   int64  
 19  일           9575 non-null   i