In [1]:
import os
import time
import urllib.parse
import requests
from bs4 import BeautifulSoup

In [2]:
session = requests.Session()
url = 'https://pip.moi.gov.tw/Publicize/Info/E1040'

# (1) GET the page first so the value of its `__RequestVerificationToken`
# input can be read and sent back with the later POST requests.
# The timeout keeps this cell from blocking forever if the server never answers.
resp = session.get(url, timeout=30)
# Stop on an HTTP error status instead of parsing an error page for the token.
resp.raise_for_status()
resp.encoding = 'utf-8'

soup = BeautifulSoup(resp.text, 'html.parser')
token_input = soup.find('input', {'name': '__RequestVerificationToken'})
token = token_input.get('value') if token_input is not None else None
if not token:
    # Without this check a missing input surfaces as an opaque
    # "'NoneType' object is not subscriptable" TypeError.
    raise RuntimeError(
        f"__RequestVerificationToken input not found (or empty) in the page at {url}"
    )


In [3]:
# 2. City codes and quarters to download.
# (city name, city code) pairs, in download order.
_city_code_pairs = [
    ("臺北市", "63000"),
    ("新北市", "65000"),
    ("桃園市", "68000"),
    ("新竹市", "10018"),
    ("新竹縣", "10004"),
    ("臺中市", "66000"),
    ("臺南市", "67000"),
    ("高雄市", "64000"),
    ("南投縣", "10008"),
    ("嘉義市", "10020"),
    ("嘉義縣", "10010"),
    ("宜蘭縣", "10002"),
    ("屏東縣", "10013"),
    ("彰化縣", "10007"),
    ("澎湖縣", "10016"),
    ("臺東縣", "10014"),
    ("花蓮縣", "10015"),
    ("苗栗縣", "10005"),
    ("連江縣", "09007"),
    ("金門縣", "09020"),
    ("雲林縣", "10009"),
    ("基隆市", "10017"),
]
# `cities` is a list of single-entry dicts {city name: city code}.
# For a quick single-city trial run, use: cities = [{"臺北市": "63000"}]
cities = [{name: code} for name, code in _city_code_pairs]

# Years 109 through 113, quarters 1 through 4. Both lists are derived from the
# same (year, quarter) sequence so they stay aligned index-for-index.
_year_quarter_pairs = [
    (year, quarter) for year in range(109, 114) for quarter in range(1, 5)
]
# Quarter format sent in the POST payload, e.g. "109Q1".
payload_quarters = ["{}Q{}".format(*pair) for pair in _year_quarter_pairs]
# Quarter format used in output file names, e.g. "109Y1S".
file_name_quarters = ["{}Y{}S".format(*pair) for pair in _year_quarter_pairs]

# 3. Output folder (created if it does not exist yet).
output_dir = r"C:\pylabs\area-risk-flagging\data\pip_moi\district_new_house_for_sale\raw_data"
os.makedirs(output_dir, exist_ok=True)

In [4]:
# 4. Base payload: the form fields shared by every POST request.
# Built from groups of keys that share a value; the groups are listed in the
# same order the fields are sent, so the resulting key order is unchanged.
_paging_fields = ('tpc01_pidx', 'tpc02_pidx', 'tpc03_pidx', 'tpc04_pidx')
_blank_query_fields = (
    'q1_ddate_sel', 'q1_city_sel',
    'q2_ddate_sel', 'q2_city_sel', 'q2_town_sel',
    'q3_ddate_sel', 'q3_qq_sel', 'q3_city_sel',
    'q4_ddate_sel', 'q4_qq_sel', 'q4_city_sel', 'q4_town_sel',
    'q5_ddate_sel', 'q5_city_sel',
)
_blank_f_fields = ('F04', 'F05', 'F06')

base_payload = {
    **dict.fromkeys(_paging_fields, '1'),
    **dict.fromkeys(_blank_query_fields, ''),
    'F01': 'DataGroup4',
    **dict.fromkeys(_blank_f_fields, ''),
    # Token read from the page fetched earlier in the notebook.
    '__RequestVerificationToken': token,
}

# Request headers sent with every POST: the page itself as referer, and a
# URL-encoded form body.
headers = {}
headers['Referer'] = url
headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'


In [5]:
# 5. Download loop: one POST per (city, quarter), each response saved to a file.
# zip() walks the payload-format and file-name-format quarter lists in lockstep.

# Seconds to wait on the server before giving up. Without a timeout a single
# stalled request would block the whole run indefinitely.
request_timeout = 30

for city_dict in cities:
    # Each entry is a single-item dict {city name: city code}.
    city_name, city_code = next(iter(city_dict.items()))
    for payload_q, file_q in zip(payload_quarters, file_name_quarters):
        payload = {
            **base_payload,
            'F02': payload_q,   # quarter, in the format sent in the POST
            'F03': city_code,   # city code sent in the POST
        }

        r = session.post(url, data=payload, headers=headers, timeout=request_timeout)
        # Abort on an HTTP error status rather than saving an error body.
        r.raise_for_status()

        # The file name uses file_q (e.g. "109Y1S"), not the payload format.
        fname = f"待售新成屋_{file_q}_{city_code}_{city_name}.csv"
        path = os.path.join(output_dir, fname)

        # Write the raw response bytes unchanged.
        with open(path, 'wb') as fp:
            fp.write(r.content)

        print(f"下載完成：{path}")
        # Short pause between requests.
        time.sleep(0.5)

下載完成：C:\pylabs\area-risk-flagging\data\pip_moi\district_new_house_for_sale\raw_data\待售新成屋_109Y1S_63000_臺北市.csv
下載完成：C:\pylabs\area-risk-flagging\data\pip_moi\district_new_house_for_sale\raw_data\待售新成屋_109Y2S_63000_臺北市.csv
下載完成：C:\pylabs\area-risk-flagging\data\pip_moi\district_new_house_for_sale\raw_data\待售新成屋_109Y3S_63000_臺北市.csv
下載完成：C:\pylabs\area-risk-flagging\data\pip_moi\district_new_house_for_sale\raw_data\待售新成屋_109Y4S_63000_臺北市.csv
下載完成：C:\pylabs\area-risk-flagging\data\pip_moi\district_new_house_for_sale\raw_data\待售新成屋_110Y1S_63000_臺北市.csv
下載完成：C:\pylabs\area-risk-flagging\data\pip_moi\district_new_house_for_sale\raw_data\待售新成屋_110Y2S_63000_臺北市.csv
下載完成：C:\pylabs\area-risk-flagging\data\pip_moi\district_new_house_for_sale\raw_data\待售新成屋_110Y3S_63000_臺北市.csv
下載完成：C:\pylabs\area-risk-flagging\data\pip_moi\district_new_house_for_sale\raw_data\待售新成屋_110Y4S_63000_臺北市.csv
下載完成：C:\pylabs\area-risk-flagging\data\pip_moi\district_new_house_for_sale\raw_data\待售新成屋_111Y1S_63000_臺北市.csv
下