In [1]:
import json
import os
import time
from datetime import date, datetime, timedelta
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

### 自訂函式

In [2]:
# 自訂函式
def get_city_list(url, headers, timeout=30):
    """Fetch the code of every county/city and return them as a list for looping.

    Posts an ``O302_2`` query covering yesterday through today with
    ``Animal`` set to ``"0"`` and collects the ``AreaID`` of each returned
    record.

    Args:
        url: Endpoint that receives the POST request.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        list: The ``AreaID`` values in the order returned by the server;
        an empty list when the response carries no records.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
        ValueError: If the response body is not valid JSON.
    """
    fmt = "%Y/%m/%d"
    start = date.today() - timedelta(days=1)

    payload = {
        "Method": "O302_2",
        "Param": json.dumps(
            {
                "SDATE": start.strftime(fmt),
                "EDATE": (start + timedelta(days=1)).strftime(fmt),
                "Animal": "0",
            }
        ),
    }

    res = requests.post(url, headers=headers, data=payload, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    res.raise_for_status()
    res.encoding = "utf-8-sig"

    # The body is JSON whose "Message" field is itself a JSON-encoded string.
    envelope = json.loads(res.text)
    # A missing, null or empty "Message" is treated as "no records".
    records = json.loads(envelope.get("Message") or "[]")

    return [row["AreaID"] for row in records if "AreaID" in row]


def get_city_code():
    """Return a new dict mapping each single-letter area code to its romanised name.

    The insertion order of the mapping is fixed by the pair table below.
    """
    code_name_pairs = (
        ("A", "NewTaipei"),
        ("V", "Taipei"),
        ("S", "Taichung"),
        ("U", "Tainan"),
        ("W", "Kaohsiung"),
        ("C", "Taoyuan"),
        ("B", "Yilan"),
        ("D", "Hsinchu"),
        ("E", "Miaoli"),
        ("G", "Changhua"),
        ("H", "Nantou"),
        ("I", "Yunlin"),
        ("J", "Chiayi"),
        ("M", "Pingtung"),
        ("N", "Taitung"),
        ("O", "Hualien"),
        ("P", "Penghu"),
        ("Q", "Keelung"),
        ("R", "HsinchuCity"),
        ("T", "ChiayiCity"),
        ("Y", "Kinmen"),
        ("X", "Lianjiang"),
    )
    return dict(code_name_pairs)


def get_col():
    """Return a new list of the column names, in order, for the combined table."""
    area_cols = ["AreaID", "AreaName"]
    # Field columns are deliberately not in numeric order.
    field_cols = [
        "fld01", "fld02", "fld03",
        "fld05", "fld06", "fld04",
        "fld08", "fld07", "fld10",
    ]
    added_cols = ["animal", "date", "city", "update_date"]
    return [*area_cols, *field_cols, *added_cols]


def post_data(start_date, end_date, ani, dist):
    """Build the form payload for an ``O302C_2`` query.

    Args:
        start_date: Value sent as ``SDATE``.
        end_date: Value sent as ``EDATE``.
        ani: Value sent as ``Animal``.
        dist: Value sent as ``CountyID``.

    Returns:
        dict: ``Method`` plus ``Param``, where ``Param`` is the JSON-encoded
        query parameters.
    """
    query = {
        "SDATE": start_date,
        "EDATE": end_date,
        "Animal": ani,
        "CountyID": dist,
    }
    return {"Method": "O302C_2", "Param": json.dumps(query)}


def post_requests(url, headers, data, timeout=30):
    """POST ``data`` to ``url`` and return the decoded ``Message`` payload.

    Args:
        url: Endpoint that receives the POST request.
        headers: HTTP headers sent with the request.
        data: Form payload to send.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The JSON-decoded content of the response's ``Message`` field; an
        empty list when that field is missing, null or empty.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
        ValueError: If the body or the ``Message`` field is not valid JSON.
    """
    res = requests.post(url=url, headers=headers, data=data, timeout=timeout)
    # Surface 4xx/5xx responses as errors so callers can retry, rather than
    # mistaking an error body for "no data".
    res.raise_for_status()
    res.encoding = "utf-8-sig"

    # The body is JSON whose "Message" field is itself a JSON-encoded string,
    # so it has to be decoded twice.
    envelope = json.loads(res.text)
    message = envelope.get("Message") or "[]"

    return json.loads(message)


def stop_try(tries, max_tries):
    """Tell whether the attempt counter has reached the allowed maximum.

    Args:
        tries: Number of the attempt that has just been made.
        max_tries: Maximum number of attempts allowed.

    Returns:
        bool: ``True`` when ``tries`` is at or beyond ``max_tries``.
    """
    return tries >= max_tries

### 主程式

In [None]:
# Endpoint and request headers shared by every query.
url = "https://www.pet.gov.tw/Handler/PostData.ashx"

headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
}

# Output directory for the per-day CSV files (machine-specific path).
# Create it up front so saving does not fail just because it is missing.
folder = r"C:\Users\add41\Documents\Data_Engineer\Project\example_data\pet_regis"
os.makedirs(folder, exist_ok=True)

# County/city codes to query.
# city_list = get_city_list(url, headers)  # uncomment to query every code the site reports
city_list = ["A", "V", "S", "U", "W"]

# Date range: from `start` up to (not including) today, one day per request.
today = date.today()
start = datetime(2025, 11, 2).date()
update_date = today.strftime("%Y/%m/%d")

# Animal codes sent to the endpoint and the label printed for each.
animal = {"0": "犬", "1": "貓"}

# Code -> name lookup used in the progress messages.
city_dict = get_city_code()

# Column order of the saved tables.
columns = get_col()

# Maximum attempts per request; a failure caused by requesting too quickly
# may succeed after a pause.
max_tries = 3


def _combine_frames(frames, columns):
    """Concatenate `frames`, placing `columns` first in the result.

    Returns an empty table with just `columns` when `frames` is empty, so a
    header-only CSV can still be written.
    """
    if not frames:
        return pd.DataFrame(columns=columns)
    table = pd.concat(frames)
    extra = [name for name in table.columns if name not in columns]
    return table.reindex(columns=list(columns) + extra)


# `df_main` accumulates everything fetched in this run; `frames_all` holds
# the individual pieces so the table is rebuilt with one concat per day.
df_main = pd.DataFrame(columns=columns)
frames_all = []

while start < today:
    end = start + timedelta(days=1)
    start_date = start.strftime("%Y/%m/%d")
    end_date = end.strftime("%Y/%m/%d")

    # One file per queried day, named after that day.
    save_date = start.strftime("%Y%m%d")
    file = f"{save_date}_pet_regis.csv"
    file_path = os.path.join(folder, file)

    # Frames fetched for this day only, so each date-stamped file holds just
    # its own day instead of also repeating every earlier day.
    frames_day = []

    for dist in city_list:
        print(f"開始搜尋{city_dict[dist]}鄉鎮市區資料...")

        # Dogs and cats need separate requests (different POST payloads).
        for ani in animal:
            data = post_data(start_date, end_date, ani, dist)

            # Only the fetch is retried. Appending and saving happen once,
            # afterwards, so a failed save can never cause the same rows to
            # be fetched and appended a second time.
            df = None
            for tries in range(1, max_tries + 1):
                try:
                    data_json = post_requests(
                        url=url, headers=headers, data=data)
                    df = pd.DataFrame(data_json)
                    break
                except Exception as e:
                    if stop_try(tries, max_tries):
                        # Report the last error too instead of dropping it.
                        print(f"第{tries}次嘗試錯誤：{e}")
                        print("已達最大嘗試次數，跳過該日")
                    else:
                        print(
                            f"第{tries}次嘗試錯誤：{e}\n等待{tries*5}秒後再次嘗試..."
                        )
                        time.sleep(tries * 5)

            if df is None:
                continue

            # Tag the rows with the animal code, queried day, area code and
            # the date this run was made.
            df["animal"] = ani
            df["date"] = start_date
            df["city"] = dist
            df["update_date"] = update_date

            if not df.empty:
                frames_day.append(df)

            # Save after every successful fetch so an unexpected stop loses
            # at most the request in flight.
            try:
                _combine_frames(frames_day, columns).to_csv(
                    file_path, index=False, encoding="utf-8-sig")
                print(f"已更新{city_dict[dist]} {save_date}的{animal[ani]}資料")
            except OSError as e:
                # The rows stay in `frames_day`, so the next successful save
                # for this day will include them.
                print(f"無法寫入{file_path}：{e}")

            # Pause between requests to avoid hitting the server too quickly.
            time.sleep(7)

        # Announce that this county/city has been processed for the day.
        print(f"已完成更新{city_dict[dist]}資料至{end_date}！")

    frames_all.extend(frames_day)
    df_main = _combine_frames(frames_all, columns)

    start += timedelta(days=1)

print("已完成所有資料更新！")

開始搜尋NewTaipei鄉鎮市區資料...
已更新NewTaipei 20251102的犬資料
已更新NewTaipei 20251102的貓資料
已完成更新NewTaipei資料至2025/11/03！
開始搜尋Taipei鄉鎮市區資料...
已更新Taipei 20251102的犬資料
已更新Taipei 20251102的貓資料
已完成更新Taipei資料至2025/11/03！
開始搜尋Taichung鄉鎮市區資料...
已更新Taichung 20251102的犬資料
已更新Taichung 20251102的貓資料
已完成更新Taichung資料至2025/11/03！
開始搜尋Tainan鄉鎮市區資料...
已更新Tainan 20251102的犬資料
已更新Tainan 20251102的貓資料
已完成更新Tainan資料至2025/11/03！
開始搜尋Kaohsiung鄉鎮市區資料...
已更新Kaohsiung 20251102的犬資料
已更新Kaohsiung 20251102的貓資料
已完成更新Kaohsiung資料至2025/11/03！
開始搜尋NewTaipei鄉鎮市區資料...
已更新NewTaipei 20251103的犬資料
已更新NewTaipei 20251103的貓資料
已完成更新NewTaipei資料至2025/11/04！
開始搜尋Taipei鄉鎮市區資料...
已更新Taipei 20251103的犬資料
已更新Taipei 20251103的貓資料
已完成更新Taipei資料至2025/11/04！
開始搜尋Taichung鄉鎮市區資料...
已更新Taichung 20251103的犬資料
已更新Taichung 20251103的貓資料
已完成更新Taichung資料至2025/11/04！
開始搜尋Tainan鄉鎮市區資料...
已更新Tainan 20251103的犬資料
已更新Tainan 20251103的貓資料
已完成更新Tainan資料至2025/11/04！
開始搜尋Kaohsiung鄉鎮市區資料...
已更新Kaohsiung 20251103的犬資料
已更新Kaohsiung 20251103的貓資料
已完成更新Kaohsiung資料至2025/11/04！
開始搜尋NewTaipei鄉鎮市

In [3]:
# Single-request check: fetch one day of data for one area code and one
# animal code, then display the raw table the endpoint returns.
url = "https://www.pet.gov.tw/Handler/PostData.ashx"

headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
}

# city_list = get_city_list(url, headers)  # uncomment to query every code the site reports
city_list = ["A"]

# Run date, the day to query, and the day after it.
today = date.today()
start = date(2025, 11, 2)
end = start + timedelta(days=1)

# Animal codes sent to the endpoint and the label for each.
animal = {"0": "犬", "1": "貓"}

# Code -> name lookup.
city_dict = get_city_code()

# Parameters of the one request made here.
dist, ani = "A", "0"
start_date, end_date = (day.strftime("%Y/%m/%d") for day in (start, end))

data = post_data(start_date, end_date, ani, dist)
data_json = post_requests(url=url, headers=headers, data=data)

# Load the decoded records into a table; no extra columns are added here.
df = pd.DataFrame(data_json)

df

Unnamed: 0,AreaID,AreaName,fld01,fld02,fld03,fld05,fld06,fld04,fld08,fld07,fld10
0,207,207萬里區,0,0,0,0,0,0,0,0,0
1,208,208金山區,0,0,0,0,0,0,0,0,0
2,220,220板橋區,8,12,0,24,4,1,0,0,0
3,221,221汐止區,0,4,0,4,2,2,0,0,0
4,222,222深坑區,0,0,0,0,0,0,0,0,0
5,223,223石碇區,0,0,0,1,0,0,0,0,0
6,224,224瑞芳區,1,1,0,0,0,1,0,0,0
7,226,226平溪區,0,0,0,0,0,0,0,0,0
8,227,227雙溪區,0,0,0,0,0,0,0,0,0
9,228,228貢寮區,0,0,0,0,0,0,0,0,0
