In [13]:
import json
import os
import time
from datetime import date, datetime, timedelta
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

from mods import pandas_mod as pdm

In [14]:
# 自訂函式
def get_city_list(url, headers, timeout=30):
    """Fetch the county/city codes returned by the registry endpoint.

    Posts an ``O302_2`` query whose date window runs from yesterday to today
    with ``Animal`` set to ``"0"``, then collects the ``AreaID`` value of
    every row in the response so callers can loop over them.

    Args:
        url: Endpoint that receives the form-encoded POST.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Returns:
        list: ``AreaID`` values in response order; an empty list when the
        response contains no rows.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
        json.JSONDecodeError: If the body or its ``Message`` field is not
            valid JSON.
    """
    fmt = "%Y/%m/%d"
    query_day = date.today() - timedelta(days=1)

    payload = {
        "Method": "O302_2",
        "Param": json.dumps(
            {
                "SDATE": query_day.strftime(fmt),
                "EDATE": (query_day + timedelta(days=1)).strftime(fmt),
                "Animal": "0",
            }
        ),
    }

    res = requests.post(url, headers=headers, data=payload, timeout=timeout)
    # Surface HTTP errors directly instead of as an opaque JSON decode failure.
    res.raise_for_status()
    res.encoding = "utf-8-sig"

    # The body is a JSON envelope whose "Message" field is itself a JSON string.
    envelope = json.loads(res.text)
    rows = json.loads(envelope.get("Message", "[]"))

    df_id_list = pd.DataFrame(rows)
    # An empty result has no "AreaID" column; report "no codes" rather than KeyError.
    if "AreaID" not in df_id_list.columns:
        return []

    return list(df_id_list["AreaID"])


def get_city_code():
    """Return a new dict mapping each county code letter to its English name.

    The names are used by the driver cell to build per-county CSV file names.
    """
    code_name_pairs = (
        ("A", "NewTaipei"),
        ("V", "Taipei"),
        ("S", "Taichung"),
        ("U", "Tainan"),
        ("W", "Kaohsiung"),
        ("C", "Taoyuan"),
        ("B", "Yilan"),
        ("D", "Hsinchu"),
        ("E", "Miaoli"),
        ("G", "Changhua"),
        ("H", "Nantou"),
        ("I", "Yunlin"),
        ("J", "Chiayi"),
        ("M", "Pingtung"),
        ("N", "Taitung"),
        ("O", "Hualien"),
        ("P", "Penghu"),
        ("Q", "Keelung"),
        ("R", "HsinchuCity"),
        ("T", "ChiayiCity"),
        ("Y", "Kinmen"),
        ("X", "Lianjiang"),
    )
    # dict() keeps the pair order, so iteration order matches the listing above.
    return dict(code_name_pairs)


def get_col():
    """Return a new list of the column names used when building an empty frame.

    The list is ``AreaID``, ``AreaName``, nine ``fldNN`` columns, then
    ``animal``.
    """
    # Field numbers in the exact (non-ascending) order the column list uses.
    field_order = (1, 2, 3, 5, 6, 4, 8, 7, 10)
    field_cols = [f"fld{number:02d}" for number in field_order]

    return ["AreaID", "AreaName"] + field_cols + ["animal"]


def set_start_date(df_main, default_start="2020/01/01"):
    """Work out the first date that still needs to be fetched.

    Args:
        df_main: Frame of previously stored rows. When it has rows, its
            ``date`` column holds ``YYYY/MM/DD`` strings (or timestamps).
        default_start: ``YYYY/MM/DD`` date to start from when ``df_main``
            has no rows.

    Returns:
        datetime.date: ``default_start`` for an empty frame, otherwise the
        day after the ``date`` value in the frame's last row.

    Notes:
        Only the last row is inspected, so rows are assumed to be stored in
        chronological order. ``df_main`` is left untouched: converting its
        ``date`` column in place would leave the caller with timestamps in
        old rows and strings in newly appended rows when the frame is
        written back to CSV.
    """
    fmt = "%Y/%m/%d"
    if len(df_main.index) == 0:
        return datetime.strptime(default_start, fmt).date()

    # Parse just the final entry; no need to convert (or mutate) the column.
    last_date = pd.to_datetime(df_main["date"].iloc[-1], format=fmt)
    return (last_date + timedelta(days=1)).date()


def get_start_end_date(start):
    """Format ``start`` and the following day as ``YYYY/MM/DD`` strings.

    Args:
        start: A date-like object supporting ``+ timedelta`` and ``strftime``.

    Returns:
        tuple: ``(start_date, end_date)`` where ``end_date`` is one day
        after ``start``.
    """
    fmt = "%Y/%m/%d"
    following_day = start + timedelta(days=1)

    return start.strftime(fmt), following_day.strftime(fmt)


def post_data(start_date, end_date, ani, dist):
    """Build the form payload for an ``O302C_2`` query.

    Args:
        start_date: Value sent as ``SDATE``.
        end_date: Value sent as ``EDATE``.
        ani: Value sent as ``Animal``.
        dist: Value sent as ``CountyID``.

    Returns:
        dict: ``Method`` plus ``Param``, where ``Param`` is the JSON-encoded
        query parameters.
    """
    query = {
        "SDATE": start_date,
        "EDATE": end_date,
        "Animal": ani,
        "CountyID": dist,
    }
    payload = {"Method": "O302C_2"}
    payload["Param"] = json.dumps(query)
    return payload


def post_requests(url, headers, data, timeout=30):
    """POST a query and return the decoded rows from its ``Message`` field.

    Args:
        url: Endpoint that receives the form-encoded POST.
        headers: HTTP headers sent with the request.
        data: Form payload, e.g. the dict built by ``post_data``.
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The JSON-decoded content of the response's ``Message`` field; an
        empty list when the field is absent.

    Raises:
        requests.RequestException: On timeout, connection failure, or an
            HTTP error status.
        json.JSONDecodeError: If the body or its ``Message`` field is not
            valid JSON.
    """
    res = requests.post(url=url, headers=headers, data=data, timeout=timeout)
    # Surface HTTP errors directly instead of as an opaque JSON decode failure.
    res.raise_for_status()
    res.encoding = "utf-8-sig"

    # The body is a JSON envelope whose "Message" field is itself a JSON
    # string, so it has to be decoded twice.
    envelope = json.loads(res.text)
    return json.loads(envelope.get("Message", "[]"))


def stop_try(tries, max_tries):
    """Return True once ``tries`` has reached or passed ``max_tries``."""
    return bool(tries >= max_tries)

In [None]:
# Endpoint and request headers
url = "https://www.pet.gov.tw/Handler/PostData.ashx"

headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
}

# County codes to crawl
# city_list = get_city_list(url, headers)  # use this line to fetch every county code instead
city_list = ["V", "S", "U", "W"]

# Keep `today` as a date object: it is compared with date values in the
# while-loop below and formatted with strftime when stamping rows. Turning it
# into a string here made both of those operations raise.
today = date.today()

# Animal type codes (dogs / cats); each needs its own POST
animal = ["0", "1"]

# County code -> county name, used for the output file name
city_dict = get_city_code()

# Output folder. Set the PET_REGIS_DIR environment variable to run this
# notebook on another machine; the default is the original local path.
folder = os.environ.get(
    "PET_REGIS_DIR",
    r"C:\Users\add41\Documents\Data_Engineer\Project\example_data\pet_regis",
)

# Attempts per request; a request rejected for arriving too soon may succeed
# on a later attempt.
max_tries = 3

# Loop over the county codes
for dist in city_list:

    # One CSV per county
    file = f"{city_dict[dist]}.csv"
    print(f"開始搜尋{city_dict[dist]}鄉鎮市區資料...")

    # Load the existing file if there is one, otherwise build a new frame
    columns = get_col()
    df_main, file_path = pdm.read_or_build(folder, file, columns)

    # Start from scratch when nothing is stored yet, otherwise continue from
    # the day after the last stored row
    start = set_start_date(df_main)

    # Fetch one day at a time until the start date catches up with today
    while start < today:
        start_date, end_date = get_start_end_date(start)

        # The two animal types need different POST payloads
        for ani in animal:
            data = post_data(start_date, end_date, ani, dist)

            for tries in range(1, max_tries + 1):
                try:
                    data_json = post_requests(url=url, headers=headers, data=data)

                    # Tag the rows with animal type, date, county and run date
                    df = pd.DataFrame(data_json)
                    df["animal"] = ani
                    df["date"] = start_date
                    df["city"] = dist
                    df["update_date"] = today.strftime("%Y/%m/%d")

                    # Append and save right away so an unexpected stop does
                    # not lose everything fetched so far
                    df_main = pd.concat([df_main, df])
                    df_main.to_csv(file_path, index=False, encoding="utf-8")
                    time.sleep(7)  # pause between successful requests
                    break

                except Exception as e:
                    if stop_try(tries, max_tries):
                        # NOTE(review): the day is still reported as updated
                        # below, and because the next run resumes after the
                        # last stored date, a skipped request is not retried.
                        print("已達最大嘗試次數，跳過該日")
                        break

                    print(
                        f"第{tries}次嘗試錯誤：{e}\n等待{tries*5}秒後再次嘗試..."
                    )
                    # Wait longer after each failed attempt
                    time.sleep(tries * 5)

        # Report the finished day and move on to the next one
        print(f"已更新{city_dict[dist]} {start_date}犬貓資料")
        start += timedelta(days=1)

    # Report the finished county
    print(f"已完成更新{city_dict[dist]}資料至{today}！")

print("已完成所有資料更新！")


開始搜尋Taipei鄉鎮市區資料...
已更新Taipei 2025/10/26犬貓資料
已更新Taipei 2025/10/27犬貓資料
第1次嘗試錯誤：Expecting value: line 1 column 1 (char 0)
等待5秒後再次嘗試...
已更新Taipei 2025/10/28犬貓資料
已更新Taipei 2025/10/29犬貓資料
已更新Taipei 2025/10/30犬貓資料
已完成更新Taipei資料至2025-10-31！
開始搜尋Taichung鄉鎮市區資料...
已更新Taichung 2025/10/26犬貓資料
已更新Taichung 2025/10/27犬貓資料
已更新Taichung 2025/10/28犬貓資料
已更新Taichung 2025/10/29犬貓資料
已更新Taichung 2025/10/30犬貓資料
已完成更新Taichung資料至2025-10-31！
開始搜尋Tainan鄉鎮市區資料...
已更新Tainan 2025/10/26犬貓資料
已更新Tainan 2025/10/27犬貓資料
已更新Tainan 2025/10/28犬貓資料
已更新Tainan 2025/10/29犬貓資料
已更新Tainan 2025/10/30犬貓資料
已完成更新Tainan資料至2025-10-31！
開始搜尋Kaohsiung鄉鎮市區資料...
已更新Kaohsiung 2025/10/26犬貓資料
已更新Kaohsiung 2025/10/27犬貓資料
已更新Kaohsiung 2025/10/28犬貓資料
已更新Kaohsiung 2025/10/29犬貓資料
已更新Kaohsiung 2025/10/30犬貓資料
已完成更新Kaohsiung資料至2025-10-31！
已完成所有資料更新！
