In [None]:
import json
import os
import time
from datetime import date, datetime, timedelta
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

# 函式

In [None]:
def get_city_dict() -> dict:
    """Return the mapping from single-letter city codes to city names.

    Returns:
        dict: A new dictionary on every call, keyed by the code letter with
        the city name as the value.
    """
    code_name_pairs = (
        ("A", "新北市"),
        ("V", "臺北市"),
        ("C", "桃園市"),
        ("S", "臺中市"),
        ("U", "臺南市"),
        ("W", "高雄市"),
    )
    return dict(code_name_pairs)

In [None]:
def requests_data(
    city_code,
    city_name,
    folder: str = r"C:\Users\add41\Documents\Data_Engineer\Project\example_data\pet_regis",
) -> dict:
    """Build the settings dictionary used for one city's scraping run.

    Args:
        city_code: Code stored under the "city" key.
        city_name: Name stored under "city_name" and used to build the CSV
            file name.
        folder: Output directory stored under "folder". Defaults to the
            original author's local directory; pass another path to run the
            notebook on a different machine.

    Returns:
        dict: Settings with the keys "url", "headers", "city", "city_name",
        "animal", "today", "start", "start_date", "end_date", "folder" and
        "file_name". "today" and "start" are ``date`` objects (start is one
        day before today); "start_date" and "end_date" are the same two dates
        formatted as ``YYYY/MM/DD``.
    """
    # Read the clock once so every date field is derived from the same day,
    # even when the call happens right around midnight.
    today = date.today()
    start = today - timedelta(days=1)
    date_format = "%Y/%m/%d"

    data_dict = {
        "url": "https://www.pet.gov.tw/Handler/PostData.ashx",
        "headers": {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
        },
        "city": city_code,
        "city_name": city_name,
        "animal": {"0": "犬", "1": "貓"},
        "today": today,
        "start": start,
        "start_date": start.strftime(date_format),
        "end_date": today.strftime(date_format),
        "folder": folder,
        "file_name": f"{city_name}.csv",
    }

    return data_dict

In [None]:
def post_data(start_date, end_date, ani, dist) -> dict:
    """Assemble the form payload for one query.

    Args:
        start_date: Value sent as "SDATE".
        end_date: Value sent as "EDATE".
        ani: Value sent as "Animal".
        dist: Value sent as "CountyID".

    Returns:
        dict: A payload with a fixed "Method" and a "Param" entry holding the
        four query values serialised as a JSON string.
    """
    query = {
        "SDATE": start_date,
        "EDATE": end_date,
        "Animal": ani,
        "CountyID": dist,
    }
    payload = {"Method": "O302C_2"}
    payload["Param"] = json.dumps(query)
    return payload

In [None]:
def post_requests(url, headers, data, timeout: float = 30) -> list:
    """POST the form payload and return the decoded "Message" content.

    Args:
        url: Endpoint to post to.
        headers: HTTP headers sent with the request.
        data: Form payload sent as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The JSON-decoded content of the response's "Message" field
        (presumably a list of records, since the caller feeds it to
        ``pd.DataFrame`` -- TODO confirm). An absent, null or empty "Message"
        yields an empty list.

    Raises:
        requests.RequestException: On connection problems, timeouts or an
            HTTP error status.
        json.JSONDecodeError: If the body or the "Message" field is not
            valid JSON.
    """
    res = requests.post(url=url, headers=headers, data=data, timeout=timeout)
    # Fail early on 4xx/5xx instead of trying to parse an error page as JSON.
    res.raise_for_status()
    # Decode as UTF-8 and drop a leading byte-order mark if one is present.
    res.encoding = "utf-8-sig"

    # The payload is JSON-encoded twice: the outer object carries a "Message"
    # field whose value is itself a JSON string, so it is decoded in two steps.
    data_orig = json.loads(res.text)
    # `or` also covers an explicit null / empty string, which json.loads
    # cannot parse.
    data_str = data_orig.get("Message") or "[]"

    return json.loads(data_str)

def get_data(data_dict, data, max_tries: int = 3, wait_seconds: int = 4) -> pd.DataFrame:
    """Fetch one query result as a DataFrame, retrying on failure.

    Args:
        data_dict: Settings dictionary; its "url" and "headers" entries are
            passed to ``post_requests``.
        data: Form payload passed to ``post_requests``.
        max_tries: Maximum number of attempts (must be at least 1).
        wait_seconds: Base delay; the pause before the next attempt is
            ``attempt_number * wait_seconds`` seconds.

    Returns:
        pd.DataFrame: The decoded response wrapped in a DataFrame.

    Raises:
        ValueError: If ``max_tries`` is smaller than 1.
        RuntimeError: If every attempt failed; the last error is attached as
            the exception's cause.
    """
    if max_tries < 1:
        raise ValueError("max_tries must be at least 1")

    for tries in range(1, max_tries + 1):
        try:
            data_json = post_requests(
                url=data_dict["url"], headers=data_dict["headers"], data=data)
            return pd.DataFrame(data_json)
        except Exception as e:
            if tries >= max_tries:
                # Chain explicitly so the traceback shows the root cause.
                raise RuntimeError(f"已達最大嘗試次數，仍發生錯誤:{e}，終止程式。") from e
            # Linear backoff: wait longer after each failed attempt.
            delay = tries * wait_seconds
            print(f"發生錯誤：{e}，{delay}秒後重試...")
            time.sleep(delay)

In [None]:
def add_columns(df: pd.DataFrame, data_dict: dict, ani: str) -> pd.DataFrame :
    """Append the bookkeeping columns to ``df`` in place and return it.

    Args:
        df: Frame to extend; it is modified directly, not copied.
        data_dict: Settings dictionary supplying "start_date", "city" and
            "end_date".
        ani: Value written to the "animal" column.

    Returns:
        pd.DataFrame: The same ``df`` object, now carrying the columns
        "date", "animal", "city" and "update_date".
    """
    # (target column, key in data_dict); None marks the column fed by `ani`.
    column_sources = (
        ("date", "start_date"),
        ("animal", None),
        ("city", "city"),
        ("update_date", "end_date"),
    )
    for column, key in column_sources:
        df[column] = ani if key is None else data_dict[key]

    return df

In [None]:
def combine_dataframe(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame :
    """Stack two frames vertically into a new frame with a fresh index.

    Args:
        df1: Frame whose rows come first.
        df2: Frame whose rows follow.

    Returns:
        pd.DataFrame: The concatenation of both frames, re-indexed from 0.
    """
    frames = [df1, df2]
    return pd.concat(frames, ignore_index=True)

In [None]:
def save_data(folder: str, file_name: str, df: pd.DataFrame) -> tuple[bool, str]:
    """Write ``df`` to ``folder/file_name`` as a UTF-8 CSV without the index.

    The target folder is created (including parents) when it does not exist,
    so a first run on a clean machine does not fail.

    Args:
        folder: Directory to write into.
        file_name: Name of the CSV file inside ``folder``.
        df: Frame to save.

    Returns:
        tuple[bool, str]: ``(True, "成功！")`` on success, otherwise
        ``(False, <error text>)``. Errors are reported through the return
        value rather than raised.
    """
    try:
        target_dir = Path(folder)
        target_dir.mkdir(parents=True, exist_ok=True)
        df.to_csv(target_dir / file_name, index=False, encoding="utf-8")
    except Exception as e:
        # Return the message text so the result matches the annotated type.
        return False, str(e)

    return True, "成功！"

# 主程式

In [None]:
# Main program

gov_city_dict = get_city_dict()

# The dictionary is keyed by code letters, so the city has to be selected by
# its code; an integer index such as 0 raises KeyError.
city_code = "A"
data_dict = requests_data(
    city_code=city_code, city_name=gov_city_dict[city_code])


# Build one request payload per animal type ("0" = dog, "1" = cat, as listed
# in data_dict["animal"]).
data_dog = post_data(
    start_date=data_dict["start_date"], end_date=data_dict["end_date"], ani="0", dist=data_dict["city"])
data_cat = post_data(
    start_date=data_dict["start_date"], end_date=data_dict["end_date"], ani="1", dist=data_dict["city"])


# Single quotes inside the replacement fields keep these f-strings valid on
# Python versions older than 3.12.
df_dog = get_data(data_dict=data_dict, data=data_dog)
print(f"已完成{data_dict['city_name']}的{data_dict['animal']['0']}資料抓取")
df_cat = get_data(data_dict=data_dict, data=data_cat)
print(f"已完成{data_dict['city_name']}的{data_dict['animal']['1']}資料抓取")


df_dog = add_columns(df=df_dog, data_dict=data_dict, ani="0")
df_cat = add_columns(df=df_cat, data_dict=data_dict, ani="1")


df_combine = combine_dataframe(df1=df_dog, df2=df_cat)


result, text = save_data(
    folder=data_dict["folder"], file_name=data_dict["file_name"], df=df_combine)

if result:
    print(f"儲存結果：{text}")
else:
    print(f"儲存失敗：{text}")