In [1]:
import os
from datetime import date, timedelta
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

In [2]:
# Lookup table: single-letter city code used in the source file -> city name.
city_dict = dict(
    A="新北市",
    V="臺北市",
    C="桃園市",
    S="臺中市",
    U="臺南市",
    W="高雄市",
)

# The input file is named after yesterday's date (YYYYMMDD).
yesterday = date.today() - timedelta(days=1)
file_date = f"{yesterday:%Y%m%d}"

# Locate and load the raw pet-registration CSV for that date.
folder = r"C:\Users\add41\Documents\Data_Engineer\Project\example_data\pet_regis"
file = file_date + "_pet_regis.csv"
file_path = os.path.join(folder, file)
df = pd.read_csv(file_path)

# Restore the column names. They are assigned by position, so this list must
# have exactly one entry per column of the loaded CSV (pandas raises otherwise).
columns = [
    "area_id",
    "district",
    "登記單位數",      # number of registration units
    "regis_count",
    "removal_count",
    "轉讓數",          # transfer count
    "變更數",          # change count
    "絕育數",          # neutered count
    "絕育除戶數",      # neutered, deregistered count
    "免絕育數",        # neutering-exempt count
    "免絕育除戶數",    # neutering-exempt, deregistered count
    "animal",
    "date",
    "city",
    "update_date",
]
df.columns = columns

# Translate the city code back to its Chinese name. Unknown codes still fail
# with a KeyError, but the message now lists every offending code at once.
city_names = df["city"].map(city_dict)
unknown_codes = df.loc[city_names.isna(), "city"].unique().tolist()
if unknown_codes:
    raise KeyError(f"Unknown city code(s) in source file: {unknown_codes}")
df["city"] = city_names

# Discard the columns that are not carried into the output.
df = df.drop(columns=["area_id", "登記單位數", "轉讓數", "變更數",
                      "絕育數", "絕育除戶數", "免絕育數", "免絕育除戶數"])

# Strip the postal code from the district: the first three characters are
# removed. The vectorised accessor leaves missing values as NaN.
df["district"] = df["district"].str[3:]

# Join against the `location` table to obtain loc_id.
load_dotenv()

# Fail early with a readable message when a setting is absent, rather than
# hitting int(None) or connecting with the literal text "None" in the URL.
# NOTE(review): "MYSQL_PORTT" looks like a typo for "MYSQL_PORT". The key is
# kept unchanged because it has to match the name defined in the .env file;
# confirm and rename both together.
required_env = ("MYSQL_USERNAME", "MYSQL_PASSWORD", "MYSQL_IP",
                "MYSQL_PORTT", "MYSQL_DB_NAME")
missing_env = [key for key in required_env if os.getenv(key) is None]
if missing_env:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(missing_env)}")

username = os.getenv("MYSQL_USERNAME")
password = os.getenv("MYSQL_PASSWORD")
target_ip = os.getenv("MYSQL_IP")
target_port = int(os.getenv("MYSQL_PORTT"))
db_name = os.getenv("MYSQL_DB_NAME")

# Percent-encode the credentials so characters such as "@", ":" or "/" in the
# username/password cannot corrupt the connection URL.
# Assumes the .env values are stored raw, not already URL-encoded — TODO confirm.
engine = create_engine(
    f"mysql+pymysql://{quote(username, safe='')}:{quote(password, safe='')}"
    f"@{target_ip}:{target_port}/{db_name}")

# Only the three columns needed for the lookup are fetched.
sql = "SELECT loc_id, city, district FROM location"

df_loc = pd.read_sql(sql, engine)

df = df.merge(df_loc, how="left", on=["city", "district"])

# A left join leaves loc_id empty for any city/district pair that has no match
# in `location`; report those pairs instead of letting them pass unnoticed.
unmatched = df.loc[df["loc_id"].isna(), ["city", "district"]].drop_duplicates()
if not unmatched.empty:
    print(f"Warning: {len(unmatched)} city/district pair(s) have no loc_id in `location`:")
    print(unmatched.to_string(index=False))

# city/district are now represented by loc_id.
df = df.drop(columns=["city", "district"])

# Arrange the columns in their final output order.
new_col = [
    "loc_id",
    "date",
    "animal",
    "regis_count",
    "removal_count",
    "update_date",
]
df = df.loc[:, new_col]

In [3]:
# Write the transformed table to the data folder, tagged with the same date
# as the input file and an "_ETL" suffix.
folder = r"C:\Users\add41\Documents\Data_Engineer\Project\example_data\pet_regis"
save_file = file_date + "_pet_regis_ETL.csv"
save_path = os.path.join(folder, save_file)

# The row index is not written to the file.
df.to_csv(path_or_buf=save_path, encoding="utf-8", index=False)
print(f"{file_date}檔案儲存完畢！")

20251030檔案儲存完畢！


In [4]:
# Disabled step: appends the transformed rows to the `pet_regis` table through
# `engine`. It is commented out, so running this notebook does not write to
# the database.
# df.to_sql(name="pet_regis", con=engine, index=False, if_exists="append")
# print(f"{file_date}檔案儲存完畢！")