In [2]:
import pandas as pd
import numpy as np
import os
import googlemaps
from bs4 import BeautifulSoup
from mods import gmap as gm
from mods import readdata as rd
from mods import savedata as sd
from mods import extractdata as ed
from dotenv import load_dotenv

In [3]:
# 讀取.env檔
load_dotenv()

True

In [4]:
# 取得google key
API_KEY = os.getenv("GOOGLE_MAP_KEY_CHGWYELLOW")

In [None]:
# 讀取csv檔
path = "../../data/raw/hospital_data.csv"
df = rd.get_csv_data(path)
df

In [None]:
# ETL開始
need_revised_columns = [
    "license",
    "license_date",
    "vet",
    "tel",
    "address",
    "service",
]

# 移除:前面的資料
for col in need_revised_columns:
    df[col] = (
        df[col]
        .str.split("：")
        .str[-1]
        .str.replace(" ", "", regex=False)
        .str.strip()
    )

In [None]:
# 將空字串設為NaN
df = df.replace({"": np.nan})
df = df.fillna("無此資訊")

# 執行正則表達比對
df["city"], df["district"] = zip(*df["address"].apply(ed.extract_city_district))

In [None]:
# 只取出city非空值的資料，其他drop，所以只會留下六都資訊
df = df[df["city"].notna()].reset_index(drop=True)

In [None]:
# drop不需要的欄位
df = df.drop(columns=["license", "license_date", "vet", "service"])
df

In [None]:
# 透過google map API取得place_id
result = []
# enumerate會自動將被iterate的物件附上index
for i, (idx, row) in enumerate(df.iterrows()):
    query = f"{row['name']} {row['address']}"

    result.append(gm.get_place_id(API_KEY, query))
df["place_id"] = np.nan
df.loc[ : , "place_id"] = result

In [5]:
df = rd.get_csv_data("../../../data/processed/hospital_data_ETL.csv")
df

[32m[✓] CSV檔案已取回
[0m


Unnamed: 0,name,tel,address,city,district,place_id
0,101台北貓醫院,02-25091101,臺北市中山區建國北路3段101號,臺北市,中山區,ChIJScJh3VepQjQRKVxXNc4py4g
1,一嘉動物醫院,07-3318123,高雄市苓雅區興中一路383號,高雄市,苓雅區,ChIJ64N-ioIEbjQRnt-Lag40nGU
2,人人動物醫院,07-7830515,高雄市大寮區鳳林三路827號,高雄市,大寮區,ChIJDyeD-p0bbjQRWOSLl-EmnXI
3,人人動物醫院,03-4895900,桃園市龍潭區北龍路122號,桃園市,龍潭區,ChIJVwkN_uI8aDQRsbBO_6P3Iks
4,人人動物醫院八德介壽院,03-3661314,桃園市八德區介壽路二段296號,桃園市,八德區,ChIJYXczOywfaDQRAl-BQfr2uH4
...,...,...,...,...,...,...
953,鶯歌奇緣動物醫院,02-26792243,新北市鶯歌區忠孝街8號,新北市,鶯歌區,ChIJLzK-FVUZaDQRSrx7lYJzhPM
954,鶯歌獸醫院,02-26782718,新北市鶯歌區文化路三八三號,新北市,鶯歌區,ChIJrab-8qoeaDQR9KYUa1SylSY
955,聽狗嶼動物診所,0919349334,新北市永和區秀朗路2段120號,新北市,永和區,ChIJD6oTBempQjQRp0BIdK7Fm_g
956,麟安動物醫院,27383008,臺北市大安區和平東路3段324號,臺北市,大安區,ChIJO3YTqjeqQjQRZIWl16xga5E


In [6]:
def gmap_info(ori_name, api_key, place_id):
    """提供place_id，回傳名稱、營業狀態、營業時間、gmap評分、經緯度、gmap網址、最新評論日期"""
    if pd.notna(place_id) and place_id not in (None, "", "nan"):
        try:
            gmaps = googlemaps.Client(key=api_key)
            detail = gmaps.place(place_id=place_id, language='zh-TW')
        except Exception as e:
            # API 呼叫失敗，回傳 minimal fallback
            return {
                "name": ori_name,
                "place_id": place_id,
                "business_status": None,
                "address": None,
                "phone": None,
                "opening_hours": None,
                "rating": None,
                "rating_total": None,
                "longitude": None,
                "latitude": None,
                "map_url": None,
                "newest_review": None,
            }

        result = detail.get("result") or {}
        name = result.get("name")
        business_status = result.get("business_status")

        formatted_address = result.get("formatted_address")
        adr_address = result.get("adr_address")
        if formatted_address:
            address = formatted_address
        elif adr_address:
            address = BeautifulSoup(adr_address, "html.parser").text
        else:
            address = None

        phone = result.get("formatted_phone_number")
        if isinstance(phone, str):
            phone = phone.replace(" ", "")

        opening_hours = result.get("opening_hours", {}).get("weekday_text")
        rating = result.get("rating")
        rating_total = result.get("user_ratings_total")
        longitude = result.get("geometry", {}).get("location", {}).get("lng")
        latitude = result.get("geometry", {}).get("location", {}).get("lat")
        map_url = result.get("url")
        review_list = result.get("reviews")
        newest_review = gm.newest_review_date(review_list) if review_list else None

        place_info = {
            "name": name,
            "place_id": place_id,
            "business_status": business_status,
            "address": address,
            "phone": phone,
            "opening_hours": opening_hours,
            "rating": rating,
            "rating_total": rating_total,
            "longitude": longitude,
            "latitude": latitude,
            "map_url": map_url,
            "newest_review": newest_review,
        }
    else:
        place_info = {
            "name": ori_name,
            "place_id": None,
            "business_status": None,
            "address": None,
            "phone": None,
            "opening_hours": None,
            "rating": None,
            "rating_total": None,
            "longitude": None,
            "latitude": None,
            "map_url": None,
            "newest_review": None,
        }

    return place_info

In [7]:
# 透過place_id找到詳細資料
result = []
for _, row in df.iterrows():
    result.append(gmap_info(row["name"], API_KEY, row["place_id"]))
    
df1 = pd.DataFrame(result)
df1

Unnamed: 0,name,place_id,business_status,address,phone,opening_hours,rating,rating_total,longitude,latitude,map_url,newest_review
0,101台北貓醫院,ChIJScJh3VepQjQRKVxXNc4py4g,OPERATIONAL,10491台灣臺北市中山區建國北路三段101號,0225091101,"[星期一: 10:30 – 12:30, 14:00 – 21:00, 星期二: 10:30...",4.7,310.0,121.536406,25.066119,https://maps.google.com/?cid=9857018175065381929,2024-06-06
1,一嘉動物醫院,ChIJ64N-ioIEbjQRnt-Lag40nGU,OPERATIONAL,802台灣高雄市苓雅區興中一路383號,073318123,"[星期一: 09:00 – 12:00, 14:00 – 21:00, 星期二: 09:00...",4.4,137.0,120.306092,22.616130,https://maps.google.com/?cid=7321784330719322014,2025-08-18
2,人人動物醫院,ChIJDyeD-p0bbjQRWOSLl-EmnXI,OPERATIONAL,83154台灣高雄市大寮區鳳林三路827號,077830515,"[星期一: 09:00 – 12:00, 14:00 – 21:00, 星期二: 09:00...",4.0,105.0,120.390155,22.612722,https://maps.google.com/?cid=8258800042019382360,2019-11-04
3,人人動物醫院龍潭總院,ChIJVwkN_uI8aDQRsbBO_6P3Iks,OPERATIONAL,325台灣桃園市龍潭區北龍路122號,034895900,"[星期一: 09:00 – 11:30, 13:00 – 16:30, 18:00 – 20...",4.2,666.0,121.214110,24.866010,https://maps.google.com/?cid=5414161985787834545,2024-07-09
4,人人動物醫院八德院 - 犬貓科/內科/軟組織外科/骨科/心臟科/腫瘤科/齒科/眼科,ChIJYXczOywfaDQRAl-BQfr2uH4,OPERATIONAL,334台灣桃園市八德區介壽路二段296號,033661314,"[星期一: 09:00 – 11:45, 13:30 – 17:15, 18:30 – 20...",4.3,734.0,121.297786,24.949908,https://maps.google.com/?cid=9131319799194935042,2025-07-12
...,...,...,...,...,...,...,...,...,...,...,...,...
953,鶯歌奇緣動物醫院,ChIJLzK-FVUZaDQRSrx7lYJzhPM,OPERATIONAL,239台灣新北市鶯歌區忠孝街8號,0226792243,"[星期一: 10:00 – 12:30, 14:00 – 20:30, 星期二: 10:00...",4.2,247.0,121.354973,24.956389,https://maps.google.com/?cid=17547277052833152074,2025-03-19
954,鶯歌獸醫院,ChIJrab-8qoeaDQR9KYUa1SylSY,OPERATIONAL,239台灣新北市鶯歌區文化路383號,0226782718,"[星期一: 09:00 – 21:00, 星期二: 09:00 – 21:00, 星期三: ...",4.2,101.0,121.352454,24.950669,https://maps.google.com/?cid=2780324420614661876,2019-03-31
955,D.I.N.G.O.寵物訓練教室,ChIJD6oTBempQjQRp0BIdK7Fm_g,OPERATIONAL,234台灣新北市永和區秀朗路二段120號,0229231101,,4.6,43.0,121.524554,25.001537,https://maps.google.com/?cid=17914129295862153383,2020-11-04
956,麟安動物醫院,ChIJO3YTqjeqQjQRZIWl16xga5E,OPERATIONAL,106台灣臺北市大安區和平東路三段324號,0227383008,"[星期一: 09:30 – 12:30, 14:30 – 19:30, 星期二: 09:30...",4.5,470.0,121.557044,25.019874,https://maps.google.com/?cid=10478575253476181348,2025-08-06


In [8]:
df_merged = df.merge(
    df1,
    how="outer",
    left_on=df["place_id"],
    right_on=df1["place_id"],
    suffixes=["_filtered", "_checked"],
)

In [9]:
# 去除重複欄位
df_merged = df_merged.drop(columns=["place_id_filtered", "place_id_checked"])
df_merged

Unnamed: 0,key_0,name_filtered,tel,address_filtered,city,district,name_checked,business_status,address_checked,phone,opening_hours,rating,rating_total,longitude,latitude,map_url,newest_review
0,ChIJ-0Cdf_YFbjQRbesCYU-Iuts,波可動物醫院,07-3495560,高雄市左營區自由四路367號,高雄市,左營區,波可動物醫院,OPERATIONAL,813台灣高雄市左營區自由四路367號,073495560,"[星期一: 休息, 星期二: 09:30 – 12:30, 14:00 – 18:00, 1...",4.7,187.0,120.316895,22.685741,https://maps.google.com/?cid=15833117314485775213,2025-02-20
1,ChIJ-2Kwrd-pQjQRu2sDeVqfQwM,福林動物醫院,29218867,新北市中和區中安街230號1、2樓,新北市,中和區,福林動物醫院,OPERATIONAL,235台灣中和區中安街230號,0229218867,"[星期一: 09:00 – 21:00, 星期二: 09:00 – 21:00, 星期三: ...",4.4,254.0,121.512245,24.998550,https://maps.google.com/?cid=235206816479538107,2025-03-25
2,ChIJ-4EQmeGrQjQRXLlcJKPxhvY,路米動物醫院,02-25163328,臺北市中山區建國北路2段89號1樓,臺北市,中山區,路米動物醫院,OPERATIONAL,104台灣臺北市中山區建國北路二段89號,0277565218,"[星期一: 10:00 – 13:00, 14:00 – 17:00, 18:00 – 21...",4.1,252.0,121.537408,25.058602,https://maps.google.com/?cid=17764151463201519964,2025-04-08
3,ChIJ-7BwXgIFbjQRf4q2xQ91GgY,馬汀動物醫院,07-5507327,高雄市左營區至聖路235號,高雄市,左營區,馬汀動物醫院,OPERATIONAL,813台灣高雄市左營區至聖路235號,075507327,"[星期一: 09:00 – 13:00, 15:00 – 20:00, 星期二: 09:00...",4.5,255.0,120.305736,22.659533,https://maps.google.com/?cid=439792624224078463,2025-06-17
4,ChIJ-8oLGbcjaDQRL9U9B2W2raA,佑安動物醫院,4930603,桃園市平鎮區民族路2段29號,桃園市,平鎮區,佑安動物醫院,OPERATIONAL,324台灣桃園市平鎮區民族路二段29號,034930603,"[星期一: 09:00 – 12:00, 14:00 – 20:00, 星期二: 09:00...",4.1,172.0,121.207734,24.957217,https://maps.google.com/?cid=11578110762068858159,2019-11-14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1011,ChIJzbBzGAOqQjQR5AQTWZQ6vdk,品安動物醫院,02-29359188,臺北市文山區羅斯福路6段89號1樓,臺北市,文山區,品安動物醫院,CLOSED_TEMPORARILY,116台灣臺北市文山區羅斯福路六段89號,0229359188,,4.7,321.0,121.540627,24.998426,https://maps.google.com/?cid=15689761085652665572,2025-03-19
1012,ChIJzwSWAxE3aDQRBK1ry__fkEw,默墨犬貓專科醫院,0979-288648,臺北市內湖區成功路3段83-1號1樓,臺北市,內湖區,墨優犬貓專科醫院,OPERATIONAL,302台灣新竹縣竹北市嘉豐六路二段8號,035500899,"[星期一: 10:30 – 12:30, 14:00 – 17:00, 18:00 – 20...",5.0,18.0,121.028730,24.810039,https://maps.google.com/?cid=5517155833251343620,2025-07-28
1013,ChIJzys4M5kgaDQRGM-OxeiMKSE,麗園動物醫院大園分院,3855577,桃園市大園區中正東路5號,桃園市,大園區,麗園動物醫院-大園分院,OPERATIONAL,337台灣桃園市大園區中正東路5號,033855577,"[星期一: 09:00 – 12:00, 13:00 – 17:00, 18:00 – 20...",4.4,316.0,121.197797,25.064141,https://maps.google.com/?cid=2389596008671530776,2025-04-05
1014,ChIJzz7UHfweaDQR2KnGntyNuak,安欣動物醫院,03-3367775,桃園市桃園區中福街60號一樓,桃園市,桃園區,安欣動物醫院,OPERATIONAL,330台灣桃园市桃園區中福街60號1樓,033367775,"[星期一: 10:00 – 21:00, 星期二: 10:00 – 21:00, 星期三: ...",4.7,442.0,121.304258,24.997432,https://maps.google.com/?cid=12229962241797433816,2025-01-12


In [10]:
sd.store_to_csv_no_index(df_merged, "../../../data/processed/hospital_data_ETL.csv")

[32m[✓] 不含index的CSV檔已存檔完畢
