# 搜尋gmap並且合併後存檔

In [None]:
import os
import time

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from opencc import OpenCC

from mods import gmap
from mods import pandas_mod as pdm
from mods import date_mod as dtm

load_dotenv()

username = os.getenv("MYSQL_USERNAME")
password = os.getenv("MYSQL_PASSWORD")
target_ip = os.getenv("MYSQL_IP")
target_port = int(os.getenv("MYSQL_PORTT"))
db_name = os.getenv("MYSQL_DB_NAME")

engine = create_engine(
    f"mysql+pymysql://{username}:{password}@{target_ip}:{target_port}/{db_name}")

type_list = ["salon", "restaurant", "supplies"]

for type_ in type_list:
    sql = f"SELECT * FROM {type_}_place_id"

    df = pd.read_sql(sql, engine)

    df_edit = df.copy()


    # 根據place id抓取資料，並做成新的dataframe
    api_key = os.getenv("GMAP_KEY6")

    id_list = df_edit["place_id"].values

    data = []
    for id_ in id_list:
        place_info = gmap.get_place_dict(api_key=api_key, place_id=id_)
        data.append(place_info)
        time.sleep(1)

    df_search = pd.DataFrame(data=data)

    # 將要合併的欄位取出，並與原本的df合併
    df_search = df_search[['place_id', 'address', 'phone', 'opening_hours', 'rating',
                        'rating_total', 'longitude', 'latitude', 'map_url', 'website', 'newest_review']]

    df_combine = df_edit.merge(df_search, how="left", on="place_id")

    folder = r"C:\Users\add41\Documents\Data_Engineer\Project\example_data\gmap_full_search\temp"
    file = "salon_temp.csv"
    path = os.path.join(folder, file)
    df_combine.to_csv(path, index=False)

# 以下為分段測試

In [49]:
import os
import time
import ast

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from opencc import OpenCC

from mods import gmap
from mods import pandas_mod as pdm
from mods import date_mod as dtm

In [2]:
load_dotenv()

username = os.getenv("MYSQL_USERNAME")
password = os.getenv("MYSQL_PASSWORD")
target_ip = os.getenv("MYSQL_IP")
target_port = int(os.getenv("MYSQL_PORTT"))
db_name = os.getenv("MYSQL_DB_NAME")

engine = create_engine(
    f"mysql+pymysql://{username}:{password}@{target_ip}:{target_port}/{db_name}")

sql = "SELECT * FROM salon_place_id"

df = pd.read_sql(sql, engine)

df_edit = df[150:201].copy()

In [3]:
# 根據place id抓取資料，並做成新的dataframe
api_key = os.getenv("GMAP_KEY6")

id_list = df_edit["place_id"].values

data = []
for id_ in id_list:
    place_info = gmap.get_place_dict(api_key=api_key, place_id=id_)
    data.append(place_info)
    time.sleep(1)

df_search = pd.DataFrame(data=data)

In [4]:
# 將要合併的欄位取出，並與原本的df合併
df_search = df_search[['place_id', 'address', 'phone', 'opening_hours', 'rating',
                       'rating_total', 'longitude', 'latitude', 'map_url', 'website', 'newest_review']]

df_combine = df_edit.merge(df_search, how="left", on="place_id")

folder = r"C:\Users\add41\Documents\Data_Engineer\Project\example_data\gmap_full_search\temp"
file = "salon_temp.csv"
path = os.path.join(folder, file)
df_combine.to_csv(path, index=False)

## 使用讀取檔案開始測試

In [50]:
folder = r"C:\Users\add41\Documents\Data_Engineer\Project\example_data\gmap_full_search\temp"
file = "salon_temp.csv"
path = os.path.join(folder, file)

df_combine = pd.read_csv(path)

df_combine

Unnamed: 0,name,place_id,buss_status,update_time,in_boundary,update_date,address,phone,opening_hours,rating,rating_total,longitude,latitude,map_url,website,newest_review
0,萊德寵物美學館,ChIJ_Z6eewoDaDQReDyGhLWuL0Q,OPERATIONAL,2025/10/30 09:43:54,1,2025/10/30,236台灣新北市土城區延平街77號,222601679.0,"['星期一: 11:00 – 19:30', '星期二: 休息', '星期三: 11:00 ...",4.5,43,121.467541,24.993201,https://maps.google.com/?cid=4913337813120203896,https://www.facebook.com/%E8%90%8A%E5%BE%B7%E5...,2023-07-22
1,寵物姐姐,ChIJdTvTIQYDaDQRx0KLyPlxhOo,OPERATIONAL,2025/10/30 09:43:54,1,2025/10/30,235台灣新北市中和區景平路578號1樓,222471077.0,"['星期一: 11:00 – 20:00', '星期二: 休息', '星期三: 11:00 ...",4.8,57,121.50161,24.995601,https://maps.google.com/?cid=16898757019426308807,,2025-03-23
2,就是狗寵物美容店,ChIJWdc0RuCpQjQRVbpeWJKNWmw,OPERATIONAL,2025/10/30 09:43:54,1,2025/10/30,234台灣永和區智光街103號,289432787.0,"['星期一: 10:00 – 19:00', '星期二: 10:00 – 19:00', '...",4.6,21,121.516209,24.996687,https://maps.google.com/?cid=7807708563687520853,,2019-02-03
3,唯心寵物澡堂,ChIJa6IDlM8DaDQRp3e17yBhoU8,OPERATIONAL,2025/10/30 09:43:54,1,2025/10/30,236台灣新北市土城區延吉街168巷16號,222624789.0,"['星期一: 11:00 – 19:00', '星期二: 11:00 – 19:00', '...",4.9,37,121.468022,24.986313,https://maps.google.com/?cid=5737974194335217575,,2025-01-04
4,好的寵物沙龍,ChIJuelpNAgCaDQRrZhzf8lp5jE,OPERATIONAL,2025/10/30 09:43:54,1,2025/10/30,235台灣新北市中和區自立路163號,229401798.0,"['星期一: 休息', '星期二: 11:00 – 20:00', '星期三: 11:00 ...",4.5,24,121.523071,24.992544,https://maps.google.com/?cid=3595677666649544877,,2022-04-27
5,W.E. Dog salon,ChIJI-nNvHkCaDQRKcWiFN_M-yc,OPERATIONAL,2025/10/30 09:43:54,1,2025/10/30,235台灣新北市中和區景平路403號,222428920.0,"['星期一: 11:00 – 19:00', '星期二: 休息', '星期三: 休息', '...",5.0,63,121.502246,24.994587,https://maps.google.com/?cid=2881121645129549097,https://www.facebook.com/WEDOGfloat/,2020-08-21
6,貝比寵物美容,ChIJObyEWn8CaDQRSMkTIwtN-Ac,OPERATIONAL,2025/10/30 09:43:54,1,2025/10/30,235台灣新北市中和區中正路529號,222469369.0,"['星期一: 休息', '星期二: 13:00 – 21:30', '星期三: 13:00 ...",4.5,6,121.487857,24.994069,https://maps.google.com/?cid=574293662718216520,,2023-04-28
7,一三三寵物美容院｜專屬貓咪美容空間｜全店透明隔間｜狗狗洗澡美容｜貓咪洗澡美容｜無烘箱純手吹,ChIJL-DO4bMDaDQRo8y965RNqd0,OPERATIONAL,2025/10/30 09:43:54,1,2025/10/30,235台灣新北市中和區安平路7號,229453133.0,"['星期一: 休息', '星期二: 10:00 – 19:30', '星期三: 10:00 ...",4.8,74,121.508873,24.993659,https://maps.google.com/?cid=15972382855451757731,https://www.facebook.com/133petbeautysalon/,2020-11-18
8,咘咘寵物生活坊,ChIJ86WDhnUCaDQRMXpmqvHrHSA,OPERATIONAL,2025/10/30 09:43:54,1,2025/10/30,235台灣新北市中和區安平路64號,229415338.0,"['星期一: 休息', '星期二: 休息', '星期三: 11:00 – 19:00', '...",4.5,71,121.510787,24.996157,https://maps.google.com/?cid=2314265206716791345,https://m.facebook.com/profile.php/?id=1000639...,2022-07-16
9,捉迷腸寵物館,ChIJ5WPmAcMDaDQRzU04gg20-3Y,OPERATIONAL,2025/10/30 09:43:54,1,2025/10/30,235台灣新北市中和區大勇街25巷15弄13號,970732392.0,"['星期一: 休息', '星期二: 10:00 – 19:00', '星期三: 10:00 ...",5.0,51,121.515562,24.993374,https://maps.google.com/?cid=8573644285742173645,https://www.facebook.com/dooooooglove/?ref=pag...,2023-07-23


In [51]:
# 新增類別
df_combine["category"] = "寵物美容"

# 先將地址全部做簡體轉繁體
cc = OpenCC("s2t")
df_combine["address"] = df_combine["address"].apply(cc.convert)

# 清理一些地址中會出現的錯誤
df_combine["address"] = df_combine["address"].str.replace("区", "區")

In [52]:
# 準備處理地址欄位，先編寫規則
df_combine["city"] = None
df_combine["district"] = None

pattern1 = r"([^\d\s]{2}市)([^\d\s]{1,2}區)"
pattern2 = r"灣([^\d\s]{1,2}區)"
pattern3 = r"([^\d\s]{2}區)([^\d\s]{2}市)"

In [53]:
# 處理第一種情況：正常地址格式
mask1 = df_combine["address"].str.contains(pattern1, regex=True, na=False)
extracted1 = df_combine.loc[mask1, "address"].str.extract(pattern1)
df_combine.loc[mask1, ["city", "district"]] = extracted1.values

  mask1 = df_combine["address"].str.contains(pattern1, regex=True, na=False)


In [54]:
# 處理第二種情況：地址格式倒反
mask3 = df_combine["address"].str.contains(pattern3, regex=True, na=False)
extracted3 = df_combine.loc[mask3, "address"].str.extract(pattern3)

df_combine.loc[mask3, "city"] = extracted3[1].values
df_combine.loc[mask3, "district"] = extracted3[0].values

  mask3 = df_combine["address"].str.contains(pattern3, regex=True, na=False)


In [55]:
# 處理第三種狀況：只有區沒有市
mask2 = df_combine["address"].str.contains(pattern2, regex=True, na=False)
extracted2 = df_combine.loc[mask2, "address"].str.extract(pattern2)
df_combine.loc[mask2, "district"] = extracted2[0].values

  mask2 = df_combine["address"].str.contains(pattern2, regex=True, na=False)


In [56]:
# 清理區中不乾淨的字元
drop_word = ["路", "街", "巷", "弄", "段", "道"]
word_remove = "[" + "".join(drop_word) + "]"

df_combine["district"] = df_combine["district"].str.replace(word_remove, "", regex=True)

In [None]:
# # 先取出正常地址格式中的市和區
# mask_city = df_combine["address"].str.contains("市")

# pattern1 = r"([^\d\s]{2}市)([^\d\s]{1,2}區)"
# pattern2 = r"灣([^\d\s]{1,2}區)"

# df_combine[mask_city]

# df_combine.loc[mask_city, ["city", "district"]] = df_combine.loc[mask_city, "address"].str.extract(pattern1)
# df_combine.loc[~mask_city, ["city", "district"]] = df_combine.loc[~mask_city, "address"].str.extract(pattern2)

In [None]:
# # 再取出反向地址格式中的市和區
# pattern3 = r"([^\d\s]{2}區)([^\d\s]{2}市)"

# mask = df_combine["city"].isna() & df_combine["district"].isna()
# extracted = df_combine.loc[mask, "address"].str.extract(pattern3)
# df_combine.loc[mask, ["district", "city"]] = extracted.values

# # 清理區中不乾淨的字元
# drop_word = ["路", "街", "巷", "弄", "段", "道"]
# df_combine["district"] = df_combine["district"].str.replace()

In [57]:
load_dotenv()

username = os.getenv("MYSQL_USERNAME")
password = os.getenv("MYSQL_PASSWORD")
target_ip = os.getenv("MYSQL_IP")
target_port = int(os.getenv("MYSQL_PORTT"))
db_name = os.getenv("MYSQL_DB_NAME")

engine = create_engine(
    f"mysql+pymysql://{username}:{password}@{target_ip}:{target_port}/{db_name}")

In [58]:
# 讀取地區資料，並與店家表join，留下loc id，並將city和district欄位去除
sql_loc = "SELECT * FROM location"

df_loc = pd.read_sql(sql_loc, engine)

df_loc_select = df_loc[["loc_id", "city", "district"]]

df_final = df_combine.merge(df_loc_select, how="left", on=["district"])

df_final.drop(columns=["district"], axis=1, inplace=True)

In [59]:
# 讀取類別資料，並與店家表join，留下類別id，將類別欄位去除
sql_catego = "SELECT * FROM Category"

df_catego = pd.read_sql(sql_catego, engine)

df_final = df_final.merge(df_catego, how="left",
                          left_on="category", right_on="category_name")

df_final.drop(columns=["category_name", "category_eng"], axis=1, inplace=True)

In [60]:
# 加上類別id
df_final["id"] = ""

df_final = pdm.reassign_id(df_final, "id", "sal")

In [63]:
def trans_ophours_columns(value):
    if pd.isna(value):
        return "NaN"
    elif value == "NaN":
        return value
    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError):
        return "NaN"

In [64]:
df_combine["opening_hours"] = df_combine["opening_hours"].apply(trans_ophours_columns)

op_hours_list = []
for index, row in df_final.iterrows():
    op_time = row["opening_hours"]
    op_hours = dtm.trans_op_time_to_hours(op_time)
    op_hours_list.append(op_hours)

df_final["op_hours"] = op_hours_list
df_final.drop(columns="opening_hours", axis=1, inplace=True)

IndexError: list index out of range

In [13]:
new_col = ["id", 'name', 'buss_status', 'loc_id', 'address', 'phone', "op_hours", 'category_id', 'rating',
           'rating_total', 'newest_review', 'longitude', 'latitude', 'map_url', 'website', 'place_id', 'update_date']

df_final = df_final[new_col]

In [14]:
df_final

Unnamed: 0,id,name,buss_status,loc_id,address,phone,op_hours,category_id,rating,rating_total,newest_review,longitude,latitude,map_url,website,place_id,update_date
0,sal0001,萊德寵物美學館,OPERATIONAL,NTP013,236臺灣新北市土城區延平街77號,222601679.0,51.0,4,4.5,43,2023-07-22,121.467541,24.993201,https://maps.google.com/?cid=4913337813120203896,https://www.facebook.com/%E8%90%8A%E5%BE%B7%E5...,ChIJ_Z6eewoDaDQReDyGhLWuL0Q,2025/10/30
1,sal0002,寵物姐姐,OPERATIONAL,NTP003,235臺灣新北市中和區景平路578號1樓,222471077.0,45.0,4,4.8,57,2025-03-23,121.50161,24.995601,https://maps.google.com/?cid=16898757019426308807,,ChIJdTvTIQYDaDQRx0KLyPlxhOo,2025/10/30
2,sal0003,就是狗寵物美容店,OPERATIONAL,,234臺灣永和區智光街103號,289432787.0,53.0,4,4.6,21,2019-02-03,121.516209,24.996687,https://maps.google.com/?cid=7807708563687520853,,ChIJWdc0RuCpQjQRVbpeWJKNWmw,2025/10/30
3,sal0004,唯心寵物澡堂,OPERATIONAL,NTP013,236臺灣新北市土城區延吉街168巷16號,222624789.0,48.0,4,4.9,37,2025-01-04,121.468022,24.986313,https://maps.google.com/?cid=5737974194335217575,,ChIJa6IDlM8DaDQRp3e17yBhoU8,2025/10/30
4,sal0005,好的寵物沙龍,OPERATIONAL,NTP003,235臺灣新北市中和區自立路163號,229401798.0,60.0,4,4.5,24,2022-04-27,121.523071,24.992544,https://maps.google.com/?cid=3595677666649544877,,ChIJuelpNAgCaDQRrZhzf8lp5jE,2025/10/30
5,sal0006,W.E. Dog salon,OPERATIONAL,NTP003,235臺灣新北市中和區景平路403號,222428920.0,40.0,4,5.0,63,2020-08-21,121.502246,24.994587,https://maps.google.com/?cid=2881121645129549097,https://www.facebook.com/WEDOGfloat/,ChIJI-nNvHkCaDQRKcWiFN_M-yc,2025/10/30
6,sal0007,貝比寵物美容,OPERATIONAL,NTP003,235臺灣新北市中和區中正路529號,222469369.0,51.0,4,4.5,6,2023-04-28,121.487857,24.994069,https://maps.google.com/?cid=574293662718216520,,ChIJObyEWn8CaDQRSMkTIwtN-Ac,2025/10/30
7,sal0008,一三三寵物美容院｜專屬貓咪美容空間｜全店透明隔間｜狗狗洗澡美容｜貓咪洗澡美容｜無烘箱純手吹,OPERATIONAL,NTP003,235臺灣新北市中和區安平路7號,229453133.0,57.0,4,4.8,74,2020-11-18,121.508873,24.993659,https://maps.google.com/?cid=15972382855451757731,https://www.facebook.com/133petbeautysalon/,ChIJL-DO4bMDaDQRo8y965RNqd0,2025/10/30
8,sal0009,咘咘寵物生活坊,OPERATIONAL,NTP003,235臺灣新北市中和區安平路64號,229415338.0,40.0,4,4.5,71,2022-07-16,121.510787,24.996157,https://maps.google.com/?cid=2314265206716791345,https://m.facebook.com/profile.php/?id=1000639...,ChIJ86WDhnUCaDQRMXpmqvHrHSA,2025/10/30
9,sal0010,捉迷腸寵物館,OPERATIONAL,NTP003,235臺灣新北市中和區大勇街25巷15弄13號,970732392.0,45.0,4,5.0,51,2023-07-23,121.515562,24.993374,https://maps.google.com/?cid=8573644285742173645,https://www.facebook.com/dooooooglove/?ref=pag...,ChIJ5WPmAcMDaDQRzU04gg20-3Y,2025/10/30


In [None]:
df_final.to_sql(name="salon", con=engine, index=False, if_exists="replace")

51

In [28]:
df_temp = pd.DataFrame({
    "address":["242臺灣新北市新莊區後港一路76巷5弄11號", "242臺灣新莊區後港一路76巷5弄11號", "242臺灣後港一路新莊區新北市76巷5弄11號"]
})

df_temp

Unnamed: 0,address
0,242臺灣新北市新莊區後港一路76巷5弄11號
1,242臺灣新莊區後港一路76巷5弄11號
2,242臺灣後港一路新莊區新北市76巷5弄11號


In [None]:
# 先取出正常地址格式中的市和區
df_temp["city"] = None
df_temp["district"] = None

pattern1 = r"([^\d\s]{2}市)([^\d\s]{1,2}區)"
pattern2 = r"灣([^\d\s]{1,2}區)"
pattern3 = r"([^\d\s]{2}區)([^\d\s]{2}市)"

mask1 = df_temp["address"].str.contains(pattern1, regex=True, na=False)
extracted1 = df_temp.loc[mask1, "address"].str.extract(pattern1)
df_temp.loc[mask1, ["city", "district"]] = extracted1.values

# 2. 用 pattern3 擷取（區、市），再調換順序放入 city、district
mask3 = df_temp["address"].str.contains(pattern3, regex=True, na=False)
extracted3 = df_temp.loc[mask3, "address"].str.extract(pattern3)
# 調換欄位順序：pattern3擷取結果第一欄是區，第二欄是市
df_temp.loc[mask3, "city"] = extracted3[1].values
df_temp.loc[mask3, "district"] = extracted3[0].values

# 3. 用 pattern2 擷取只有區的狀況，city 保持 None
mask2 = df_temp["address"].str.contains(pattern2, regex=True, na=False)
extracted2 = df_temp.loc[mask2, "address"].str.extract(pattern2)
df_temp.loc[mask2, "district"] = extracted2[0].values

df_temp

  mask1 = df_temp["address"].str.contains(pattern1, regex=True, na=False)
  mask3 = df_temp["address"].str.contains(pattern3, regex=True, na=False)
  mask2 = df_temp["address"].str.contains(pattern2, regex=True, na=False)


Unnamed: 0,address,city,district
0,242臺灣新北市新莊區後港一路76巷5弄11號,新北市,新莊區
1,242臺灣新莊區後港一路76巷5弄11號,,新莊區
2,242臺灣後港一路新莊區新北市76巷5弄11號,新北市,新莊區
