In [1]:
import pandas as pd
from datetime import datetime, date, timedelta
import pymysql
import os
from dotenv import load_dotenv
import numpy as np

In [2]:
def create_pymysql_connect():
    """
    Open a MySQL connection with pymysql and return the connection object.

    Every connection setting is read from environment variables after
    ``load_dotenv()`` has been called, so the values are expected to live in a
    ``.env`` file. Never hard-code credentials in the notebook.

    Environment variables read:
        MYSQL_USERNAME, MYSQL_PASSWORD, MYSQL_IP, MYSQL_DB_NAME and the port,
        taken from MYSQL_PORTT (the key this notebook has always used) or,
        when that is unset, from MYSQL_PORT. If neither is set, 3306 is used.

    Returns:
        The connection object returned by ``pymysql.connect``. The caller is
        responsible for closing it.

    Raises:
        ValueError: if the configured port is not an integer string.
    """

    load_dotenv()

    username = os.getenv("MYSQL_USERNAME")
    password = os.getenv("MYSQL_PASSWORD")
    target_ip = os.getenv("MYSQL_IP")
    db_name = os.getenv("MYSQL_DB_NAME")

    # "MYSQL_PORTT" looks like a typo of "MYSQL_PORT". The historical key keeps
    # precedence so existing .env files behave exactly as before; the correctly
    # spelled key is accepted too. Falling back to 3306 avoids the opaque
    # `int(None)` TypeError when no port is configured.
    raw_port = os.getenv("MYSQL_PORTT") or os.getenv("MYSQL_PORT") or "3306"
    target_port = int(raw_port)

    conn = pymysql.connect(
        host=target_ip,
        port=target_port,
        user=username,
        password=password,
        database=db_name,
        charset='utf8mb4'
    )

    return conn


def E_load_from_sql(table_name: str) -> list:
    """
    Read every row of ``table_name`` from MySQL and return them as records.

    A fresh pymysql connection is opened through ``create_pymysql_connect()``
    (settings come from ``.env``; do not hard-code them) and is always closed
    before this function returns or raises.

    Args:
        table_name: name of the table to read with ``SELECT *``.

    Returns:
        A list with one dict per row (``DataFrame.to_dict(orient='records')``),
        which callers can feed back into ``pd.DataFrame(data=...)``.

    Raises:
        Exception: wraps any error raised while reading the table; the
            original error is chained as ``__cause__``.
    """

    conn = create_pymysql_connect()
    # NOTE(review): the table name is interpolated into the SQL text because
    # identifiers cannot be bound as query parameters. Only pass trusted,
    # hard-coded table names to this function.
    sql = f"SELECT * FROM {table_name}"

    try:
        df = pd.read_sql(sql, conn)
        return df.to_dict(orient='records')

    except Exception as e:
        # Chain the cause so the original traceback is not lost.
        raise Exception(f"讀取{table_name}表時發生錯誤：{e}") from e

    finally:
        # Release the connection on success and on failure alike; previously
        # every call leaked an open connection.
        conn.close()

In [3]:
# Fetch the six store tables as lists of row records, in the same order as before.
data_salon, data_hotel, data_hospital, data_supplies, data_restaurant, data_shelter = [
    E_load_from_sql(table_name=store_table)
    for store_table in ("salon", "hotel", "hospital", "supplies", "restaurant", "shelter")
]

# Turn each list of records into its own DataFrame.
df_salon, df_hotel, df_hospital, df_supplies, df_restaurant, df_shelter = [
    pd.DataFrame(data=records)
    for records in (
        data_salon,
        data_hotel,
        data_hospital,
        data_supplies,
        data_restaurant,
        data_shelter,
    )
]

  df = pd.read_sql(sql, conn)
  df = pd.read_sql(sql, conn)
  df = pd.read_sql(sql, conn)
  df = pd.read_sql(sql, conn)
  df = pd.read_sql(sql, conn)
  df = pd.read_sql(sql, conn)


In [4]:
# Stack the six store tables into one frame with a fresh 0..n-1 index.
df_main = pd.concat(
    [
        df_salon,
        df_hotel,
        df_hospital,
        df_supplies,
        df_restaurant,
        df_shelter,
    ],
    ignore_index=True,
)
# city_id is the first three characters of loc_id (e.g. "NTP029" -> "NTP").
df_main["city_id"] = df_main["loc_id"].str.slice(stop=3)

In [5]:
# Load the "location" table as row records and wrap it in a DataFrame.
data_loc = E_load_from_sql(table_name="location")
df_loc = pd.DataFrame(data=data_loc)

  df = pd.read_sql(sql, conn)


In [6]:
# Load the pet-registration table as row records.
data_pet = E_load_from_sql(table_name="pet_regis")
# Bind only df_pet here. The former chained assignment `df_pet = df_loc = ...`
# also rebound df_loc to this pet frame, discarding the location table and
# making both names point at the same object.
df_pet = pd.DataFrame(data=data_pet)
# city_id is the first three characters of loc_id.
df_pet["city_id"] = df_pet["loc_id"].str[:3]

  df = pd.read_sql(sql, conn)


In [7]:
# Compute the w_area_cat value
def T_calculate_w_area_cat(df: pd.DataFrame, t: int = 30) -> pd.DataFrame:
    """
    Attach a per-(loc_id, category_id) store count and its weight.

    Args:
        df: store table containing ``loc_id`` and ``category_id`` columns.
        t: constant added to the count in the denominator of the weight.

    Returns:
        A new frame: ``df`` left-merged with two extra columns,
        ``store_count`` (rows per loc_id/category_id pair) and
        ``w_area_cat = store_count / (store_count + t)``.
    """
    keys = ["loc_id", "category_id"]

    counts = df.groupby(keys).size().reset_index(name="store_count")
    counts = counts.assign(
        w_area_cat=counts["store_count"] / (counts["store_count"] + t)
    )

    return df.merge(counts, how="left", on=keys)

# Add store_count / w_area_cat to df_main (default t=30).
# NOTE: this rebinds df_main to a merge result; running the cell a second time
# merges again and pandas suffixes the duplicated columns with _x / _y.
df_main = T_calculate_w_area_cat(df=df_main)

# Preview the first 15 rows.
df_main.head(15)

Unnamed: 0,id,name,buss_status,loc_id,address,phone,op_hours,category_id,rating,rating_total,newest_review,longitude,latitude,map_url,website,place_id,update_time,city_id,store_count,w_area_cat
0,sal0001,毛樂居寵物美容工作室,OPERATIONAL,NTP029,233臺灣新北市烏來區溫泉街47號,,45.0,4,0.0,0.0,,121.551585,24.861427,https://maps.google.com/?cid=11355879125359784900,,ChIJrcvWcAAHaDQRxPNXA-YvmJ0,2025/10/30 09:43:54,NTP,1.0,0.032258
1,sal0002,星光貓舍｜曼赤肯｜英短｜小步舞曲｜英長｜貓咪洗澡美容｜貓咪住宿 ｜新北貓舍,OPERATIONAL,NTP008,239臺灣新北市鶯歌區三鶯路106號,,0.0,4,5.0,49.0,2025-08-19,121.359142,24.943754,https://maps.google.com/?cid=12376728134590950362,,ChIJAzTNiZwbaDQR2mvg56n4wqs,2025/10/30 09:43:54,NTP,15.0,0.333333
2,sal0003,新同學寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區復興路141號,226716300.0,48.0,4,5.0,4.0,2025-01-16,121.371062,24.938147,https://maps.google.com/?cid=14774640014360391684,,ChIJXw9AYwAbaDQRBDA5TNAQCs0,2025/10/30 09:43:54,NTP,16.0,0.347826
3,sal0004,妙比寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區文化路20號2樓,226738321.0,40.0,4,4.8,39.0,2020-11-09,121.373299,24.935908,https://maps.google.com/?cid=13996134824851865085,http://www.facebook.com/mubepet,ChIJzQCMsvMbaDQR_f3E73dCPMI,2025/10/30 09:43:54,NTP,16.0,0.347826
4,sal0005,瑪妮寵物,OPERATIONAL,NTP008,239臺灣新北市鶯歌區中正三路278號,226771300.0,0.0,4,4.6,7.0,2023-08-18,121.328898,24.936068,https://maps.google.com/?cid=17819326260165638416,,ChIJ9dHkBD8ZaDQREHn2odL2Svc,2025/10/30 09:43:54,NTP,15.0,0.333333
5,sal0006,三峽北大QMO寵物美容 貓狗美容,OPERATIONAL,NTP009,"1樓 No. 13號, 1樓, No. 13號中華路75巷三峽區新北市臺灣 237",286713013.0,43.0,4,4.6,57.0,2024-11-07,121.372829,24.929963,https://maps.google.com/?cid=11716146678371440082,https://www.facebook.com/QMOpet/,ChIJFfwMRewbaDQR0tWzRFQdmKI,2025/10/30 09:43:54,NTP,16.0,0.347826
6,sal0007,哈哈窩寵物精品館(新北三峽店),OPERATIONAL,NTP009,237臺灣新北市三峽區文化路66號,226742799.0,84.0,4,4.3,101.0,2025-07-22,121.372381,24.936215,https://maps.google.com/?cid=16502815234519480600,https://www.facebook.com/%E5%93%88%E5%93%88%E7...,ChIJbwWMy_MbaDQRGDlwJwXHBeU,2025/10/30 09:43:54,NTP,16.0,0.347826
7,sal0008,台北寵物美容 寵物美容教學 台北寵物美容教學,OPERATIONAL,NTP009,237臺灣新北市三峽區國學街1號,956000075.0,63.0,4,4.2,124.0,2022-07-21,121.373407,24.941599,https://maps.google.com/?cid=11378918088627659057,http://www.amopetgrooming.com/,ChIJjTvRsfUbaDQRMSHrqLYJ6p0,2025/10/30 09:43:54,NTP,16.0,0.347826
8,sal0009,幸福狗窩,OPERATIONAL,NTP009,"No. 165, No. 165號國光街三峽區新北市臺灣 237",226738299.0,52.5,4,3.8,19.0,2018-04-15,121.371542,24.939423,https://maps.google.com/?cid=7109624230124698862,,ChIJw74CZXUcaDQR7rwXo5F1qmI,2025/10/30 09:43:54,NTP,16.0,0.347826
9,sal0010,浪花寵藝寵物美容．用品．住宿,OPERATIONAL,NTP009,237臺灣新北市三峽區長泰街48號一樓,929387655.0,45.0,4,4.9,64.0,2023-08-14,121.378034,24.932523,https://maps.google.com/?cid=3879997136574198516,https://instagram.com/groomer.mei?igshid=MWI4M...,ChIJeza4mE4baDQR9LIJr9aE2DU,2025/10/30 09:43:54,NTP,16.0,0.347826


In [8]:
def T_calculate_P75_score(df: pd.DataFrame) -> pd.DataFrame:
    """
    Attach the 75th percentile of ``rating_total`` at two grouping levels.

    Args:
        df: store table with ``loc_id``, ``city_id``, ``category_id`` and
            ``rating_total`` columns.

    Returns:
        A new frame with two extra columns, added in this order:
        ``P75_district_rating_total`` (per loc_id + category_id) and
        ``P75_city_rating_total`` (per city_id + category_id).
    """
    levels = (
        (["loc_id", "category_id"], "P75_district_rating_total"),
        (["city_id", "category_id"], "P75_city_rating_total"),
    )

    for keys, p75_col in levels:
        # The left merge keeps every store row unchanged, so computing the
        # second percentile after the first merge yields the same values.
        p75 = df.groupby(keys)["rating_total"].quantile(0.75).reset_index(name=p75_col)
        # Merge the group-level value back onto the store table.
        df = df.merge(p75, how="left", on=keys)

    return df

# Add the district-level and city-level P75 columns to df_main.
# NOTE: like any merge-based step, re-running this cell on an already-processed
# df_main produces _x / _y suffixed duplicate columns.
df_main = T_calculate_P75_score(df=df_main)

# Preview the first 15 rows.
df_main.head(15)

Unnamed: 0,id,name,buss_status,loc_id,address,phone,op_hours,category_id,rating,rating_total,...,latitude,map_url,website,place_id,update_time,city_id,store_count,w_area_cat,P75_district_rating_total,P75_city_rating_total
0,sal0001,毛樂居寵物美容工作室,OPERATIONAL,NTP029,233臺灣新北市烏來區溫泉街47號,,45.0,4,0.0,0.0,...,24.861427,https://maps.google.com/?cid=11355879125359784900,,ChIJrcvWcAAHaDQRxPNXA-YvmJ0,2025/10/30 09:43:54,NTP,1.0,0.032258,0.0,95.75
1,sal0002,星光貓舍｜曼赤肯｜英短｜小步舞曲｜英長｜貓咪洗澡美容｜貓咪住宿 ｜新北貓舍,OPERATIONAL,NTP008,239臺灣新北市鶯歌區三鶯路106號,,0.0,4,5.0,49.0,...,24.943754,https://maps.google.com/?cid=12376728134590950362,,ChIJAzTNiZwbaDQR2mvg56n4wqs,2025/10/30 09:43:54,NTP,15.0,0.333333,58.5,95.75
2,sal0003,新同學寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區復興路141號,226716300.0,48.0,4,5.0,4.0,...,24.938147,https://maps.google.com/?cid=14774640014360391684,,ChIJXw9AYwAbaDQRBDA5TNAQCs0,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75
3,sal0004,妙比寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區文化路20號2樓,226738321.0,40.0,4,4.8,39.0,...,24.935908,https://maps.google.com/?cid=13996134824851865085,http://www.facebook.com/mubepet,ChIJzQCMsvMbaDQR_f3E73dCPMI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75
4,sal0005,瑪妮寵物,OPERATIONAL,NTP008,239臺灣新北市鶯歌區中正三路278號,226771300.0,0.0,4,4.6,7.0,...,24.936068,https://maps.google.com/?cid=17819326260165638416,,ChIJ9dHkBD8ZaDQREHn2odL2Svc,2025/10/30 09:43:54,NTP,15.0,0.333333,58.5,95.75
5,sal0006,三峽北大QMO寵物美容 貓狗美容,OPERATIONAL,NTP009,"1樓 No. 13號, 1樓, No. 13號中華路75巷三峽區新北市臺灣 237",286713013.0,43.0,4,4.6,57.0,...,24.929963,https://maps.google.com/?cid=11716146678371440082,https://www.facebook.com/QMOpet/,ChIJFfwMRewbaDQR0tWzRFQdmKI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75
6,sal0007,哈哈窩寵物精品館(新北三峽店),OPERATIONAL,NTP009,237臺灣新北市三峽區文化路66號,226742799.0,84.0,4,4.3,101.0,...,24.936215,https://maps.google.com/?cid=16502815234519480600,https://www.facebook.com/%E5%93%88%E5%93%88%E7...,ChIJbwWMy_MbaDQRGDlwJwXHBeU,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75
7,sal0008,台北寵物美容 寵物美容教學 台北寵物美容教學,OPERATIONAL,NTP009,237臺灣新北市三峽區國學街1號,956000075.0,63.0,4,4.2,124.0,...,24.941599,https://maps.google.com/?cid=11378918088627659057,http://www.amopetgrooming.com/,ChIJjTvRsfUbaDQRMSHrqLYJ6p0,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75
8,sal0009,幸福狗窩,OPERATIONAL,NTP009,"No. 165, No. 165號國光街三峽區新北市臺灣 237",226738299.0,52.5,4,3.8,19.0,...,24.939423,https://maps.google.com/?cid=7109624230124698862,,ChIJw74CZXUcaDQR7rwXo5F1qmI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75
9,sal0010,浪花寵藝寵物美容．用品．住宿,OPERATIONAL,NTP009,237臺灣新北市三峽區長泰街48號一樓,929387655.0,45.0,4,4.9,64.0,...,24.932523,https://maps.google.com/?cid=3879997136574198516,https://instagram.com/groomer.mei?igshid=MWI4M...,ChIJeza4mE4baDQR9LIJr9aE2DU,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75


In [9]:
def T_calculate_mscore(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add ``m_city_area_cat``: a blend of the district and city P75 values.

    The blend is
    ``w_area_cat * P75_district_rating_total
    + (1 - w_area_cat) * P75_city_rating_total``.

    Args:
        df: frame holding ``w_area_cat``, ``P75_district_rating_total`` and
            ``P75_city_rating_total``.

    Returns:
        The same ``df`` object, with the column added in place.
    """
    weight = df["w_area_cat"]
    district_part = weight * df["P75_district_rating_total"]
    city_part = (1 - weight) * df["P75_city_rating_total"]

    # Compute m_city_area_cat
    df["m_city_area_cat"] = district_part + city_part

    return df

# Add m_city_area_cat to df_main (the function writes the column in place and returns the same frame).
df_main = T_calculate_mscore(df=df_main)

# Preview the first 15 rows.
df_main.head(15)

Unnamed: 0,id,name,buss_status,loc_id,address,phone,op_hours,category_id,rating,rating_total,...,map_url,website,place_id,update_time,city_id,store_count,w_area_cat,P75_district_rating_total,P75_city_rating_total,m_city_area_cat
0,sal0001,毛樂居寵物美容工作室,OPERATIONAL,NTP029,233臺灣新北市烏來區溫泉街47號,,45.0,4,0.0,0.0,...,https://maps.google.com/?cid=11355879125359784900,,ChIJrcvWcAAHaDQRxPNXA-YvmJ0,2025/10/30 09:43:54,NTP,1.0,0.032258,0.0,95.75,92.66129
1,sal0002,星光貓舍｜曼赤肯｜英短｜小步舞曲｜英長｜貓咪洗澡美容｜貓咪住宿 ｜新北貓舍,OPERATIONAL,NTP008,239臺灣新北市鶯歌區三鶯路106號,,0.0,4,5.0,49.0,...,https://maps.google.com/?cid=12376728134590950362,,ChIJAzTNiZwbaDQR2mvg56n4wqs,2025/10/30 09:43:54,NTP,15.0,0.333333,58.5,95.75,83.333333
2,sal0003,新同學寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區復興路141號,226716300.0,48.0,4,5.0,4.0,...,https://maps.google.com/?cid=14774640014360391684,,ChIJXw9AYwAbaDQRBDA5TNAQCs0,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087
3,sal0004,妙比寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區文化路20號2樓,226738321.0,40.0,4,4.8,39.0,...,https://maps.google.com/?cid=13996134824851865085,http://www.facebook.com/mubepet,ChIJzQCMsvMbaDQR_f3E73dCPMI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087
4,sal0005,瑪妮寵物,OPERATIONAL,NTP008,239臺灣新北市鶯歌區中正三路278號,226771300.0,0.0,4,4.6,7.0,...,https://maps.google.com/?cid=17819326260165638416,,ChIJ9dHkBD8ZaDQREHn2odL2Svc,2025/10/30 09:43:54,NTP,15.0,0.333333,58.5,95.75,83.333333
5,sal0006,三峽北大QMO寵物美容 貓狗美容,OPERATIONAL,NTP009,"1樓 No. 13號, 1樓, No. 13號中華路75巷三峽區新北市臺灣 237",286713013.0,43.0,4,4.6,57.0,...,https://maps.google.com/?cid=11716146678371440082,https://www.facebook.com/QMOpet/,ChIJFfwMRewbaDQR0tWzRFQdmKI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087
6,sal0007,哈哈窩寵物精品館(新北三峽店),OPERATIONAL,NTP009,237臺灣新北市三峽區文化路66號,226742799.0,84.0,4,4.3,101.0,...,https://maps.google.com/?cid=16502815234519480600,https://www.facebook.com/%E5%93%88%E5%93%88%E7...,ChIJbwWMy_MbaDQRGDlwJwXHBeU,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087
7,sal0008,台北寵物美容 寵物美容教學 台北寵物美容教學,OPERATIONAL,NTP009,237臺灣新北市三峽區國學街1號,956000075.0,63.0,4,4.2,124.0,...,https://maps.google.com/?cid=11378918088627659057,http://www.amopetgrooming.com/,ChIJjTvRsfUbaDQRMSHrqLYJ6p0,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087
8,sal0009,幸福狗窩,OPERATIONAL,NTP009,"No. 165, No. 165號國光街三峽區新北市臺灣 237",226738299.0,52.5,4,3.8,19.0,...,https://maps.google.com/?cid=7109624230124698862,,ChIJw74CZXUcaDQR7rwXo5F1qmI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087
9,sal0010,浪花寵藝寵物美容．用品．住宿,OPERATIONAL,NTP009,237臺灣新北市三峽區長泰街48號一樓,929387655.0,45.0,4,4.9,64.0,...,https://maps.google.com/?cid=3879997136574198516,https://instagram.com/groomer.mei?igshid=MWI4M...,ChIJeza4mE4baDQR9LIJr9aE2DU,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087


In [10]:
def T_calculate_rating_avg(df: pd.DataFrame) -> pd.DataFrame:
    """
    Attach the mean ``rating`` of each (loc_id, category_id) group.

    Args:
        df: store table with ``loc_id``, ``category_id`` and ``rating``.

    Returns:
        A new frame: ``df`` left-merged with a ``district_rating_avg`` column.

    NOTE(review): rows whose rating is 0.0 are averaged like any other value;
    if 0.0 stands for "no rating yet" this pulls the mean down - confirm
    that this is intended.
    """
    keys = ["loc_id", "category_id"]

    group_mean = (
        df.groupby(keys, as_index=False)["rating"]
        .mean()
        .rename(columns={"rating": "district_rating_avg"})
    )

    return df.merge(group_mean, how="left", on=keys)

# Add district_rating_avg to df_main.
# NOTE: merge-based step; re-running the cell on an already-processed df_main
# yields _x / _y suffixed duplicate columns.
df_main = T_calculate_rating_avg(df=df_main)

# Preview the first 15 rows.
df_main.head(15)

Unnamed: 0,id,name,buss_status,loc_id,address,phone,op_hours,category_id,rating,rating_total,...,website,place_id,update_time,city_id,store_count,w_area_cat,P75_district_rating_total,P75_city_rating_total,m_city_area_cat,district_rating_avg
0,sal0001,毛樂居寵物美容工作室,OPERATIONAL,NTP029,233臺灣新北市烏來區溫泉街47號,,45.0,4,0.0,0.0,...,,ChIJrcvWcAAHaDQRxPNXA-YvmJ0,2025/10/30 09:43:54,NTP,1.0,0.032258,0.0,95.75,92.66129,0.0
1,sal0002,星光貓舍｜曼赤肯｜英短｜小步舞曲｜英長｜貓咪洗澡美容｜貓咪住宿 ｜新北貓舍,OPERATIONAL,NTP008,239臺灣新北市鶯歌區三鶯路106號,,0.0,4,5.0,49.0,...,,ChIJAzTNiZwbaDQR2mvg56n4wqs,2025/10/30 09:43:54,NTP,15.0,0.333333,58.5,95.75,83.333333,4.08
2,sal0003,新同學寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區復興路141號,226716300.0,48.0,4,5.0,4.0,...,,ChIJXw9AYwAbaDQRBDA5TNAQCs0,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6
3,sal0004,妙比寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區文化路20號2樓,226738321.0,40.0,4,4.8,39.0,...,http://www.facebook.com/mubepet,ChIJzQCMsvMbaDQR_f3E73dCPMI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6
4,sal0005,瑪妮寵物,OPERATIONAL,NTP008,239臺灣新北市鶯歌區中正三路278號,226771300.0,0.0,4,4.6,7.0,...,,ChIJ9dHkBD8ZaDQREHn2odL2Svc,2025/10/30 09:43:54,NTP,15.0,0.333333,58.5,95.75,83.333333,4.08
5,sal0006,三峽北大QMO寵物美容 貓狗美容,OPERATIONAL,NTP009,"1樓 No. 13號, 1樓, No. 13號中華路75巷三峽區新北市臺灣 237",286713013.0,43.0,4,4.6,57.0,...,https://www.facebook.com/QMOpet/,ChIJFfwMRewbaDQR0tWzRFQdmKI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6
6,sal0007,哈哈窩寵物精品館(新北三峽店),OPERATIONAL,NTP009,237臺灣新北市三峽區文化路66號,226742799.0,84.0,4,4.3,101.0,...,https://www.facebook.com/%E5%93%88%E5%93%88%E7...,ChIJbwWMy_MbaDQRGDlwJwXHBeU,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6
7,sal0008,台北寵物美容 寵物美容教學 台北寵物美容教學,OPERATIONAL,NTP009,237臺灣新北市三峽區國學街1號,956000075.0,63.0,4,4.2,124.0,...,http://www.amopetgrooming.com/,ChIJjTvRsfUbaDQRMSHrqLYJ6p0,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6
8,sal0009,幸福狗窩,OPERATIONAL,NTP009,"No. 165, No. 165號國光街三峽區新北市臺灣 237",226738299.0,52.5,4,3.8,19.0,...,,ChIJw74CZXUcaDQR7rwXo5F1qmI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6
9,sal0010,浪花寵藝寵物美容．用品．住宿,OPERATIONAL,NTP009,237臺灣新北市三峽區長泰街48號一樓,929387655.0,45.0,4,4.9,64.0,...,https://instagram.com/groomer.mei?igshid=MWI4M...,ChIJeza4mE4baDQR9LIJr9aE2DU,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6


In [11]:
def T_calculate_avb_score(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add ``avb_score = op_hours * 0.5 + 0.5``.

    Args:
        df: frame with an ``op_hours`` column.

    Returns:
        The same ``df`` object, with ``avb_score`` added in place.

    NOTE(review): the formula maps to [0.5, 1.0] only when ``op_hours`` is a
    0-1 ratio; the sample rows shown in this notebook carry values such as
    45.0, giving scores like 23.0 - confirm the expected scale of op_hours.
    """
    hours = df["op_hours"]
    df["avb_score"] = hours.mul(0.5).add(0.5)

    return df

# Add avb_score to df_main (the function writes the column in place and returns the same frame).
df_main = T_calculate_avb_score(df=df_main)

# Preview the first 15 rows.
df_main.head(15)

Unnamed: 0,id,name,buss_status,loc_id,address,phone,op_hours,category_id,rating,rating_total,...,place_id,update_time,city_id,store_count,w_area_cat,P75_district_rating_total,P75_city_rating_total,m_city_area_cat,district_rating_avg,avb_score
0,sal0001,毛樂居寵物美容工作室,OPERATIONAL,NTP029,233臺灣新北市烏來區溫泉街47號,,45.0,4,0.0,0.0,...,ChIJrcvWcAAHaDQRxPNXA-YvmJ0,2025/10/30 09:43:54,NTP,1.0,0.032258,0.0,95.75,92.66129,0.0,23.0
1,sal0002,星光貓舍｜曼赤肯｜英短｜小步舞曲｜英長｜貓咪洗澡美容｜貓咪住宿 ｜新北貓舍,OPERATIONAL,NTP008,239臺灣新北市鶯歌區三鶯路106號,,0.0,4,5.0,49.0,...,ChIJAzTNiZwbaDQR2mvg56n4wqs,2025/10/30 09:43:54,NTP,15.0,0.333333,58.5,95.75,83.333333,4.08,0.5
2,sal0003,新同學寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區復興路141號,226716300.0,48.0,4,5.0,4.0,...,ChIJXw9AYwAbaDQRBDA5TNAQCs0,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,24.5
3,sal0004,妙比寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區文化路20號2樓,226738321.0,40.0,4,4.8,39.0,...,ChIJzQCMsvMbaDQR_f3E73dCPMI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,20.5
4,sal0005,瑪妮寵物,OPERATIONAL,NTP008,239臺灣新北市鶯歌區中正三路278號,226771300.0,0.0,4,4.6,7.0,...,ChIJ9dHkBD8ZaDQREHn2odL2Svc,2025/10/30 09:43:54,NTP,15.0,0.333333,58.5,95.75,83.333333,4.08,0.5
5,sal0006,三峽北大QMO寵物美容 貓狗美容,OPERATIONAL,NTP009,"1樓 No. 13號, 1樓, No. 13號中華路75巷三峽區新北市臺灣 237",286713013.0,43.0,4,4.6,57.0,...,ChIJFfwMRewbaDQR0tWzRFQdmKI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,22.0
6,sal0007,哈哈窩寵物精品館(新北三峽店),OPERATIONAL,NTP009,237臺灣新北市三峽區文化路66號,226742799.0,84.0,4,4.3,101.0,...,ChIJbwWMy_MbaDQRGDlwJwXHBeU,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,42.5
7,sal0008,台北寵物美容 寵物美容教學 台北寵物美容教學,OPERATIONAL,NTP009,237臺灣新北市三峽區國學街1號,956000075.0,63.0,4,4.2,124.0,...,ChIJjTvRsfUbaDQRMSHrqLYJ6p0,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,32.0
8,sal0009,幸福狗窩,OPERATIONAL,NTP009,"No. 165, No. 165號國光街三峽區新北市臺灣 237",226738299.0,52.5,4,3.8,19.0,...,ChIJw74CZXUcaDQR7rwXo5F1qmI,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,26.75
9,sal0010,浪花寵藝寵物美容．用品．住宿,OPERATIONAL,NTP009,237臺灣新北市三峽區長泰街48號一樓,929387655.0,45.0,4,4.9,64.0,...,ChIJeza4mE4baDQR9LIJr9aE2DU,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,23.0


In [12]:
def T_calculate_store_score(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add ``store_score`` for every store.

    With v = rating_total and m = m_city_area_cat the score is
    ``((rating / 5) * v / (v + m) + (district_rating_avg / 5) * m / (m + v))
    * avb_score``: the store's own rating and the district average, each
    weighted by its share of v + m, then scaled by avb_score.

    Args:
        df: frame with ``rating``, ``rating_total``, ``m_city_area_cat``,
            ``district_rating_avg`` and ``avb_score``.

    Returns:
        The same ``df`` object, with ``store_score`` added in place.
    """
    votes = df["rating_total"]
    prior = df["m_city_area_cat"]
    total = votes + prior

    own_term = (df["rating"] / 5) * (votes / total)
    district_term = (df["district_rating_avg"] / 5) * (prior / total)

    df["store_score"] = (own_term + district_term) * df["avb_score"]

    return df

# Add store_score to df_main (the function writes the column in place and returns the same frame).
df_main = T_calculate_store_score(df=df_main)

# Preview the first 15 rows.
df_main.head(15)

Unnamed: 0,id,name,buss_status,loc_id,address,phone,op_hours,category_id,rating,rating_total,...,update_time,city_id,store_count,w_area_cat,P75_district_rating_total,P75_city_rating_total,m_city_area_cat,district_rating_avg,avb_score,store_score
0,sal0001,毛樂居寵物美容工作室,OPERATIONAL,NTP029,233臺灣新北市烏來區溫泉街47號,,45.0,4,0.0,0.0,...,2025/10/30 09:43:54,NTP,1.0,0.032258,0.0,95.75,92.66129,0.0,23.0,0.0
1,sal0002,星光貓舍｜曼赤肯｜英短｜小步舞曲｜英長｜貓咪洗澡美容｜貓咪住宿 ｜新北貓舍,OPERATIONAL,NTP008,239臺灣新北市鶯歌區三鶯路106號,,0.0,4,5.0,49.0,...,2025/10/30 09:43:54,NTP,15.0,0.333333,58.5,95.75,83.333333,4.08,0.5,0.442065
2,sal0003,新同學寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區復興路141號,226716300.0,48.0,4,5.0,4.0,...,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,24.5,22.615693
3,sal0004,妙比寵物美容,OPERATIONAL,NTP009,237臺灣新北市三峽區文化路20號2樓,226738321.0,40.0,4,4.8,39.0,...,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,20.5,19.090776
4,sal0005,瑪妮寵物,OPERATIONAL,NTP008,239臺灣新北市鶯歌區中正三路278號,226771300.0,0.0,4,4.6,7.0,...,2025/10/30 09:43:54,NTP,15.0,0.333333,58.5,95.75,83.333333,4.08,0.5,0.41203
5,sal0006,三峽北大QMO寵物美容 貓狗美容,OPERATIONAL,NTP009,"1樓 No. 13號, 1樓, No. 13號中華路75巷三峽區新北市臺灣 237",286713013.0,43.0,4,4.6,57.0,...,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,22.0,20.24
6,sal0007,哈哈窩寵物精品館(新北三峽店),OPERATIONAL,NTP009,237臺灣新北市三峽區文化路66號,226742799.0,84.0,4,4.3,101.0,...,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,42.5,37.815949
7,sal0008,台北寵物美容 寵物美容教學 台北寵物美容教學,OPERATIONAL,NTP009,237臺灣新北市三峽區國學街1號,956000075.0,63.0,4,4.2,124.0,...,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,32.0,28.02017
8,sal0009,幸福狗窩,OPERATIONAL,NTP009,"No. 165, No. 165號國光街三峽區新北市臺灣 237",226738299.0,52.5,4,3.8,19.0,...,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,26.75,23.924196
9,sal0010,浪花寵藝寵物美容．用品．住宿,OPERATIONAL,NTP009,237臺灣新北市三峽區長泰街48號一樓,929387655.0,45.0,4,4.9,64.0,...,2025/10/30 09:43:54,NTP,16.0,0.347826,106.75,95.75,99.576087,4.6,23.0,21.699932


In [13]:
def T_calculate_category_raw_score(df_pet: pd.DataFrame, df_main: pd.DataFrame) -> pd.DataFrame:
    """
    Build the raw category score per (loc_id, category_id).

    Args:
        df_pet: pet registrations with ``loc_id`` and ``regis_count``.
        df_main: store table with ``loc_id``, ``category_id`` and
            ``store_score``.

    Returns:
        A new frame with columns ``loc_id``, ``category_id``,
        ``sum_store_score``, ``pet_count``, ``ctgry_raw_score`` and
        ``city_id``, where
        ``ctgry_raw_score = sum_store_score / (pet_count / 10000)`` and
        ``city_id`` is the first three characters of ``loc_id``.
    """
    pets_per_loc = (
        df_pet.groupby("loc_id")["regis_count"]
        .sum()
        .reset_index(name="pet_count")
    )
    score_per_loc_cat = (
        df_main.groupby(["loc_id", "category_id"])["store_score"]
        .sum()
        .reset_index(name="sum_store_score")
    )

    # Left merge: locations without pet data keep a missing pet_count.
    return score_per_loc_cat.merge(pets_per_loc, how="left", on="loc_id").assign(
        ctgry_raw_score=lambda d: d["sum_store_score"] / (d["pet_count"] / 10000),
        city_id=lambda d: d["loc_id"].str[:3],
    )

# Aggregate store scores per (loc_id, category_id) and scale them by registered pets.
df_ctgry_score = T_calculate_category_raw_score(df_pet=df_pet, df_main=df_main)

# Preview the first 15 rows.
df_ctgry_score.head(15)

Unnamed: 0,loc_id,category_id,sum_store_score,pet_count,ctgry_raw_score,city_id
0,KSH001,2,308.210608,3225,955.691809,KSH
1,KSH001,4,18.04,3225,55.937984,KSH
2,KSH001,5,21.62,3225,67.03876,KSH
3,KSH002,1,396.24453,20428,193.97128,KSH
4,KSH002,3,259.49829,20428,127.030688,KSH
5,KSH002,6,12.95,20428,6.339338,KSH
6,KSH002,2,384.098663,20428,188.025584,KSH
7,KSH002,4,283.763592,20428,138.909141,KSH
8,KSH002,5,477.485335,20428,233.740618,KSH
9,KSH003,1,507.564089,21182,239.620475,KSH


In [14]:
def normalize_series(x: pd.Series, p10: pd.Series, p90: pd.Series) -> pd.Series:
    """
    Rescale raw scores linearly onto the range 0.5-9.5.

    Args:
        x: raw scores to convert.
        p10: 10th-percentile value for each element of ``x`` (same length,
            already aligned with ``x``).
        p90: 90th-percentile value for each element of ``x`` (same length,
            already aligned with ``x``).

    Returns:
        A float Series: ``0.5 + 9 * clip((x - p10) / (p90 - p10), 0, 1)``.
        Elements whose ratio is undefined (p90 == p10, or a missing input)
        are treated as ratio 0 and therefore map to 0.5.
    """
    spread = p90 - p10
    # Avoid dividing by zero when P90 == P10: mask those spreads as NaN.
    # `.where` keeps the Series float; `replace(0, pd.NA)` turned it into
    # object dtype, which triggered pandas' downcasting FutureWarning at the
    # `fillna` call below.
    denom = spread.where(spread != 0)
    ratio = (x - p10) / denom
    # A NaN ratio (zero spread, or NaN in the inputs) counts as 0.
    ratio = ratio.fillna(0.0)
    # Clamp to [0, 1].
    ratio = ratio.clip(0, 1)
    # Map onto [0.5, 9.5].
    return 0.5 + ratio * 9.0


def T_get_normalize_score(df: pd.DataFrame, col_list: list, col_name: str) -> pd.DataFrame:
    """
    Add a normalised 0.5-9.5 score column computed within groups.

    For every group defined by ``col_list`` the 10th and 90th percentiles of
    ``ctgry_raw_score`` are computed and passed to ``normalize_series``; the
    result is rounded to two decimals.

    Args:
        df: frame containing ``ctgry_raw_score`` and the columns in
            ``col_list``. It is not modified.
        col_list: column names to group by.
        col_name: name of the score column to add.

    Returns:
        An independent copy of ``df`` with the extra ``col_name`` column.
    """
    # Take a real copy: `pd.DataFrame(df)` does not copy a DataFrame input by
    # default, so the working frame was not guaranteed to be independent of
    # the caller's frame.
    df_copy = df.copy()

    group = df_copy.groupby(col_list)["ctgry_raw_score"]
    # transform broadcasts each group's percentile back to its rows, keeping
    # p10 / p90 aligned with the raw-score column.
    p10 = group.transform(lambda s: s.quantile(0.10))
    p90 = group.transform(lambda s: s.quantile(0.90))
    df_copy[col_name] = normalize_series(x=df_copy["ctgry_raw_score"], p10=p10, p90=p90)
    df_copy[col_name] = df_copy[col_name].round(2)

    return df_copy

# First: normalise within each city (groups are city_id + category_id).
city_col_list = ["city_id", "category_id"]
df_city = T_get_normalize_score(df=df_ctgry_score, col_list=city_col_list, col_name="norm_city")

# Then: normalise across all six municipalities together (groups are category_id only).
all_col_list = ["category_id"]
df_all = T_get_normalize_score(df=df_ctgry_score, col_list=all_col_list, col_name="norm_all")

  ratio = ratio.fillna(0.0)


In [15]:
def T_merge_city_and_all(df_city: pd.DataFrame, df_all: pd.DataFrame) -> pd.DataFrame:
    """
    Combine the within-city and all-cities normalised scores.

    Args:
        df_city: frame with ``loc_id``, ``category_id`` and the city-level
            score columns.
        df_all: frame with ``loc_id``, ``category_id`` and ``norm_all``.

    Returns:
        ``df_city`` left-merged with the ``norm_all`` column of ``df_all`` on
        (loc_id, category_id).
    """
    join_keys = ["loc_id", "category_id"]
    # Only norm_all is taken from df_all; its other columns are dropped
    # before the merge.
    all_scores = df_all[join_keys + ["norm_all"]]

    return df_city.merge(all_scores, how="left", on=join_keys)

# Join the city-level and all-cities normalised scores into one frame.
df_final = T_merge_city_and_all(df_city=df_city, df_all=df_all)

# Preview the first 15 rows.
df_final.head(15)

Unnamed: 0,loc_id,category_id,sum_store_score,pet_count,ctgry_raw_score,city_id,norm_city,norm_all
0,KSH001,2,308.210608,3225,955.691809,KSH,9.5,5.2
1,KSH001,4,18.04,3225,55.937984,KSH,0.58,0.5
2,KSH001,5,21.62,3225,67.03876,KSH,0.5,0.5
3,KSH002,1,396.24453,20428,193.97128,KSH,7.31,6.7
4,KSH002,3,259.49829,20428,127.030688,KSH,2.16,2.81
5,KSH002,6,12.95,20428,6.339338,KSH,0.5,2.11
6,KSH002,2,384.098663,20428,188.025584,KSH,1.1,0.72
7,KSH002,4,283.763592,20428,138.909141,KSH,2.47,1.32
8,KSH002,5,477.485335,20428,233.740618,KSH,2.79,1.87
9,KSH003,1,507.564089,21182,239.620475,KSH,9.23,8.61


In [17]:
def T_add_rank(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add rank columns for both normalised scores and reorder the columns.

    ``city_rank`` ranks ``norm_city`` within each (city_id, category_id)
    group; ``all_rank`` ranks ``norm_all`` within each category_id. Higher
    scores rank first, ties share the lowest rank (``method="min"``), and the
    ranks are stored as strings.

    Args:
        df: frame with ``city_id``, ``loc_id``, ``category_id``,
            ``pet_count``, ``sum_store_score``, ``ctgry_raw_score``,
            ``norm_city`` and ``norm_all``. The two rank columns are also
            written onto this input frame.

    Returns:
        A frame restricted to the ten output columns, in a fixed order.
    """
    rank_specs = (
        ("city_rank", ["city_id", "category_id"], "norm_city"),
        ("all_rank", ["category_id"], "norm_all"),
    )
    for rank_col, group_keys, score_col in rank_specs:
        ranks = df.groupby(group_keys)[score_col].rank(method="min", ascending=False)
        df[rank_col] = ranks.astype(int).astype(str)

    ordered_cols = [
        'city_id', 'loc_id', 'category_id', 'pet_count', 'sum_store_score',
        'ctgry_raw_score', 'norm_city', 'city_rank', 'norm_all', 'all_rank',
    ]

    return df[ordered_cols]

# Add city_rank / all_rank and keep only the ten output columns in their final order.
df_final = T_add_rank(df=df_final)

In [18]:
# Preview the first 30 rows of the final ranking table.
df_final.head(30)

Unnamed: 0,city_id,loc_id,category_id,pet_count,sum_store_score,ctgry_raw_score,norm_city,city_rank,norm_all,all_rank
0,KSH,KSH001,2,3225,308.210608,955.691809,9.5,1,5.2,33
1,KSH,KSH001,4,3225,18.04,55.937984,0.58,27,0.5,107
2,KSH,KSH001,5,3225,21.62,67.03876,0.5,30,0.5,117
3,KSH,KSH002,1,20428,396.24453,193.97128,7.31,9,6.7,34
4,KSH,KSH002,3,20428,259.49829,127.030688,2.16,14,2.81,68
5,KSH,KSH002,6,20428,12.95,6.339338,0.5,2,2.11,9
6,KSH,KSH002,2,20428,384.098663,188.025584,1.1,28,0.72,118
7,KSH,KSH002,4,20428,283.763592,138.909141,2.47,22,1.32,101
8,KSH,KSH002,5,20428,477.485335,233.740618,2.79,25,1.87,107
9,KSH,KSH003,1,21182,507.564089,239.620475,9.23,5,8.61,19
