In [2]:
import pandas as pd
import pymysql
from dotenv import load_dotenv
import os

## 前置作業

In [3]:
def create_pymysql_connect():
    """
    Open a MySQL connection through pymysql and return the connection object.

    Settings are read from environment variables after ``load_dotenv()`` has
    run: MYSQL_USERNAME, MYSQL_PASSWORD, MYSQL_IP, MYSQL_PORT and
    MYSQL_DB_NAME. Keep these values in the .env file; never hard-code them
    in the notebook.

    The port is looked up under MYSQL_PORT first. The misspelled legacy key
    MYSQL_PORTT is still honoured so existing .env files keep working, and
    MySQL's standard port 3306 is used when neither key is set (previously a
    missing key crashed with ``int(None)``).

    Returns:
        The object returned by ``pymysql.connect``. The caller is
        responsible for closing it.

    Raises:
        ValueError: if the configured port is not an integer string.
    """

    load_dotenv()

    # Prefer the correctly spelled key; fall back to the historical typo.
    raw_port = os.getenv("MYSQL_PORT") or os.getenv("MYSQL_PORTT") or "3306"

    return pymysql.connect(
        host=os.getenv("MYSQL_IP"),
        port=int(raw_port),
        user=os.getenv("MYSQL_USERNAME"),
        password=os.getenv("MYSQL_PASSWORD"),
        database=os.getenv("MYSQL_DB_NAME"),
        charset='utf8mb4'
    )



def E_load_from_sql(table_name: str) -> list[dict]:
    """
    Read every row of ``table_name`` and return it as a list of row dicts.

    A new connection is opened with ``create_pymysql_connect()`` (settings
    come from the .env file, never from the notebook), the whole table is
    loaded with ``pd.read_sql`` and converted with
    ``to_dict(orient='records')``. The connection is always closed before
    returning, whether the query succeeded or failed.

    Args:
        table_name: name of the table to read. It is interpolated directly
            into the SQL text, so it must be a trusted identifier and never
            user-supplied input.

    Returns:
        One dict per row, keyed by column name.

    Raises:
        Exception: if the query or the conversion fails; the original error
            is attached as ``__cause__``.
    """

    conn = create_pymysql_connect()
    sql = f"SELECT * FROM {table_name}"

    try:
        df = pd.read_sql(sql, conn)
        return df.to_dict(orient='records')

    except Exception as e:
        # Chain the original error so the traceback keeps the real cause.
        raise Exception(f"讀取{table_name}表時發生錯誤：{e}") from e

    finally:
        # Release the connection on every path; it used to leak per call.
        conn.close()

In [4]:
# Constants: exponent weights handed to T_calculate_SAI.
# a1 is applied to MarketPotential_norm, a2 to MarketSpace and
# a3 to PetIndustryMaturity_norm.
a1 = 0.4
a2 = 0.3
a3 = 0.3

## 計算MarketPotential

In [5]:
# Load the pet registration table ("pet_regis").
# Despite the df_ prefix this is a list of row dicts, not a DataFrame.
df_pet_data = E_load_from_sql(table_name="pet_regis")

  df = pd.read_sql(sql, conn)


In [6]:
# Load the per-location population table ("location").
# Despite the df_ prefix this is a list of row dicts, not a DataFrame.
df_loc_data = E_load_from_sql(table_name="location")

  df = pd.read_sql(sql, conn)


In [7]:
# First work out the number of pets in each location
def T_calaulate_pet_count(df_pet_data: list[dict]) -> pd.DataFrame:
    """
    Aggregate pet registration records into one row per ``loc_id``.

    Args:
        df_pet_data: row dicts carrying ``loc_id``, ``regis_count`` and
            ``removal_count``.

    Returns:
        DataFrame with columns ``loc_id``, ``pet_regis`` (sum of
        ``regis_count``), ``pet_removal`` (sum of ``removal_count``) and
        ``pet_count`` (``pet_regis`` minus ``pet_removal``).
    """
    by_location = pd.DataFrame(data=df_pet_data).groupby("loc_id")

    registered = by_location["regis_count"].sum().reset_index(name="pet_regis")
    removed = by_location["removal_count"].sum().reset_index(name="pet_removal")

    totals = pd.merge(registered, removed, how="left", on="loc_id")
    totals["pet_count"] = totals["pet_regis"] - totals["pet_removal"]
    return totals

# One row per loc_id with pet_regis, pet_removal and pet_count.
df_pet_count = T_calaulate_pet_count(df_pet_data=df_pet_data)

In [8]:
# Combine the location/population data with the pet counts
def T_merge_df_loc_pet(df_loc_data: list[dict], df_pet_count: pd.DataFrame) -> pd.DataFrame:
    """
    Left-join the pet counts onto the location rows by ``loc_id``.

    Args:
        df_loc_data: location row dicts; each must carry ``loc_id``.
        df_pet_count: per-location pet totals keyed by ``loc_id``.

    Returns:
        Every location row, with the pet columns attached. Locations that
        have no match in ``df_pet_count`` get NaN in those columns.
    """
    locations = pd.DataFrame(data=df_loc_data)
    return pd.merge(locations, df_pet_count, how="left", on="loc_id")

# Location rows enriched with the per-location pet totals (left join on loc_id).
df_loc = T_merge_df_loc_pet(df_loc_data=df_loc_data, df_pet_count=df_pet_count)

In [9]:
# Compute MarketPotential_raw
def T_calculate_MarketPotential_raw(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add the raw market-potential score to a copy of ``df``.

    The score is the cube root (geometric mean) of three ratios:
    ``pet_density`` (pet_count / area), ``pet_1000people``
    (pet_count / population * 1000) and ``population_density``
    (population / area).

    Args:
        df: frame with ``pet_count``, ``area`` and ``population`` columns.

    Returns:
        A new DataFrame holding the input columns plus ``pet_density``,
        ``pet_1000people``, ``population_density`` and
        ``MarketPotential_raw``. The input frame is left untouched.
    """
    # Take a real copy: pd.DataFrame(df) only wraps the same underlying data,
    # so column writes could leak back into the caller's frame.
    df_copy = df.copy()
    df_copy["pet_density"] = df_copy["pet_count"] / df_copy["area"]
    df_copy["pet_1000people"] = (df_copy["pet_count"] / df_copy["population"])*1000
    df_copy["population_density"] = (df_copy["population"] / df_copy["area"])
    df_copy["MarketPotential_raw"] = (
        df_copy["pet_density"] * df_copy["pet_1000people"] * df_copy["population_density"])**(1/3)

    return df_copy

# Attach the density ratios and MarketPotential_raw to the location frame.
df_MarketPotential = T_calculate_MarketPotential_raw(df=df_loc)

In [10]:
# Compute MarketPotential_norm from MarketPotential_raw
def T_calculate_MarketPotential_norm(df: pd.DataFrame) -> pd.DataFrame:
    """
    Min-max scale ``MarketPotential_raw`` into ``MarketPotential_norm``.

    Rows whose scaled value is NaN (missing raw value, or every raw value
    identical so the range is zero) are set to 0.

    Args:
        df: frame with a ``MarketPotential_raw`` column. It is modified in
            place.

    Returns:
        The same ``df`` object with ``MarketPotential_norm`` added.
    """
    raw_score = df["MarketPotential_raw"]
    lowest = raw_score.min()
    highest = raw_score.max()

    scaled = (raw_score - lowest) / (highest - lowest)
    df["MarketPotential_norm"] = scaled.fillna(0)
    return df

# Add the min-max scaled MarketPotential_norm column (updates the frame in place).
df_MarketPotential = T_calculate_MarketPotential_norm(df=df_MarketPotential)

## 計算StoreSaturation

In [11]:
# Load the store data: one table per store type, each read as row dicts
# and then wrapped in its own DataFrame.
data_salon = E_load_from_sql(table_name="salon")
df_salon = pd.DataFrame(data=data_salon)

data_hotel = E_load_from_sql(table_name="hotel")
df_hotel = pd.DataFrame(data=data_hotel)

data_hospital = E_load_from_sql(table_name="hospital")
df_hospital = pd.DataFrame(data=data_hospital)

data_supplies = E_load_from_sql(table_name="supplies")
df_supplies = pd.DataFrame(data=data_supplies)

data_restaurant = E_load_from_sql(table_name="restaurant")
df_restaurant = pd.DataFrame(data=data_restaurant)

data_shelter = E_load_from_sql(table_name="shelter")
df_shelter = pd.DataFrame(data=data_shelter)

  df = pd.read_sql(sql, conn)
  df = pd.read_sql(sql, conn)
  df = pd.read_sql(sql, conn)
  df = pd.read_sql(sql, conn)
  df = pd.read_sql(sql, conn)
  df = pd.read_sql(sql, conn)


In [12]:
# Stack the six store tables into one master frame with a fresh row index.
df_store = pd.concat([df_salon, df_hotel, df_hospital, df_supplies, df_restaurant, df_shelter], ignore_index=True)

In [13]:
# Clean the store rows and count stores per location
def T_transform_df_store(df: pd.DataFrame) -> pd.DataFrame:
    """
    Count stores for every ``(loc_id, category_id)`` pair.

    Rows without a ``loc_id`` are discarded before counting.

    Args:
        df: store rows with ``loc_id`` and ``category_id`` columns.

    Returns:
        DataFrame with columns ``loc_id``, ``category_id`` and
        ``store_count`` (number of rows in each group).
    """
    located = df.dropna(subset="loc_id")
    group_sizes = located.groupby(["loc_id", "category_id"]).size()
    return group_sizes.reset_index(name="store_count")

# Number of stores per (loc_id, category_id); rows lacking loc_id are dropped.
df_store_count = T_transform_df_store(df=df_store)

In [14]:
# Join the location data onto the store counts
def T_merge_df_loc_store(df_loc: pd.DataFrame, df_store: pd.DataFrame) -> pd.DataFrame:
    """
    Right-join ``df_loc`` onto ``df_store`` by ``loc_id``.

    Every row of ``df_store`` is kept. Location columns are NaN for store
    rows whose ``loc_id`` has no match in ``df_loc``.

    Args:
        df_loc: location frame keyed by ``loc_id``.
        df_store: store-count frame keyed by ``loc_id``.

    Returns:
        The merged DataFrame.
    """
    return pd.merge(df_loc, df_store, how="right", on="loc_id")

# Keep every store-count row and attach the matching location columns.
df_StoreSaturation = T_merge_df_loc_store(df_loc=df_loc, df_store=df_store_count)

In [15]:
# Compute StoreSaturation_raw
def T_calculate_StoreSaturation_raw(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add ``StoreSaturation_raw``: stores per 10,000 pets.

    Computed as ``store_count / (pet_count / 10000)``. Rows with a
    ``pet_count`` of exactly 0 get NaN instead of ``inf``: an infinite value
    would become the column maximum and flatten any later min-max scaling
    of this column for every other row.

    Args:
        df: frame with ``store_count`` and ``pet_count`` columns. It is
            modified in place.

    Returns:
        The same ``df`` object with ``StoreSaturation_raw`` added.
    """
    pet_count = df["pet_count"]
    # Blank out zero denominators (missing values stay missing).
    safe_pet_count = pet_count.where(pet_count != 0)
    df["StoreSaturation_raw"] = df["store_count"] / (safe_pet_count/10000)

    return df

# Add StoreSaturation_raw (stores per 10,000 pets); updates the frame in place.
df_StoreSaturation = T_calculate_StoreSaturation_raw(df=df_StoreSaturation)

In [16]:
# Compute StoreSaturation_norm and MarketSpace
def T_calculate_StoreSaturation_norm(df: pd.DataFrame) -> pd.DataFrame:
    """
    Min-max scale ``StoreSaturation_raw`` and derive ``MarketSpace``.

    ``StoreSaturation_norm`` is the scaled value and keeps any NaN.
    ``MarketSpace`` is ``1 - StoreSaturation_norm`` with NaN replaced by 0.

    Args:
        df: frame with a ``StoreSaturation_raw`` column. It is modified in
            place.

    Returns:
        The same ``df`` object with both columns added.
    """
    raw_saturation = df["StoreSaturation_raw"]
    floor = raw_saturation.min()
    ceiling = raw_saturation.max()

    df["StoreSaturation_norm"] = (raw_saturation - floor) / (ceiling - floor)
    df["MarketSpace"] = (1 - df["StoreSaturation_norm"]).fillna(0)
    return df

# Add StoreSaturation_norm and MarketSpace; updates the frame in place.
df_StoreSaturation = T_calculate_StoreSaturation_norm(df=df_StoreSaturation)

## 計算PetIndustryMaturity

In [17]:
# Load the "convenience_score" table as row dicts, then wrap it in a DataFrame.
convenience_data = E_load_from_sql(table_name="convenience_score")
convenience_df = pd.DataFrame(data=convenience_data)

  df = pd.read_sql(sql, conn)


In [18]:
def T_get_city_score_weighted(df: pd.DataFrame) -> pd.DataFrame:
    """
    Average the rescaled city score per ``(loc_id, category_id)``.

    ``norm_city`` is multiplied by 10/9.5 and stored on ``df`` as
    ``norm_city_10`` (the input frame is modified in place). The group mean
    of that column is returned as ``city_score_weighted``, with NaN means
    replaced by 0.

    NOTE(review): the 10/9.5 factor presumably stretches a score capped at
    9.5 onto a 0-10 scale — confirm against the data definition.

    Args:
        df: frame with ``loc_id``, ``category_id`` and ``norm_city``.

    Returns:
        DataFrame with columns ``loc_id``, ``category_id`` and
        ``city_score_weighted``.
    """
    rescale = (10/9.5)
    df["norm_city_10"] = df["norm_city"] * rescale

    group_means = df.groupby(["loc_id", "category_id"])["norm_city_10"].mean()
    return group_means.fillna(0).reset_index(name="city_score_weighted")

# Mean rescaled city score per (loc_id, category_id); also adds norm_city_10 to convenience_df.
df_city_score = T_get_city_score_weighted(df=convenience_df)

In [19]:
def T_calculate_PetIndustryMaturity(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add ``PetIndustryMaturity_norm`` = ``city_score_weighted`` / 10.

    NaN results are replaced by 0.

    Args:
        df: frame with a ``city_score_weighted`` column. It is modified in
            place.

    Returns:
        The same ``df`` object with ``PetIndustryMaturity_norm`` added.
    """
    maturity = df["city_score_weighted"] / 10
    df["PetIndustryMaturity_norm"] = maturity.fillna(0)
    return df

# Add PetIndustryMaturity_norm (city_score_weighted / 10); same object as df_city_score.
df_PetIndustryMaturity = T_calculate_PetIndustryMaturity(df=df_city_score)

In [20]:
# Sanity check: list any rows whose city_score_weighted is still missing.
df_PetIndustryMaturity[df_PetIndustryMaturity["city_score_weighted"].isna()]

Unnamed: 0,loc_id,category_id,city_score_weighted,PetIndustryMaturity_norm


## 計算SAI

In [21]:
def T_final_merge(df1: pd.DataFrame, df2: pd.DataFrame, df3: pd.DataFrame) -> pd.DataFrame:
    """
    Assemble the frame that holds all three score components.

    Starting from ``df2``, ``MarketPotential_norm`` is left-joined from
    ``df1`` on ``loc_id``, then ``df3`` is left-joined on
    ``(loc_id, category_id)``. Missing ``city_score_weighted`` and
    ``PetIndustryMaturity_norm`` values are set to 0.

    Args:
        df1: frame with ``loc_id`` and ``MarketPotential_norm``; other
            columns are ignored.
        df2: base frame with ``loc_id`` and ``category_id``.
        df3: frame with ``loc_id``, ``category_id``,
            ``city_score_weighted`` and ``PetIndustryMaturity_norm``.

    Returns:
        The merged DataFrame, one row per row of ``df2`` when the join keys
        of ``df1`` and ``df3`` are unique.
    """
    potential = df1[["loc_id", "MarketPotential_norm"]]
    maturity_cols = ["city_score_weighted", "PetIndustryMaturity_norm"]

    merged = (
        df2
        .merge(potential, how="left", on=["loc_id"])
        .merge(df3, how="left", on=["loc_id", "category_id"])
    )
    merged[maturity_cols] = merged[maturity_cols].fillna(0)

    return merged

# Bring MarketPotential_norm, MarketSpace and PetIndustryMaturity_norm into one frame.
df_final = T_final_merge(df1=df_MarketPotential, df2=df_StoreSaturation, df3=df_PetIndustryMaturity)


In [22]:
def T_calculate_SAI(df: pd.DataFrame, a1: float, a2: float, a3: float) -> pd.DataFrame:
    """
    Add the ``SAI`` score and its ×10 version ``SAI_final``.

    ``SAI`` is the weighted geometric product
    ``MarketPotential_norm**a1 * MarketSpace**a2 * PetIndustryMaturity_norm**a3``;
    NaN results are replaced by 0. ``SAI_final`` is ``SAI * 10``.

    Args:
        df: frame with ``MarketPotential_norm``, ``MarketSpace`` and
            ``PetIndustryMaturity_norm``. It is modified in place.
        a1: exponent for ``MarketPotential_norm``.
        a2: exponent for ``MarketSpace``.
        a3: exponent for ``PetIndustryMaturity_norm``.

    Returns:
        The same ``df`` object with ``SAI`` and ``SAI_final`` added.
    """
    potential_term = df["MarketPotential_norm"]**a1
    space_term = df["MarketSpace"]**a2
    maturity_term = df["PetIndustryMaturity_norm"]**a3

    df["SAI"] = (potential_term * space_term * maturity_term).fillna(0)
    df["SAI_final"] = df["SAI"] * 10
    return df

# Add SAI and SAI_final using the a1/a2/a3 exponent weights; updates the frame in place.
df_final = T_calculate_SAI(df=df_final, a1=a1, a2=a2, a3=a3)

In [23]:
# Inspect which columns ended up in the final frame.
df_final.columns

Index(['loc_id', 'city', 'district', 'area', 'population', 'pet_regis',
       'pet_removal', 'pet_count', 'category_id', 'store_count',
       'StoreSaturation_raw', 'StoreSaturation_norm', 'MarketSpace',
       'MarketPotential_norm', 'city_score_weighted',
       'PetIndustryMaturity_norm', 'SAI', 'SAI_final'],
      dtype='object')

In [25]:
# Keep only the two key columns and the SAI_final score, then display the result.
df_test = df_final[["loc_id", "category_id", "SAI_final"]]
df_test

Unnamed: 0,loc_id,category_id,SAI_final
0,KSH001,2,8.314272
1,KSH001,4,3.639278
2,KSH001,5,3.615677
3,KSH002,1,0.000000
4,KSH002,3,0.000000
...,...,...,...
688,TYN012,2,2.401355
689,TYN012,4,1.554668
690,TYN012,5,3.629449
691,TYN013,2,1.453945
