In [5]:
import pandas as pd

# Load the experiment user list (path is relative to the notebook's working directory).
# NOTE(review): the preview output shows an "Unnamed: 0" column, which looks like a
# row index that was written into the CSV — confirm, and consider index_col=0 if so.
user = pd.read_csv("../data/user_list.csv", encoding="utf-8")

# Preview the first rows to sanity-check the columns.
user.head()

Unnamed: 0,experiment_date,user_id,treatment,ops_type,sub_group,source
0,2025-12-22,5145040,15x2元1張,時段鞏固-小晚尖＋午夜,D-14小晚尖＋午夜(20~2)預估車資未完成叫車,隨機組
1,2025-11-17,302812,15x2元1張,時段鞏固-小晚尖＋午夜,D-14小晚尖＋午夜(20~2)預估車資未完成叫車,隨機組
2,2025-12-01,4375821,15x2元1張,時段鞏固-小晚尖＋午夜,D-14小晚尖＋午夜(20~2)預估車資未完成叫車,隨機組
3,2025-11-24,2273154,15x2元1張,時段鞏固-小晚尖＋午夜,D-14小晚尖＋午夜(20~2)預估車資未完成叫車,隨機組
4,2025-12-22,433188,15x2元1張,時段鞏固-小晚尖＋午夜,D-14小晚尖＋午夜(20~2)預估車資未完成叫車,隨機組


In [6]:
# Parse experiment_date as timezone-aware UTC timestamps; errors="coerce"
# turns unparseable entries into NaT instead of raising.
rt = pd.to_datetime(user["experiment_date"], utc=True, errors="coerce")

# Date range covered by the data.
earliest, latest = rt.min(), rt.max()

print("earliest experiment_date:", earliest)
print("latest experiment_date:", latest)

earliest experiment_date: 2025-07-28 00:00:00+00:00
latest experiment_date: 2026-01-12 00:00:00+00:00


In [4]:
# Registry of column names that have already been printed. It is bound as the
# default value of `printed` below, so successive calls share it.
printed_cols = set()

def print_unique_values_once(df: pd.DataFrame, cols, printed=printed_cols, include_na=True, sort_values=False):
    """Print the distinct values of each requested column, at most once per column.

    Columns whose names are already in ``printed`` are skipped; every column
    that gets printed is added to ``printed``. Tracking is by column name only,
    so a same-named column of a different DataFrame is also skipped when the
    same ``printed`` set is used.

    Args:
        df: DataFrame to inspect.
        cols: A column name, or an iterable of column names. A bare string is
            treated as one column name (not as an iterable of characters).
            Names repeated within ``cols`` are printed only once.
        printed: Mutable set recording the names already printed. Defaults to
            the module-level ``printed_cols`` registry; pass a fresh ``set()``
            to ignore earlier calls.
        include_na: If True, missing values are kept and shown as a single
            ``<NA>`` entry; if False they are dropped.
        sort_values: If True, values are sorted by their string form (missing
            values first). If sorting fails the first-seen order is kept and a
            note is printed.

    Returns:
        None. Output goes to stdout.

    Raises:
        KeyError: If any requested column is not in ``df``.
    """
    # A bare string would otherwise be split into single characters by list().
    if isinstance(cols, str):
        cols = [cols]
    else:
        cols = list(cols)

    # Verify that every requested column exists.
    missing = [c for c in cols if c not in df.columns]
    if missing:
        raise KeyError(f"Missing columns: {missing}. Available: {list(df.columns)}")

    # Only handle columns not printed yet; dict.fromkeys drops names repeated
    # within this call while preserving their order.
    todo = list(dict.fromkeys(c for c in cols if c not in printed))
    if not todo:
        print("All requested columns have already been printed. (Nothing to do)")
        return

    for col in todo:
        s = df[col]

        # Distinct values in order of first appearance; all missing markers are
        # normalised to pd.NA so they collapse into one entry.
        if include_na:
            u = pd.unique(s.astype("object").where(~s.isna(), other=pd.NA))
        else:
            u = pd.unique(s.dropna().astype("object"))

        # Optional sort by string form. Best effort: on failure keep the
        # first-seen order, but report it instead of hiding the error.
        sort_note = None
        if sort_values:
            try:
                u = sorted(u, key=lambda x: "" if pd.isna(x) else str(x))
            except Exception as exc:
                sort_note = f"(sort skipped: {exc!r}; showing first-seen order)"

        print("\n" + "=" * 80)
        print(f"Column: {col} | dtype: {s.dtype} | n_unique: {len(u)}")
        print("-" * 80)
        if sort_note is not None:
            print(sort_note)

        for v in u:
            if pd.isna(v):
                print("<NA>")
            else:
                print(repr(v))

        printed.add(col)

    print("\nDONE.")

# ===== Usage =====
print_unique_values_once(user, ["treatment", "sub_group", "source"])
# To inspect other columns later (columns already printed are skipped):
# print_unique_values_once(user, ["city", "channel", "source"])



Column: treatment | dtype: object | n_unique: 9
--------------------------------------------------------------------------------
'15x2元1張'
'15元1張'
'20x2元1張'
'20元1張'
'30x2元1張'
'30元1張'
'40x2元1張'
'40元1張'
'不發'

Column: sub_group | dtype: object | n_unique: 29
--------------------------------------------------------------------------------
'D-14小晚尖＋午夜(20~2)預估車資未完成叫車'
'D-14平日早尖峰(7~9)有預估車資未完成叫車'
'D-14平日晚尖峰(17~19)預估車資未完成叫車'
'D-90>0,D-14=0'
'D-90>=2,D-90優惠敏感度高,D-14=0'
'D-90天天領日日搭,D-90>=3,D-14=0'
'D-90小晚尖＋午夜(20~2)預估車資人群>=3,D-14=0'
'D-90平日早尖峰(7~9)預估車資人群>=3,D-14=0'
'D-90平日晚尖峰(17~19)預估車資人群>=3,D-14=0'
'D-90潮汐族,D-90>=3,D-14=0'
'M-0=regular,D-14=0'
'M-0=regular,W-4=1,D-7=0'
'M-1=reactived,D-90優惠敏感度高,D-14=0'
'M-1=reactived,M-1=1,D-14=0'
'M-1=reactived,M-1>=2,D-14=0'
'M-1=regular,D-14=0'
'M-1=regular,W-4=1,D-7=0'
'M-2=regular,D-14=0'
'D-90>=2,90D優惠敏感度高,D-14=0'
'D-90潮汐族、D-90>=3、D-14=0'
'M-1=reactived,90D優惠敏感度高,D-14=0'
'M-1=regular,D-30=0'
'M-1=regular,M-1=1,D-7=0'
'M-2=regular,D-30=0'
'M=reactived,90D優惠

In [6]:
import pandas as pd

def print_unique_treatment_by_source(df: pd.DataFrame,
                                    source_col: str = "source",
                                    treatment_col: str = "treatment",
                                    sources=("隨機組", "控制組"),
                                    include_na: bool = True,
                                    sort_values: bool = False):
    """Print, for each requested source value, the distinct treatments seen in its rows.

    Args:
        df: DataFrame holding both columns.
        source_col: Column used to select rows (compared with ``==``).
        treatment_col: Column whose distinct values are listed.
        sources: Source values to report on, in order.
        include_na: If True, missing treatments are kept and shown as one
            ``<NA>`` entry; if False they are dropped.
        sort_values: If True, values are sorted by their string form (missing
            values first); if sorting raises, first-seen order is kept.

    Returns:
        None. Output goes to stdout.

    Raises:
        KeyError: If ``source_col`` or ``treatment_col`` is not in ``df``.
    """
    # Both columns must exist before any output is produced.
    for required in (source_col, treatment_col):
        if required in df.columns:
            continue
        raise KeyError(f"Missing column: {required}. Available: {list(df.columns)}")

    def _sort_key(value):
        # Missing values sort as the empty string, i.e. first.
        return "" if pd.isna(value) else str(value)

    heavy_rule = "=" * 80
    light_rule = "-" * 80

    for src in sources:
        sub = df[df[source_col] == src]
        treatments = sub[treatment_col]

        # Distinct values in order of first appearance.
        if include_na:
            normalized = treatments.astype("object").where(treatments.notna(), other=pd.NA)
        else:
            normalized = treatments.dropna().astype("object")
        distinct = pd.unique(normalized)

        # Optional, best-effort sort.
        if sort_values:
            try:
                distinct = sorted(distinct, key=_sort_key)
            except Exception:
                pass

        print("\n" + heavy_rule)
        print(f"{source_col} = {repr(src)} | n_rows: {len(sub)} | {treatment_col}.n_unique: {len(distinct)}")
        print(light_rule)
        for value in distinct:
            if pd.isna(value):
                print("<NA>")
            else:
                print(repr(value))

    print("\nDONE.")

# ===== Usage =====
print_unique_treatment_by_source(user)



source = '隨機組' | n_rows: 833150 | treatment.n_unique: 8
--------------------------------------------------------------------------------
'15x2元1張'
'15元1張'
'20x2元1張'
'20元1張'
'30x2元1張'
'30元1張'
'40x2元1張'
'40元1張'

source = '控制組' | n_rows: 584216 | treatment.n_unique: 1
--------------------------------------------------------------------------------
'不發'

DONE.
