In [4]:
import numpy as np
import pandas as pd

# Fix the global NumPy seed so the simulated dataset is reproducible.
np.random.seed(42)

n = 3000  # number of simulated tasks

# Start from the task identifiers, then attach the randomly drawn attributes.
# The draws happen in a fixed order (operator_type, category, period); changing
# that order would change the generated data for the same seed.
df = pd.DataFrame({"task_id": range(1, n + 1)})
df["operator_type"] = np.random.choice(["new", "experienced"], size=n, p=[0.4, 0.6])
df["category"] = np.random.choice(["electronics", "fashion", "living"], size=n)
df["period"] = np.random.choice(["before", "after"], size=n, p=[0.5, 0.5])



In [5]:
# 1) Baseline Error Generation
def generate_error(row):
    """Simulate one error flag (0 or 1) for a single task row.

    The error probability is chosen from the row's ``operator_type`` and
    ``period``:

    * new operator, "before" period -> 0.25
    * new operator, "after" period  -> 0.15
    * anything else                 -> 0.08

    Parameters
    ----------
    row
        Any object supporting ``row["operator_type"]`` and ``row["period"]``
        (``period`` is only read when the operator type is "new").

    Returns
    -------
    The result of a single ``np.random.binomial(1, p)`` draw, taken from the
    global NumPy random state.
    """
    error_prob = 0.08  # default rate for every non-"new" combination
    if row["operator_type"] == "new":
        period = row["period"]
        if period == "before":
            error_prob = 0.25
        elif period == "after":
            error_prob = 0.15
    return np.random.binomial(1, error_prob)


# Draw one error flag per task by applying generate_error to each row (axis=1).
# Every call consumes the global NumPy random state, so the resulting column
# depends on the seed set when df was created and on the row order of df.
df["error_flag"] = df.apply(generate_error, axis=1)
# Error rate is defined as the proportion of tasks with incorrect labeling,
# which directly reflects catalog accuracy in operational environments.



In [6]:
# 2) Experiment Population Selection (New Operators, After Period)
# Keep only tasks handled by new operators in the "after" period. The .copy()
# makes exp an independent frame, so columns added to it later do not touch df.
exp = df.loc[df["operator_type"].eq("new") & df["period"].eq("after")].copy()





In [7]:
# 3) Random Assignment within the Same Population (A/B Test Design)
# Re-seed so the control/treatment split is reproducible.
np.random.seed(123)

# Every row of exp is assigned independently with equal probability, so the
# two arms end up only approximately the same size.
arms = ["control", "treatment"]
arm_probs = [0.5, 0.5]
exp["group"] = np.random.choice(arms, size=exp.shape[0], p=arm_probs)
# Because assignment is random, differences in outcome between the arms can be
# attributed to the SOP change rather than to operator characteristics.



In [8]:
# 4) Outcome Simulation under Controlled Conditions
# Re-seed so the simulated outcomes are reproducible.
np.random.seed(999)

n_exp = len(exp)
# Draw a candidate outcome for every row under each arm's error rate. The
# treatment draw comes first so the random stream matches the argument order
# of a single np.where(cond, treatment, control) call.
treatment_draws = np.random.binomial(1, 0.12, size=n_exp)  # improved SOP applied
control_draws = np.random.binomial(1, 0.15, size=n_exp)    # existing "after" level for new operators (baseline)

# For each row keep the draw that matches its assigned group.
is_treatment = exp["group"] == "treatment"
exp["error_flag_exp"] = np.where(is_treatment, treatment_draws, control_draws)




In [9]:
# 5) Result Aggregation and Interpretation
# The mean of the 0/1 error flag within each arm is that arm's observed error rate.
result = exp.groupby("group", as_index=False)["error_flag_exp"].mean()
# With the seeds used in this notebook, the treatment arm shows roughly a 27%
# relative reduction in error rate versus control, illustrating the practical
# impact SOP improvements can have on operational accuracy.

result
# This analysis is based on simulated data. In real-world environments,
# additional factors such as task complexity and category imbalance should be
# considered.


Unnamed: 0,group,error_flag_exp
0,control,0.154098
1,treatment,0.112583
