In [30]:
import DevTools_Stats_Utils as Utils
import pandas as pd
from scipy import stats
import numpy as np
import statsmodels.api as sm

In [31]:
df = pd.read_csv("data/cuped_info.csv")
df_ratio = pd.read_csv("data/找选接确完车主量.csv")

In [32]:
df.head(3)

Unnamed: 0,group_name,user_id,enter_date,ordercnt_before,ordercnt_after,gmv_before,gmv_after
0,实验组,1000080708,20240525,0.0,0.0,0.0,0.0
1,实验组,1000205418,20240525,37.0,23.0,3696.0,2639.4
2,实验组,1000218110,20240530,0.0,0.0,0.0,0.0


In [33]:
df.columns

Index(['group_name', 'user_id', 'enter_date', 'ordercnt_before',
       'ordercnt_after', 'gmv_before', 'gmv_after'],
      dtype='object')

In [34]:
df.size

32693864

### 最小样本量计算（连续型/比率型）

In [38]:
import numpy as np
import pandas as pd
from statsmodels.stats.power import TTestIndPower

data = df.copy()
control = data[data['group_name'] == '对照组']['ordercnt_after']
treatment = data[data['group_name'] == '实验组']['ordercnt_after']

# Calculate means
mean_control = np.mean(control)
mean_treatment = np.mean(treatment)

# Calculate standard deviations
std_control = np.std(control, ddof=1)  # ddof=1 for sample standard deviation
std_treatment = np.std(treatment, ddof=1)

# Calculate pooled standard deviation
n_control = len(control)
n_treatment = len(treatment)
pooled_std = np.sqrt(((n_control - 1) * std_control**2 + (n_treatment - 1) * std_treatment**2) / (n_control + n_treatment - 2))

# Calculate Cohen's d
cohens_d = (mean_treatment - mean_control) / pooled_std

# Print effect size
print(f"Cohen's d: {cohens_d:.2f}")

# Parameters for power analysis
alpha = 0.05  # significance level
powers = [0.70, 0.80, 0.90, 0.95, 0.99]

analysis = TTestIndPower()
# Calculate required sample sizes
sample_sizes = {power: analysis.solve_power(effect_size=cohens_d, power=power, alpha=alpha, ratio=1.0) for power in powers}
sample_sizes_df = pd.DataFrame(list(sample_sizes.items()), columns=['Power', 'Required Sample Size'])
print(sample_sizes_df)

Cohen's d: 0.00
   Power  Required Sample Size
0   0.70          2.123256e+06
1   0.80          2.700118e+06
2   0.90          3.614699e+06
3   0.95          4.470361e+06
4   0.99          6.320386e+06


### 最小样本量计算（比例型）

In [40]:
from statsmodels.stats.power import NormalIndPower
from statsmodels.stats.proportion import proportion_effectsize

# Calculate the metric (proportion)
data = df_ratio.copy()
data['找完率'] = data['完单车主量'] / data['找单车主量']

# Extract proportions
p_control = data[data['group_name'] == '对照组']['找完率'].values[0]
p_experiment = data[data['group_name'] == '实验组']['找完率'].values[0]

# Print proportions
print(f"Control Group Proportion: {p_control:.2f}")
print(f"Experiment Group Proportion: {p_experiment:.2f}")

# Calculate Cohen's h (effect size for proportions)
effect_size = proportion_effectsize(p_experiment, p_control)

# Print effect size
print(f"Cohen's h: {effect_size:.2f}")

# Parameters for power analysis
alpha = 0.05  # significance level
powers = [0.70, 0.80, 0.90, 0.95, 0.99]

# Create an instance of the power analysis class
analysis = NormalIndPower()

# Calculate required sample sizes
sample_sizes = {power: analysis.solve_power(effect_size=effect_size, power=power, alpha=alpha, ratio=1.0) for power in powers}

# Convert results to a DataFrame for easy viewing
sample_sizes_df = pd.DataFrame(list(sample_sizes.items()), columns=['Power', 'Required Sample Size'])
print(sample_sizes_df)



Control Group Proportion: 0.27
Experiment Group Proportion: 0.27
Cohen's h: 0.00
   Power  Required Sample Size
0   0.70          8.489263e+05
1   0.80          1.079569e+06
2   0.90          1.445240e+06
3   0.95          1.787353e+06
4   0.99          2.527035e+06


### 人均累积GMV （welch's t-test)

In [6]:

# Filter data into experimental and control groups
experiment_group = df[df['group_name'] == '实验组']['gmv_after']
control_group = df[df['group_name'] == '对照组']['gmv_after']

# Perform Welch's T-test
t_stat, p_value = stats.ttest_ind(experiment_group, control_group, equal_var=True)

# Display the results
print(f"T-statistic: {t_stat}")
print(f"人均累积GMV: P-value: {p_value}")

T-statistic: 0.4984117131603499
人均累积GMV: P-value: 0.6181939060893349


### 人均累积订单量 （welch's t-test)

In [7]:
# Filter data into experimental and control groups
experiment_group = df[df['group_name'] == '实验组']['ordercnt_after']
control_group = df[df['group_name'] == '对照组']['ordercnt_after']

# Perform Welch's T-test
t_stat, p_value = stats.ttest_ind(experiment_group, control_group, equal_var=True)

# Display the results
print(f"T-statistic: {t_stat}")
print(f"人均累积订单量: P-value: {p_value}")

T-statistic: 2.605440396403712
人均累积订单量: P-value: 0.009175652747305557


#### 人均累积订单量（样本不足/原样本量1%case)

In [8]:
proportion = 0.01
sampled_df = df.sample(frac=proportion, random_state=1)

experiment_group = sampled_df[sampled_df['group_name'] == '实验组']['ordercnt_after']
control_group = sampled_df[sampled_df['group_name'] == '对照组']['ordercnt_after']

# Welch's T-test
t_stat, p_value = stats.ttest_ind(experiment_group, control_group, equal_var=False)

print(f"T-statistic: {t_stat}")
print(f"样本不足：人均累积订单量: P-value: {p_value}")

T-statistic: 1.3657256832468518
样本不足：人均累积订单量: P-value: 0.17203166342350246


#### 样本不足时的解决方法：检查数据独立性->(block)bootstrap+t-test

In [14]:
import pandas as pd
from scipy import stats
import numpy as np
import statsmodels.api as sm

def bootstrap_ttest(data1, data2, n_bootstraps=10000):
    bootstrapped_means_diff = []
    for _ in range(n_bootstraps):
        sample1 = np.random.choice(data1, size=len(data1), replace=True)
        sample2 = np.random.choice(data2, size=len(data2), replace=True)
        bootstrapped_means_diff.append(sample1.mean() - sample2.mean())
    
    observed_diff = np.mean(data1) - np.mean(data2)
    p_value = np.mean(np.array(bootstrapped_means_diff) >= observed_diff)
    
    return observed_diff, p_value

def block_bootstrap_ttest(data1, data2, block_size, n_bootstraps=10000):
    def block_bootstrap_sample(data, block_size):
        n_blocks = np.int32(np.ceil(len(data) / block_size))
        blocks = [data[i * block_size:(i + 1) * block_size] for i in range(n_blocks)]
        bootstrap_sample = np.concatenate([blocks[np.random.randint(0, n_blocks)] for _ in range(n_blocks)])
        return bootstrap_sample[:len(data)]
    
    bootstrapped_means_diff = []
    for _ in range(n_bootstraps):
        sample1 = block_bootstrap_sample(data1, block_size)
        sample2 = block_bootstrap_sample(data2, block_size)
        bootstrapped_means_diff.append(sample1.mean() - sample2.mean())
    
    observed_diff = np.mean(data1) - np.mean(data2)
    p_value = np.mean(np.array(bootstrapped_means_diff) >= observed_diff)
    
    return observed_diff, p_value



# Check for dependency in the 'ordercnt_after' column
autocorrelation = sm.tsa.acf(sampled_df['ordercnt_after'], fft=False, nlags = np.round(np.sqrt(sampled_df.shape[0])))
# Check if there is significant autocorrelation 
dependency_present = np.any(np.abs(autocorrelation[1:]) > 0.3) 

if dependency_present:
    print("数据为不独立, 使用Block Bootstrap + T-test")
else:
    print("数据为独立, 使用Bootstrap + T-test")

if dependency_present:
    block_size = int(np.round(sampled_df.shape[0]/1000)) 
    observed_diff, p_value = block_bootstrap_ttest(experiment_group, control_group, block_size)
    print(f"Block Bootstrap t-test - Observed difference: {observed_diff}, P-value: {p_value}")
else:
    observed_diff, p_value = bootstrap_ttest(experiment_group, control_group)
    print(f"Bootstrap t-test - Observed difference: {observed_diff}, 样本不足：人均累积订单量 P-value: {p_value}")



数据为独立, 使用Bootstrap + T-test
Bootstrap t-test - Observed difference: 0.0634719049470085, 样本不足：人均累积订单量 P-value: 0.5048


#### Stratified Bootstrap+t-test

In [15]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats

# Define the stratified bootstrap t-test function
def stratified_bootstrap_ttest(df, group_col, value_col, strata_cols, n_bootstraps=10000):
    strata = df.groupby(strata_cols)
    bootstrapped_means_diff = []

    for _ in range(n_bootstraps):
        bootstrapped_means = []

        for stratum_name, stratum_df in strata:
            experiment_group = stratum_df[stratum_df[group_col] == '实验组'][value_col].values
            control_group = stratum_df[stratum_df[group_col] == '对照组'][value_col].values

            if len(experiment_group) > 0 and len(control_group) > 0:
                sample1 = np.random.choice(experiment_group, size=len(experiment_group), replace=True)
                sample2 = np.random.choice(control_group, size=len(control_group), replace=True)
                bootstrapped_means.append(sample1.mean() - sample2.mean())

        if bootstrapped_means:
            bootstrapped_means_diff.append(np.mean(bootstrapped_means))

    observed_diff = df[df[group_col] == '实验组'][value_col].mean() - df[df[group_col] == '对照组'][value_col].mean()
    p_value = np.mean(np.array(bootstrapped_means_diff) >= observed_diff)
    
    return observed_diff, p_value

# Check for dependency in the 'ordercnt_after' column
nlag = int(np.sqrt(sampled_df.shape[0]))
autocorrelation = sm.tsa.acf(sampled_df['ordercnt_after'], fft=False, nlags=nlag)
dependency_threshold = 0.3
dependency_present = np.any(np.abs(autocorrelation[1:]) > dependency_threshold)

if dependency_present:
    print("数据为不独立, 使用Block Bootstrap + T-test")
else:
    print("数据为独立, 使用Bootstrap + T-test")

# Define columns for stratification
strata_cols = ['enter_date']  # Ensure this column exists in sampled_df

# Apply stratified bootstrap t-test
observed_diff, p_value = stratified_bootstrap_ttest(sampled_df, 'group_name', 'ordercnt_after', strata_cols)
print(f"Stratified Bootstrap t-test - Observed difference: {observed_diff}, 样本不足：人均累积订单量 P-value: {p_value}")





数据为独立, 使用Bootstrap + T-test
Stratified Bootstrap t-test - Observed difference: 0.0634719049470085, 样本不足：人均累积订单量 P-value: 0.0308


### 比例类

In [16]:
df_ratio['找选率'] = df_ratio['选单车主量'] / df_ratio['找单车主量']
df_ratio['选接率'] = df_ratio['接单车主量'] / df_ratio['选单车主量']
df_ratio['接确率'] = df_ratio['确认同行车主量'] / df_ratio['接单车主量']
df_ratio['接完率'] = df_ratio['完单车主量'] / df_ratio['接单车主量']
df_ratio['找完率'] = df_ratio['完单车主量'] / df_ratio['找单车主量']
df_ratio['确完率'] = df_ratio['完单车主量'] / df_ratio['确认同行车主量']
df_ratio.head()

Unnamed: 0,group_name,找单车主量,选单车主量,接单车主量,确认同行车主量,完单车主量,找选率,选接率,接确率,接完率,找完率,确完率
0,实验组,2123007,1632338,874264,660821,577206,0.76888,0.53559,0.75586,0.660219,0.271881,0.873468
1,对照组,2121454,1632066,871690,657560,573188,0.769315,0.534102,0.754351,0.657559,0.270186,0.871689


In [18]:
df_ratio.columns

Index(['group_name', '找单车主量', '选单车主量', '接单车主量', '确认同行车主量', '完单车主量', '找选率',
       '选接率', '接确率', '接完率', '找完率', '确完率'],
      dtype='object')

In [19]:
print(df_ratio)

  group_name    找单车主量    选单车主量   接单车主量  确认同行车主量   完单车主量       找选率       选接率  \
0        实验组  2123007  1632338  874264   660821  577206  0.768880  0.535590   
1        对照组  2121454  1632066  871690   657560  573188  0.769315  0.534102   

        接确率       接完率       找完率       确完率  
0  0.755860  0.660219  0.271881  0.873468  
1  0.754351  0.657559  0.270186  0.871689  


In [21]:
metrics_data = { '找选率': ['找单车主量', '选单车主量'], 
                '选接率': ['选单车主量', '接单车主量'], 
                '接确率': ['接单车主量', '确认同行车主量'], 
                '接完率': ['接单车主量', '完单车主量'], 
                '找完率': ['找单车主量', '完单车主量'], 
                '确完率': ['确认同行车主量', '完单车主量'] }



# Define a function to perform the Chi-Square test
def chi_square_test(observed):
    chi2, p, _, _ = stats.chi2_contingency(observed)
    return chi2, p

# Perform the test for each metric
results = {}

for metric, counts in metrics_data.items():
    group_1 = df_ratio.loc[0, counts].values
    group_2 = df_ratio.loc[1, counts].values
    observed = [group_1, group_2]
    chi2, p = chi_square_test(observed)
    results[metric] = {'chi2': chi2, 'p-value': p}

results

{'找选率': {'chi2': 0.1467652692468367, 'p-value': 0.7016459493672567},
 '选接率': {'chi2': 2.198049071476394, 'p-value': 0.13818553068264816},
 '接确率': {'chi2': 0.7480069557528477, 'p-value': 0.3871079748972641},
 '接完率': {'chi2': 2.82128184892715, 'p-value': 0.09302208802874981},
 '找完率': {'chi2': 8.842820211454482, 'p-value': 0.0029424411210696236},
 '确完率': {'chi2': 0.6361569505902359, 'p-value': 0.42510585680806423}}

In [22]:
from statsmodels.stats.proportion import proportions_ztest

# two-sample proportion test
def two_sample_proportion_test(success_a, size_a, success_b, size_b):
    count = np.array([success_a, success_b])
    nobs = np.array([size_a, size_b])
    stat, pval = proportions_ztest(count, nobs)
    return stat, pval

# Perform the test for each metric
results = {}

for metric, counts in metrics_data.items():
    success_a = df_ratio.loc[0, counts[1]]
    size_a = df_ratio.loc[0, counts[0]]
    success_b = df_ratio.loc[1, counts[1]]
    size_b = df_ratio.loc[1, counts[0]]
    
    stat, pval = two_sample_proportion_test(success_a, size_a, success_b, size_b)
    results[metric] = {'z-stat': stat, 'p-value': pval}

# Display the results
results

{'找选率': {'z-stat': -1.0624460418419583, 'p-value': 0.28803324134874464},
 '选接率': {'z-stat': 2.6948048420335984, 'p-value': 0.007042986877449176},
 '接确率': {'z-stat': 2.3184326330237774, 'p-value': 0.020425820243672496},
 '接完率': {'z-stat': 3.7067694409956724, 'p-value': 0.00020991988561144383},
 '找完率': {'z-stat': 3.928020195957203, 'p-value': 8.564802014609073e-05},
 '确完率': {'z-stat': 3.06251798875766, 'p-value': 0.0021948328303156776}}

In [23]:
df.columns

Index(['group_name', 'user_id', 'enter_date', 'ordercnt_before',
       'ordercnt_after', 'gmv_before', 'gmv_after'],
      dtype='object')