In [201]:
import pandas as pd
import numpy as np
from metric_funcs import Ttest, Metric
from frt_pvalue import sharp_null_hypothesis_pvalue
from generator import EnvironmentGenerator
from recommandation import RecommandationSys
import xlwt
import warnings
import time
warnings.filterwarnings("ignore")

1. 总体样本的生成

In [202]:
# # 1. 模拟前参数准备
# 1.1 定义全局参数
total_user = 100000
total_feed = 2000
# target_click = 3000
impression_number= 10

# 1.2 调用实验数据
# 环境变量
rec_env = EnvironmentGenerator(total_user, total_feed)
rec_sys = RecommandationSys(rec_env.get_users(), rec_env.get_feeds())
# 获取全部users,feeds 特征
users = rec_env.get_users()
feeds = rec_env.get_feeds()
# 创建 is_new_index
is_new_index = np.zeros(len(feeds))
is_new_index[(feeds[:, 1] > 0.002) & (feeds[:, 1] <= 0.003)] = 1

new_ad_index = np.where(is_new_index == 1)[0]
new_feed_number= len(new_ad_index)
mature_feed_number= total_feed- new_feed_number
# print(is_new_index)
# 创建 coefficient_ctr_model
coefficients=rec_sys.generate_groundtruth_model()
coefficient_ctr_model= coefficients[1]
# 创建 target_bid_price
target_bid_price= np.random.normal(10.89, 1, total_feed)

print("新广告条数:",new_feed_number)
print("mature 广告条数:",mature_feed_number)
print("所有广告ctr的均值",np.mean(feeds[:,1]))
print("新广告ctr的均值：",np.mean(feeds[:,1][(is_new_index==1)]))
print("mature 广告均值:",np.mean(feeds[:,1][(is_new_index==0)]))

新广告条数: 385
mature 广告条数: 1615
所有广告ctr的均值 0.0028140766799801026
新广告ctr的均值： 0.0024559713782141635
mature 广告均值: 0.002899445436128639


2. 渐进式双边实验

In [203]:
T1 = time.time()
# 1.3 构造实验变量
# 构造treatment和control的初始数据
is_new_index_t=is_new_index; is_new_index_c=is_new_index
target_bid_price_t= target_bid_price; target_bid_price_c= target_bid_price
new_ad_index_t= new_ad_index; new_ad_index_c= new_ad_index

# 调整new feed 的 bid
current_cpc_c = np.zeros(total_feed)
for i in range(int(total_feed)):
    if is_new_index[i] == 0:
        current_cpc_c[i] = target_bid_price[i]
    else:
        current_cpc_c[i] = target_bid_price[i] + 2
# print(current_cpc)
current_cpc_t = current_cpc_c

实验分组

In [636]:
# 实验流量
# treatment user/feed
treatment_ratio= 0.25
control_ratio= 0.25
print('================================================')
print('cold_start--双边实验')
print('treatment_ratio：', treatment_ratio)
print('control_ratio：', control_ratio)
print('================================================')

cold_start--双边实验
treatment_ratio： 0.25
control_ratio： 0.25


In [637]:
# 参与实验的user数
t_user_number= int(total_user* treatment_ratio)
c_user_number= int(total_user* treatment_ratio)
# t_user_number= int(total_user* 0.3)
# c_user_number= int(total_user* 0.3)
# 参与实验的feed数
# t_new_feed_number= int(new_feed_number * treatment_ratio)
t_new_feed_number= int(new_feed_number * 0.3)
t_all_feed_number= int(mature_feed_number+ t_new_feed_number)
# c_new_feed_number= int(new_feed_number * treatment_ratio)
c_new_feed_number= int(new_feed_number * 0.3)
c_all_feed_number= int(mature_feed_number+ c_new_feed_number)

# # 2. 实验变量建立
# 2.2 建立实验组和对照组的index
# 建立feed侧的分组变量
is_new_treatment = np.zeros(total_feed)
is_new_control = np.zeros(total_feed)
# 全部的index
new_ad_index = np.where(is_new_index == 1)[0]
treatment_new_index = new_ad_index[0:t_new_feed_number]
# print(treatment_new_index)
control_new_index = treatment_new_index

# 实验组，对照组的向量
for i in range(t_new_feed_number):
    is_new_treatment[int(treatment_new_index[i])] = 1
    is_new_control[int(control_new_index[i])] = 1

# 最终有；is_new_index,is_new_control,is_new_treatment,
       # current_cpc_c,
print(is_new_index.sum())
print(is_new_index_c.sum())
print(is_new_index_t.sum())
print(is_new_treatment.sum())
print(is_new_control.sum())

385.0
385.0
385.0
115.0
115.0


1. control

In [638]:
# # 2. 模拟双边的 control
print('================================================')
print(f"cold_start--{treatment_ratio} control")
print('================================================')

# 2.1 建立实验输出变量
# feed侧输出变量(所有feed--1000条)
c_total_impressions_feed = np.zeros(total_feed)
c_total_clicks_feed = np.zeros(total_feed)
# user侧输出变量(所有user)
c_total_impressions_user = np.zeros(int(c_user_number))
c_total_clicks_user = np.zeros(int(c_user_number))
# user侧输出变量(所有user-new feed)
c_total_impressions_user_c = np.zeros(int(c_user_number))
c_total_clicks_user_c = np.zeros(int(c_user_number))
c_total_impressions_user_mature= np.zeros(int(c_user_number))
see_new_c=np.zeros(int(c_user_number))

# 2.2 100% control模拟
for user_index in range(int(c_user_number)):
# for user_index in range(10):
    # (1) 调用real_ctr
    user_i = users[user_index]
    real_ctr = np.maximum(0, np.minimum(np.matmul(coefficient_ctr_model, user_i), 1))
    # (2) 召回 + 排序 + 曝光
    pCTR = real_ctr
    ecpm = pCTR * current_cpc_c
    # 屏蔽
    rank = np.argsort(-ecpm)
    impression_id=[]
    for i in range(len(rank)):
        if len(impression_id)<10:
            if is_new_control[rank[i]]==1:
                impression_id.append(rank[i])
            elif is_new_index[rank[i]]==0:
                impression_id.append(rank[i])
            else:
                pass

    impression_id=np.array(impression_id)         # list 和 array 互换: np.array(a),a.tolist()
    impression_id = impression_id.astype("int64")
    # print(impression_id)
    for i in impression_id:
        if is_new_control[i] == 1:
            see_new_c[user_index] += 1
    # (3) 输出变量
    # 曝光
    for j in impression_id:
        c_total_impressions_feed[j] += 1
        c_total_impressions_user[user_index] += 1  # user: 所有feed：t + mature
        if is_new_control[j]==1:
            c_total_impressions_user_c[user_index] += 1 # user : new feed
        if is_new_index[j]==0:
            c_total_impressions_user_mature[user_index] += 1
    # 点击
    is_clicked = np.zeros(total_feed)
    for j in impression_id:
        is_clicked[j] = np.random.binomial(1, max(0, min(real_ctr[j], 1)))
        if is_clicked[j] == 1:  # if clicked
            c_total_clicks_feed[j] += 1
            c_total_clicks_user[user_index]+= 1
            if is_new_control[j] == 1:
                c_total_clicks_user_c[user_index] += 1

    # (5) 判断new feed是否变成了mature feed
    # 如果 click > traget_clicks, 就变为 mature ads, 不再给它加 bid
    # current_cpc_c = np.less(c_total_clicks_feed, target_click) * current_cpc_c + (
                # 1 - np.less(c_total_clicks_feed, target_click)) * target_bid_price_c

# 2.3 control 实验结果
# # feed侧结果
# 曝光
c_total_impressions_feed = np.copy(c_total_impressions_feed)   # c+ mature
c_total_impressions_feed_c=[]                          # c(new feed c个)
for i in range(int(c_new_feed_number)):
    c_total_impressions_feed_c.append(c_total_impressions_feed[control_new_index[i]])
# 总值和均值
c_total_impressions_feed_sum= c_total_impressions_feed.sum()
c_total_impressions_feed_average= c_total_impressions_feed_sum/c_all_feed_number   # c +mature
c_total_impressions_feed_c_sum= sum(c_total_impressions_feed_c)
c_total_impressions_feed_c_average= c_total_impressions_feed_c_sum/(c_new_feed_number)

# 点击
c_total_clicks_feed = np.copy(c_total_clicks_feed)
c_total_clicks_feed_c=[]
for i in range(int(c_new_feed_number)):
    c_total_clicks_feed_c.append(c_total_clicks_feed[control_new_index[i]])
# 总值和均值
c_total_clicks_feed_sum= c_total_clicks_feed.sum()
c_total_clicks_feed_average= c_total_clicks_feed_sum/total_feed
c_total_clicks_feed_c_sum= sum(c_total_clicks_feed_c)
c_total_clicks_feed_c_average= c_total_clicks_feed_c_sum/(c_new_feed_number)

# # user侧结果
# 曝光
c_total_impressions_user = np.copy(c_total_impressions_user)
c_total_impressions_user_c = np.copy(c_total_impressions_user_c)
# 总值和均值
c_total_impressions_user_sum= c_total_impressions_user.sum()
c_total_impressions_user_average= c_total_impressions_user_sum/c_user_number
c_total_impressions_user_c_sum= c_total_impressions_user_c.sum()
c_total_impressions_user_c_average= c_total_impressions_user_c_sum/c_user_number

# 点击
c_total_clicks_user = np.copy(c_total_clicks_user)
c_total_clicks_user_c= np.copy(c_total_clicks_user_c)
# 总值和均值
c_total_clicks_user_sum= c_total_clicks_user.sum()
c_total_clicks_user_average= c_total_clicks_user_sum/c_user_number
c_total_clicks_user_c_sum= c_total_clicks_user_c.sum()
c_total_clicks_user_c_average= c_total_clicks_user_c_sum/c_user_number


print("user_总曝光：",c_total_impressions_user_sum)
print("user_new曝光：",c_total_impressions_user_c_sum)
print("user_mature曝光：",c_total_impressions_user_mature.sum())
print("c组new feed impression总值:",c_total_impressions_feed_c_sum)
print("c组new feed impression 均值:",c_total_impressions_feed_c_average)
print("c组 new feed impression 人均：",c_total_impressions_user_c_average)

cold_start--0.25 control
user_总曝光： 250000.0
user_new曝光： 38835.0
user_mature曝光： 211165.0
c组new feed impression总值: 38835.0
c组new feed impression 均值: 337.69565217391306
c组 new feed impression 人均： 1.5534


2. treatment

In [639]:
# # 3. 100% treatment 实验
print('================================================')
print(f"cold_start--{treatment_ratio} treatment")
print('================================================')

# 3.1 建立实验输出变量
# feed侧输出变量
t_total_impressions_feed = np.zeros(total_feed)
t_total_clicks_feed = np.zeros(total_feed)

# user侧输出变量
t_total_impressions_user = np.zeros(int(t_user_number))
t_total_clicks_user = np.zeros(int(t_user_number))

t_total_impressions_user_t = np.zeros(int(t_user_number))
t_total_clicks_user_t = np.zeros(int(t_user_number))
t_total_impressions_user_mature= np.zeros(int(t_user_number))

see_new_t=np.zeros(int(t_user_number))

# 3.2 100% treatment模拟
for user_index in range(int(t_user_number)):
# for user_index in range(10):
    # (1) 调用real_ctr
    user_i = users[user_index]
    real_ctr = np.maximum(0, np.minimum(np.matmul(coefficient_ctr_model, user_i), 1))
    # (2) 召回 + 排序 + 曝光
    pCTR = real_ctr
    ecpm = pCTR * current_cpc_t
    if np.random.binomial(1, 0.3) == 1:
        # print("置顶了")
        top_index = np.random.choice(treatment_new_index, int(2), replace=False)
        top_index=np.array(top_index)[0:2]
        # 排序
        rank= np.argsort(-ecpm)
        rest_index= []
        for i in range(len(rank)):
            if len(rest_index)<8:
                if rank[i] not in top_index:
                    if is_new_treatment[rank[i]]==1:
                        rest_index.append(rank[i])
                    elif is_new_index[rank[i]]==0:
                        rest_index.append(rank[i])
                    else:
                        pass
                else:
                    pass
        impression_id=[]
        for i in top_index:
            impression_id.append(i)
        for j in rest_index:
            impression_id.append(j)
    else:
        # print("没置顶")
        rank = np.argsort(-ecpm)
        impression_id=[]
        for i in range(len(rank)):
            if len(impression_id)<10:
                if is_new_treatment[rank[i]]==1:
                    impression_id.append(rank[i])
                elif is_new_index[rank[i]]==0:
                    impression_id.append(rank[i])
                else:
                    pass
    # print(impression_id)
    impression_id = np.array(impression_id)  # list 和 array 互换: np.array(a),a.tolist()
    impression_id = impression_id.astype("int64")
    # see_new_t = 0
    for i in impression_id:
        if is_new_treatment[i] == 1:
            see_new_t[user_index] += 1
    # (3) 输出变量
    # 曝光
    for j in impression_id:
        t_total_impressions_feed[j] +=1
        t_total_impressions_user[user_index] += 1
        if is_new_treatment[j]==1:
            t_total_impressions_user_t[user_index] += 1
        if is_new_index[j]==0 and is_new_treatment[j]==0:
            t_total_impressions_user_mature[user_index] += 1
    # 点击
    is_clicked = np.zeros(total_feed)
    for j in impression_id:
        is_clicked[j] = np.random.binomial(1, max(0, min(real_ctr[j], 1)))
        if is_clicked[j] == 1:  # if clicked
            t_total_clicks_feed[j] += 1
            t_total_clicks_user[user_index]+= 1
            if is_new_treatment[j] == 1:
                t_total_clicks_user_t[user_index] += 1

    # (5) 判断 new feed 是否变成了mature feed
    # current_cpc_t = np.less(t_total_clicks_feed, target_click) * current_cpc_t + (
                # 1 - np.less(t_total_clicks_feed, target_click)) * target_bid_price_t

# 3.3 treatment 实验结果
# # feed侧结果
# 曝光
t_total_impressions_feed = np.copy(t_total_impressions_feed)
t_total_impressions_feed_t=[]                          # 300个new feed
for i in range(int(t_new_feed_number)):
    t_total_impressions_feed_t.append(t_total_impressions_feed[treatment_new_index[i]])
# 总值和均值
t_total_impressions_feed_sum= t_total_impressions_feed.sum()
t_total_impressions_feed_average= t_total_impressions_feed_sum/t_all_feed_number
t_total_impressions_feed_t_sum= sum(t_total_impressions_feed_t)
t_total_impressions_feed_t_average= t_total_impressions_feed_t_sum/(t_new_feed_number)

# 点击
t_total_clicks_feed = np.copy(t_total_clicks_feed)
t_total_clicks_feed_t=[]
for i in range(int(t_new_feed_number)):
    t_total_clicks_feed_t.append(t_total_clicks_feed[treatment_new_index[i]])
# 总值和均值
t_total_clicks_feed_sum= t_total_clicks_feed.sum()
t_total_clicks_feed_average= t_total_clicks_feed_sum/t_all_feed_number
t_total_clicks_feed_t_sum= sum(t_total_clicks_feed_t)
t_total_clicks_feed_t_average= t_total_clicks_feed_t_sum/(t_new_feed_number)

# # user侧结果
# 曝光
t_total_impressions_user = np.copy(t_total_impressions_user)
t_total_impressions_user_t = np.copy(t_total_impressions_user_t)
# 总值和均值
t_total_impressions_user_sum= t_total_impressions_user.sum()
t_total_impressions_user_average= t_total_impressions_user_sum/t_user_number
t_total_impressions_user_t_sum= t_total_impressions_user_t.sum()
t_total_impressions_user_t_average= t_total_impressions_user_t_sum/t_user_number

# 点击
t_total_clicks_user = np.copy(t_total_clicks_user)
t_total_clicks_user_t= np.copy(t_total_clicks_user_t)
# 总值和均值
t_total_clicks_user_sum= t_total_clicks_user.sum()
t_total_clicks_user_average= t_total_clicks_user_sum/t_user_number
t_total_clicks_user_t_sum= t_total_clicks_user_t.sum()
t_total_clicks_user_t_average= t_total_clicks_user_t_sum/t_user_number

see_new_t=np.copy(see_new_t)

print("user总曝光：",t_total_impressions_user_sum)
print("user_new总曝光：",t_total_impressions_user_t_sum)
print("user_mature总曝光：",t_total_impressions_user_mature.sum())
print("t组new feed impression总值:",t_total_impressions_feed_t_sum)
print("t组new feed impression 均值:",t_total_impressions_feed_t_average)
print("t组 new feed impression 人均：",t_total_impressions_user_t_average)

cold_start--0.25 treatment
user总曝光： 250000.0
user_new总曝光： 50992.0
user_mature总曝光： 199008.0
t组new feed impression总值: 50992.0
t组new feed impression 均值: 443.4086956521739
t组 new feed impression 人均： 2.03968


3. T test

In [640]:
# # 4. T-test
# # feed侧
# 曝光
data0=pd.DataFrame()
for i in range(int(c_new_feed_number)):
    data0.loc[i,"groupid"]= 0
    data0.loc[i,"numerator"]=c_total_impressions_feed_c[i]
    data0.loc[i,"denominator"]= 1
data1=pd.DataFrame()
for i in range(int(t_new_feed_number)):
    data1.loc[i, "groupid"] = 1
    data1.loc[i, "numerator"] = t_total_impressions_feed_t[i]
    data1.loc[i, "denominator"] = 1

data=pd.concat([data0,data1])
impressions_feed_p1= sharp_null_hypothesis_pvalue(data)
print("impressions_feed_p1:",impressions_feed_p1)

# 点击
data0=pd.DataFrame()
for i in range(int(c_new_feed_number)):
    data0.loc[i,"groupid"]= 0
    data0.loc[i,"numerator"]=c_total_clicks_feed_c[i]
    data0.loc[i,"denominator"]= 1
data1=pd.DataFrame()
for i in range(int(t_new_feed_number)):
    data1.loc[i, "groupid"] = 1
    data1.loc[i, "numerator"] = t_total_clicks_feed_t[i]
    data1.loc[i, "denominator"] = 1

data=pd.concat([data0,data1])
clicks_feed_p1= sharp_null_hypothesis_pvalue(data)
print("clicks_feed_p1:",clicks_feed_p1)

# ctr
data0=pd.DataFrame()
for i in range(int(c_new_feed_number)):
    data0.loc[i,"groupid"]= 0
    data0.loc[i,"numerator"]=c_total_clicks_feed_c[i]
    data0.loc[i,"denominator"]= c_total_impressions_feed_c[i]
data1=pd.DataFrame()
for i in range(int(t_new_feed_number)):
    data1.loc[i, "groupid"] = 1
    data1.loc[i, "numerator"] = t_total_clicks_feed_t[i]
    data1.loc[i, "denominator"] = t_total_impressions_feed_t[i]

data=pd.concat([data0,data1])
ctr_feed_p1= sharp_null_hypothesis_pvalue(data)
print("ctr_feed_p1:",ctr_feed_p1)

# # user侧
# 曝光
content_c=(c_total_impressions_user_c).tolist()
count_c=[1 for _ in range(c_user_number)]
content_t=(t_total_impressions_user_t).tolist()
count_t=[1 for _ in range(t_user_number)]
M_c=Metric(content_c,count_c)
M_t=Metric(content_t,count_t)
impression_user_p= Ttest(M_c,M_t).pvalue()
print("impression_user_p:",impression_user_p)

# 点击
content_c=(c_total_clicks_user_c).tolist()
count_c=[1 for _ in range(c_user_number)]
content_t=(t_total_clicks_user_t).tolist()
count_t=[1 for _ in range(t_user_number)]
M_c=Metric(content_c,count_c)
M_t=Metric(content_t,count_t)
clicks_user_p= Ttest(M_c,M_t).pvalue()
print("clicks_user_p:",clicks_user_p)

# ctr
content_c=(c_total_clicks_user_c).tolist()
count_c=(c_total_impressions_user_c).tolist()
content_t=(t_total_clicks_user_t).tolist()
count_t=(t_total_impressions_user_t).tolist()
M_c=Metric(content_c,count_c)
M_t=Metric(content_t,count_t)
ctr_user_p= Ttest(M_c,M_t).pvalue()
print("ctr_user_p:",ctr_user_p)

impressions_feed_p1: 0.17365
clicks_feed_p1: 0.4437
ctr_feed_p1: 5e-05
impression_user_p: 0.0
clicks_user_p: 0.016450740640770123
ctr_user_p: 0.0


4. 输出实验结果

In [641]:
# # 5. 输出实验结果
# 5.1 feed侧
outputs_feed= pd.DataFrame()
index=["曝光数","点击数","点击率ctr"]
col=["t_all_feed(所有user)","t_average(/总feed数1000)","t_new_feed(所有user)","t_average(/new_feed数300)",
     "c_all_feed(所有user)","c_average(/总feed数1000)","c_new_feed(所有user)","c_average(/new feed数300)","ATE","p_value"]
###曝光
outputs_feed.loc[index[0],col[0]]= t_total_impressions_feed_sum
outputs_feed.loc[index[0],col[1]]= t_total_impressions_feed_average
outputs_feed.loc[index[0],col[2]]= t_total_impressions_feed_t_sum
outputs_feed.loc[index[0],col[3]]= t_total_impressions_feed_t_average
outputs_feed.loc[index[0],col[4]]= c_total_impressions_feed_sum
outputs_feed.loc[index[0],col[5]]= c_total_impressions_feed_average
outputs_feed.loc[index[0],col[6]]= c_total_impressions_feed_c_sum
outputs_feed.loc[index[0],col[7]]= c_total_impressions_feed_c_average
outputs_feed.loc[index[0],col[8]]= t_total_impressions_feed_t_average - c_total_impressions_feed_c_average
outputs_feed.loc[index[0],col[9]]= impressions_feed_p1
###点击
outputs_feed.loc[index[1],col[0]]=t_total_clicks_feed_sum
outputs_feed.loc[index[1],col[1]]=t_total_clicks_feed_average
outputs_feed.loc[index[1],col[2]]=t_total_clicks_feed_t_sum
outputs_feed.loc[index[1],col[3]]=t_total_clicks_feed_t_average
outputs_feed.loc[index[1],col[4]]=c_total_clicks_feed_sum
outputs_feed.loc[index[1],col[5]]=c_total_clicks_feed_average
outputs_feed.loc[index[1],col[6]]=c_total_clicks_feed_c_sum
outputs_feed.loc[index[1],col[7]]=c_total_clicks_feed_c_average
outputs_feed.loc[index[1],col[8]]=t_total_clicks_feed_t_average-c_total_clicks_feed_c_average
outputs_feed.loc[index[1],col[9]]=clicks_feed_p1
###点击率
outputs_feed.loc[index[2],col[0]]=t_total_clicks_feed_sum/ t_total_impressions_feed_sum
outputs_feed.loc[index[2],col[1]]=t_total_clicks_feed_average / t_total_impressions_feed_average # 与上面一样
outputs_feed.loc[index[2],col[2]]=t_total_clicks_feed_t_sum/ t_total_impressions_feed_t_sum
outputs_feed.loc[index[2],col[3]]=t_total_clicks_feed_t_average / t_total_impressions_feed_t_average  # 与上面一样
outputs_feed.loc[index[2],col[4]]=c_total_clicks_feed_sum / c_total_impressions_feed_sum
outputs_feed.loc[index[2],col[5]]=c_total_clicks_feed_average / c_total_impressions_feed_average    # 与上面一样
outputs_feed.loc[index[2],col[6]]=c_total_clicks_feed_c_sum / c_total_impressions_feed_c_sum
outputs_feed.loc[index[2],col[7]]=c_total_clicks_feed_c_average / c_total_impressions_feed_c_average  # 与上面一样
outputs_feed.loc[index[2],col[8]]=(t_total_clicks_feed_t_average / t_total_impressions_feed_t_average)-(c_total_clicks_feed_c_average / c_total_impressions_feed_c_average)
outputs_feed.loc[index[2],col[9]]=ctr_feed_p1

print('================================================')
print(f"{treatment_ratio}- treatment feed侧结果")
print(outputs_feed)
print('================================================')
outputs_feed.to_excel(f"outputs_feed_{treatment_ratio}.xls")

0.25- treatment feed侧结果
        t_all_feed(所有user)  t_average(/总feed数1000)  t_new_feed(所有user)  \
曝光数          250000.000000              144.508671        50992.000000   
点击数           29563.000000               17.088439         4597.000000   
点击率ctr            0.118252                0.118252            0.090151   

        t_average(/new_feed数300)  c_all_feed(所有user)  c_average(/总feed数1000)  \
曝光数                   443.408696       250000.000000              144.508671   
点击数                    39.973913        30537.000000               15.268500   
点击率ctr                  0.090151            0.122148                0.105658   

        c_new_feed(所有user)  c_average(/new feed数300)         ATE  p_value  
曝光数           38835.000000                337.695652  105.713043  0.17365  
点击数            4373.000000                 38.026087    1.947826  0.44370  
点击率ctr            0.112605                  0.112605   -0.022453  0.00005  


In [642]:
# 5.1 user侧
outputs_user= pd.DataFrame()
index=["曝光数","点击数","点击率ctr"]
col=["t_all_user(所有feed)",f"average1(/总user数{total_user})","t_all_user(new feed)",f"average2(/总user数{total_user})",
     "c_all_user(所有feed)",f"average3(/总user数{total_user})","c_all_user(new feed)",f"average4(/总user数{total_user})","ATE","p_value"]
###曝光
outputs_user.loc[index[0],col[0]]= t_total_impressions_user_sum
outputs_user.loc[index[0],col[1]]= t_total_impressions_user_average
outputs_user.loc[index[0],col[2]]= t_total_impressions_user_t_sum
outputs_user.loc[index[0],col[3]]= t_total_impressions_user_t_average
outputs_user.loc[index[0],col[4]]= c_total_impressions_user_sum
outputs_user.loc[index[0],col[5]]= c_total_impressions_user_average
outputs_user.loc[index[0],col[6]]= c_total_impressions_user_c_sum
outputs_user.loc[index[0],col[7]]= c_total_impressions_user_c_average
outputs_user.loc[index[0],col[8]]= t_total_impressions_user_t_average-c_total_impressions_user_c_average
outputs_user.loc[index[0],col[9]]= impression_user_p
###点击
outputs_user.loc[index[1],col[0]]=t_total_clicks_user_sum
outputs_user.loc[index[1],col[1]]=t_total_clicks_user_average
outputs_user.loc[index[1],col[2]]=t_total_clicks_user_t_sum
outputs_user.loc[index[1],col[3]]=t_total_clicks_user_t_average
outputs_user.loc[index[1],col[4]]=c_total_clicks_user_sum
outputs_user.loc[index[1],col[5]]=c_total_clicks_user_average
outputs_user.loc[index[1],col[6]]=c_total_clicks_user_c_sum
outputs_user.loc[index[1],col[7]]=c_total_clicks_user_c_average
outputs_user.loc[index[1],col[8]]=t_total_clicks_user_t_average-c_total_clicks_user_c_average
outputs_user.loc[index[1],col[9]]= clicks_user_p
###点击率
outputs_user.loc[index[2],col[0]]=t_total_clicks_user_sum / t_total_impressions_user_sum
outputs_user.loc[index[2],col[1]]=t_total_clicks_user_average / t_total_impressions_user_average    # 与上面一样
outputs_user.loc[index[2],col[2]]=t_total_clicks_user_t_sum / t_total_impressions_user_t_sum
outputs_user.loc[index[2],col[3]]=t_total_clicks_user_t_average / t_total_impressions_user_t_average   # 与上面一样
outputs_user.loc[index[2],col[4]]=c_total_clicks_user_sum/ c_total_impressions_user_sum
outputs_user.loc[index[2],col[5]]=c_total_clicks_user_average/ c_total_impressions_user_average    # 与上面一样
outputs_user.loc[index[2],col[6]]=c_total_clicks_user_c_sum / c_total_impressions_user_c_sum
outputs_user.loc[index[2],col[7]]=c_total_clicks_user_c_average / c_total_impressions_user_c_average  # 与上面一样
outputs_user.loc[index[2],col[8]]= (t_total_clicks_user_t_average / t_total_impressions_user_t_average)-(c_total_clicks_user_c_average / c_total_impressions_user_c_average)
outputs_user.loc[index[2],col[9]]= ctr_user_p

print('================================================')
print(f"{treatment_ratio}- treatment user侧结果")
print(outputs_user)
print('================================================')
outputs_user.to_excel(f"outputs_user_{treatment_ratio}.xls")

0.25- treatment user侧结果
        t_all_user(所有feed)  average1(/总user数100000)  t_all_user(new feed)  \
曝光数          250000.000000                10.000000          50992.000000   
点击数           29563.000000                 1.182520           4597.000000   
点击率ctr            0.118252                 0.118252              0.090151   

        average2(/总user数100000)  c_all_user(所有feed)  average3(/总user数100000)  \
曝光数                    2.039680       250000.000000                10.000000   
点击数                    0.183880        30537.000000                 1.221480   
点击率ctr                 0.090151            0.122148                 0.122148   

        c_all_user(new feed)  average4(/总user数100000)       ATE   p_value  
曝光数             38835.000000                 1.553400  0.486280  0.000000  
点击数              4373.000000                 0.174920  0.008960  0.016451  
点击率ctr              0.112605                 0.112605 -0.022453  0.000000  


In [643]:
print("c组new feed平均曝光量：",see_new_c.mean())
print("t组new feed平均曝光量：",see_new_t.mean())
print("差值",see_new_t.mean()-see_new_c.mean())

c组new feed平均曝光量： 1.5534
t组new feed平均曝光量： 2.03968
差值 0.48628000000000027


In [644]:
T2 = time.time()
print('程序运行时间:%s秒' % ((T2 - T1)))

程序运行时间:174194.85534715652秒
