## 机制验证

针对Provider 1 进行推演

In [68]:
import math
import numpy as np
import random

In [116]:
# 超参数的选定
eps = 0.3
T = 1000000
B = int(T**(2*eps))
eta = 0.9
L = 1500
reward_scale = 5
K = 3
M = T**(-eps)*math.log(K*T)

print(f'epsilon: {eps}')
print(f'T: {T}')
print(f'B: {B}')
print(f'eta: {eta}')
print(f'L: {L}')
print(f'reward_scale: {reward_scale}')
print(f'K: {K}')
print(f'M: {M}')


epsilon: 0.3
T: 1000000
B: 3981
eta: 0.9
L: 1500
reward_scale: 5
K: 3
M: 0.23637291771156435


In [117]:
### 三个模型的参数
models = [
    {
        'score': 0.9,
        'output_token': 1000,
        'token_price': 5e-4,
        'utility': 4.0 # 0.9 * reward_scale - 1000*5e-5
    },
    {
        'score': 0.8,
        'output_token': 900,
        'token_price': 4e-4,
        'utility': 3.64 # 0.8 * reward_scale - 1000*4e-5
    },
    {
        'score': 0.7,
        'output_token': 800,
        'token_price': 3e-4,
        'utility': 3.26 # 0.7 * reward_scale - 800*5e-5
    },
]

In [118]:
models_others = [
    {
        'score': 0.8,
        'output_token': 1000,
        'token_price': 5e-4,
        'utility': 3.5 # 0.9 * reward_scale - 1000*5e-5
    },
    {
        'score': 0.7,
        'output_token': 900,
        'token_price': 4e-4,
        'utility': 3.14 # 0.8 * reward_scale - 1000*4e-5
    },
    {
        'score': 0.6,
        'output_token': 800,
        'token_price': 3e-4,
        'utility': 2.76 # 0.7 * reward_scale - 800*5e-5
    },
]

In [119]:
## 计算 delta 1
mu_div = min([model['score']/model['output_token'] for model in models+models_others])
mu_div *= reward_scale
print(f"mu_div: {mu_div}")
max_output_token = max([model['output_token'] for model in models+models_others])
delta_1 = -math.log(mu_div) + max_output_token / L + 1
delta_2 = math.log(reward_scale)

print(f"delta_1: {delta_1}")
print(f"delta_2: {delta_2}")


mu_div: 0.00375
delta_1: 7.252666105666485
delta_2: 1.6094379124341003


In [120]:
# 额外超参数计算，假设所有剩下的供应商使用的模型都是一样的，且都是诚实的
delta_3 = math.log(models_others[0]['score'] * reward_scale / models_others[0]['output_token']) - models_others[0]['output_token']/L
delta_3 *= (K-1)
print(f"delta_3: {delta_3}")

R = int(T - ((delta_1 + 3)*K + delta_2 + delta_3)*B)
print(f"R: {R}")
second_user_utility = 3.5

threshold = second_user_utility - (reward_scale + models[0]['token_price']*models[0]['output_token'])*M/2


print(f"threshold: {threshold}")
# delta = delta_1 + math.log(models)

delta_3: -12.376255169057826
R: 920415
threshold: 2.8499744762931982


In [127]:
def run_ours():
    user_utility = 0
    provider_utility = 0
    delegation_num = 0
    # phase_1: Exploration phase
    # delegate B times honest
    # 诚实的运行一次,user utility 为：

    user_utility_h1 = models[0]['score'] * reward_scale - models[0]['token_price'] * models[0]['output_token']
    print(f'user utility honest run 1 time: {user_utility_h1}')
    user_utility += B*user_utility_h1
    print(f'phase 1 exploration end. \nUser utility: {user_utility}\n Provider utility: {provider_utility}')

    delegation_num += B

    # 开始说谎， B 次B次的迭代，每一次使用次好的模型，但是汇报成最好的模型, 最多 R 次
    phase2_times = int(R//B)
    remain_times = R%B
    phase2_user_utility = 0
    ##
    count = 0
    bonus_provider_utility = 0
    bonus_user_utility = 0
    early_stop_flag = False
    for _ in range(phase2_times):
        phase2_user_utility += B * (models[1]['score']*reward_scale - \
            models[1]['output_token']*models[0]['token_price'])
        
        provider_utility += B * (models[1]['output_token']*models[0]['token_price'] - \
            models[1]['output_token']*models[1]['token_price']) * eta

        count += B
        if phase2_user_utility / count < threshold:
            early_stop_flag = True
            break
    else:
        phase2_user_utility += remain_times * (models[1]['score']*reward_scale - \
            models[1]['output_token']*models[0]['token_price'])
        provider_utility += remain_times * (models[1]['output_token']*models[0]['token_price'] - \
            models[1]['output_token']*models[1]['token_price']) * eta
        count += remain_times
        if phase2_user_utility / count < threshold:
            early_stop_flag = True
    print(f'二阶段 user utility: {phase2_user_utility}')
    print(f'二阶段 provider utility: {provider_utility}')
    user_utility += phase2_user_utility
    delegation_num += count

    if not early_stop_flag:
        bonus_user_utility = B * (models[-1]['score']*reward_scale - \
            L*models[0]['token_price'])
        
        bonus_provider_utility = B * (L*models[0]['token_price'] - \
            models[-1]['output_token']*models[-1]['token_price']) * eta

    print(bonus_provider_utility)
    user_utility += bonus_user_utility
    provider_utility += bonus_provider_utility
    delegation_num += B


    ## 第三阶段
    avg_reward = models[0]['score']*reward_scale
    avg_length = models[0]['output_token']

    delta = delta_1 + math.log(avg_reward) - math.log(avg_length) - avg_length/L
    print(delta)
    will_delegate = int(B * delta)

    user_utility += will_delegate * (models[-1]['score']*reward_scale - \
            L*models[0]['token_price'])
        
    bonus_provider_utility = will_delegate * (L*models[0]['token_price'] - \
            models[-1]['output_token']*models[-1]['token_price']) * eta
    print(f'奖励的 bonus_provider_utility: {bonus_provider_utility}')
    provider_utility += bonus_provider_utility

    delegation_num += will_delegate

    delta_float = B*delta - will_delegate
    if random.random() < delta_float:
        user_utility += 1 * (models[-1]['score']*reward_scale - \
            L*models[0]['token_price'])
        
        provider_utility += 1 * (L*models[0]['token_price'] - \
            models[-1]['output_token']*models[-1]['token_price']) * eta


    
        delegation_num += user_utility

    print(f'user utility: {user_utility}')
    print(f'provider utility: {provider_utility}')
    print(f'delegation num: {delegation_num}')




        


In [133]:
run_ours()

user utility honest run 1 time: 4.0
phase 1 exploration end. 
User utility: 15924.0
 Provider utility: 0
二阶段 user utility: 3267473.2499999898
二阶段 provider utility: 74553.61500000014
1827.279
1.1823215567939562
奖励的 bonus_provider_utility: 2160.054
user utility: 3307286.4999999898
provider utility: 78540.94800000013
delegation num: 933083


In [131]:
def run_7():
    user_utility = 0
    provider_utility = 0
    delegation_num = 0
    # phase_1: Exploration phase
    # delegate B times honest
    # 诚实的运行一次,user utility 为：

    user_utility_h1 = models[1]['score'] * reward_scale - models[0]['token_price'] * models[1]['output_token']
    print(f'user utility honest run 1 time: {user_utility_h1}')
    user_utility += B*user_utility_h1
    print(f'phase 1 exploration end. \nUser utility: {user_utility}\n Provider utility: {provider_utility}')

    delegation_num += B

    # 开始说谎， B 次B次的迭代，每一次使用次好的模型，但是汇报成最好的模型, 最多 R 次
    phase2_times = int(R//B)
    remain_times = R%B
    phase2_user_utility = 0
    ##
    count = 0
    bonus_provider_utility = 0
    bonus_user_utility = 0
    early_stop_flag = False
    for _ in range(phase2_times):
        phase2_user_utility += B * (models[1]['score']*reward_scale - \
            models[1]['output_token']*models[0]['token_price'])
        
        provider_utility += B * (models[1]['output_token']*models[0]['token_price'] - \
            models[1]['output_token']*models[1]['token_price']) * eta

        count += B
        if phase2_user_utility / count < threshold:
            early_stop_flag = True
            break
    else:
        phase2_user_utility += remain_times * (models[1]['score']*reward_scale - \
            models[1]['output_token']*models[0]['token_price'])
        provider_utility += remain_times * (models[1]['output_token']*models[0]['token_price'] - \
            models[1]['output_token']*models[1]['token_price']) * eta
        count += remain_times
        if phase2_user_utility / count < threshold:
            early_stop_flag = True
    print(f'二阶段 user utility: {phase2_user_utility}')
    print(f'二阶段 provider utility: {provider_utility}')
    user_utility += phase2_user_utility
    delegation_num += count

    if not early_stop_flag:
        bonus_user_utility = B * (models[-1]['score']*reward_scale - \
            L*models[0]['token_price'])
        
        bonus_provider_utility = B * (L*models[0]['token_price'] - \
            models[-1]['output_token']*models[-1]['token_price']) * eta

    print(bonus_provider_utility)
    user_utility += bonus_user_utility
    provider_utility += bonus_provider_utility
    delegation_num += B


    ## 第三阶段
    avg_reward = models[1]['score']*reward_scale
    avg_length = models[1]['output_token']

    delta = delta_1 + math.log(avg_reward) - math.log(avg_length) - avg_length/L
    print(delta)
    will_delegate = int(B * delta)

    user_utility += will_delegate * (models[-1]['score']*reward_scale - \
            L*models[0]['token_price'])
        
    bonus_provider_utility = will_delegate * (L*models[0]['token_price'] - \
            models[-1]['output_token']*models[-1]['token_price']) * eta
    print(f'奖励的 bonus_provider_utility: {bonus_provider_utility}')
    provider_utility += bonus_provider_utility

    delegation_num += will_delegate

    delta_float = B*delta - will_delegate
    if random.random() < delta_float:
        user_utility += 1 * (models[-1]['score']*reward_scale - \
            L*models[0]['token_price'])
        
        provider_utility += 1 * (L*models[0]['token_price'] - \
            models[-1]['output_token']*models[-1]['token_price']) * eta


    
        delegation_num += user_utility

    print(f'user utility: {user_utility}')
    print(f'provider utility: {provider_utility}')
    print(f'delegation num: {delegation_num}')




        


In [132]:
run_7()

user utility honest run 1 time: 3.55
phase 1 exploration end. 
User utility: 14132.55
 Provider utility: 0
二阶段 user utility: 3267473.2499999898
二阶段 provider utility: 74553.61500000014
1827.279
1.2365657034620647
奖励的 bonus_provider_utility: 2259.1980000000003
user utility: 3306091.7999999896
provider utility: 78640.55100000014
delegation num: 4239390.79999999


In [None]:
eps = 0.3
T = 1000000
B = T**(2*eps)
eta = 0.9
p = 1/10**5
mu_l = 1000
mu_r = 5
delta_1 = 15
L = 1500

def get_utility():
    avg_beta = L / mu_l
    avg_alpha = 

    item_1 = B * eta * 

SyntaxError: invalid syntax (3129836078.py, line 2)