# Reproduce Table 2

In [1]:
import numpy as np
from Source.agent import HDoC_Kano, LUCB_G_Kano, APT_G_Kano
from Source.env import Environment_Bernoulli
from tqdm import tqdm

## Threshold 1

In [2]:
# Threshold 1 setting: K = 10 Bernoulli arms compared against threshold xi = 0.5.
K = 10
rlist = np.ones(K)
rlist[0:3] = 0.1
rlist[3:7] = 0.35 + 0.1 * np.arange(4)  # 0.35, 0.45, 0.55, 0.65
rlist[7:10] = 0.9
xi = 0.5
delta = 0.05
qualified_arm_num = np.sum(rlist > xi)
# Arms a correct run must output. The means above xi sit at 0-based positions
# 5..9 of rlist, so these hard-coded labels assume the agents report arms
# 1-based -- TODO confirm against Source.agent.
expected_arms = np.arange(6, 11)

n_exp = 1000

for agent_alg in [HDoC_Kano, LUCB_G_Kano, APT_G_Kano]:
    # Column j holds agent.t at the j-th arm output of each run. A correct run
    # fills only the first qualified_arm_num (= 5) columns; unused slots stay 0.
    output_time_ = np.zeros((n_exp, K))
    stop_time_ = np.zeros(n_exp)
    correctness_ = np.ones(n_exp)
    for exp_id in tqdm(range(n_exp)):
        # The run index doubles as the environment's random seed.
        env = Environment_Bernoulli(rlist=rlist, K=K, random_seed=exp_id)
        agent = agent_alg(K=K, delta=delta, xi=xi)
        count_stop = 0
        output_list = []
        while not agent.stop:
            arm = agent.action()
            reward = env.response(arm)
            output_arm = agent.observe(reward)
            if output_arm is not None:
                output_list.append(output_arm)
                output_time_[exp_id, count_stop] = agent.t
                count_stop += 1
        stop_time_[exp_id] = agent.t
        # np.array_equal also copes with a wrong *number* of outputs; an
        # elementwise `!=` on mismatched shapes raises on recent NumPy instead
        # of marking the run as incorrect.
        if not np.array_equal(np.sort(output_list), expected_arms):
            correctness_[exp_id] = 0

    mean_output_time = np.mean(output_time_, axis=0)[0:qualified_arm_num]
    # NOTE: despite the "var" names/labels, sqrt(var / n_exp) is the standard
    # error of the mean over the n_exp runs.
    var_output_time = np.sqrt(np.var(output_time_, axis=0) / n_exp)[0:qualified_arm_num]
    mean_stop_time = np.mean(stop_time_)
    var_stop_time = np.sqrt(np.var(stop_time_) / n_exp)
    mean_success = np.mean(correctness_)
    # Use the class name; `agent.__str__` without a call is the bound method
    # and prints as "<method-wrapper '__str__' of ... object at 0x...>".
    algname = type(agent).__name__
    print(f"For algorithm {algname}, ")
    print(f"output time is {mean_output_time}, mean stop time is {mean_stop_time}")
    print(f"var of output time is {var_output_time}, var of stop time is {var_stop_time}")
    print(f"mean correctness rate is {mean_success}")

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [02:02<00:00,  8.15it/s]


For algorithm <method-wrapper '__str__' of HDoC_Kano object at 0x0000019B3175B750>, 
output time is [ 112.693  148.261  182.284  728.    5579.467], mean stop time is 10281.5
var of output time is [ 0.85210255  0.88134606  0.94371995  6.93004935 48.35434422], var of stop time is 65.85288066592075
mean correctness rate is 1.0


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [02:26<00:00,  6.82it/s]


For algorithm <method-wrapper '__str__' of LUCB_G_Kano object at 0x0000019B3174DF90>, 
output time is [ 127.152  162.75   194.604  813.982 5789.618], mean stop time is 10230.766
var of output time is [ 0.91268992  0.92602889  1.00150646  7.78748764 50.19773352], var of stop time is 65.84916676195682
mean correctness rate is 1.0


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [01:55<00:00,  8.69it/s]

For algorithm <method-wrapper '__str__' of APT_G_Kano object at 0x0000019B31142490>, 
output time is [ 6124.561  6331.86   6532.885  8273.74  10188.843], mean stop time is 10266.644
var of output time is [45.09232385 45.10903449 45.74690971 53.53435955 65.00402056], var of stop time is 65.09744012834913
mean correctness rate is 1.0





## Threshold 2

In [3]:
# Threshold 2 setting: K = 6 Bernoulli arms with means 0.1, 0.2, ..., 0.6
# compared against threshold xi = 0.35.
K = 6
rlist = np.arange(1, K+1) * 0.1
xi = 0.35
delta = 0.05
qualified_arm_num = np.sum(rlist > xi)
# Arms a correct run must output. The means above xi sit at 0-based positions
# 3..5 of rlist, so these hard-coded labels assume the agents report arms
# 1-based -- TODO confirm against Source.agent.
expected_arms = np.arange(4, 7)

n_exp = 1000

for agent_alg in [HDoC_Kano, LUCB_G_Kano, APT_G_Kano]:
    # Column j holds agent.t at the j-th arm output of each run. A correct run
    # fills only the first qualified_arm_num (= 3) columns; unused slots stay 0.
    output_time_ = np.zeros((n_exp, K))
    stop_time_ = np.zeros(n_exp)
    correctness_ = np.ones(n_exp)
    for exp_id in tqdm(range(n_exp)):
        # The run index doubles as the environment's random seed.
        env = Environment_Bernoulli(rlist=rlist, K=K, random_seed=exp_id)
        agent = agent_alg(K=K, delta=delta, xi=xi)
        count_stop = 0
        output_list = []
        while not agent.stop:
            arm = agent.action()
            reward = env.response(arm)
            output_arm = agent.observe(reward)
            if output_arm is not None:
                output_list.append(output_arm)
                output_time_[exp_id, count_stop] = agent.t
                count_stop += 1
        stop_time_[exp_id] = agent.t
        # np.array_equal also copes with a wrong *number* of outputs; an
        # elementwise `!=` on mismatched shapes raises on recent NumPy instead
        # of marking the run as incorrect.
        if not np.array_equal(np.sort(output_list), expected_arms):
            correctness_[exp_id] = 0

    mean_output_time = np.mean(output_time_, axis=0)[0:qualified_arm_num]
    # NOTE: despite the "var" names/labels, sqrt(var / n_exp) is the standard
    # error of the mean over the n_exp runs.
    var_output_time = np.sqrt(np.var(output_time_, axis=0) / n_exp)[0:qualified_arm_num]
    mean_stop_time = np.mean(stop_time_)
    var_stop_time = np.sqrt(np.var(stop_time_) / n_exp)
    mean_success = np.mean(correctness_)
    algname = type(agent).__name__
    print(f"For algorithm {algname}, ")
    print(f"output time is {mean_output_time}, mean stop time is {mean_stop_time}")
    print(f"var of output time is {var_output_time}, var of stop time is {var_stop_time}")
    print(f"mean correctness rate is {mean_success}")

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [01:57<00:00,  8.49it/s]


For algorithm HDoC_Kano, 
output time is [ 230.14   680.486 5273.337], mean stop time is 9929.466
var of output time is [ 3.49516672  7.40935488 49.10695706], var of stop time is 64.79306310743458
mean correctness rate is 1.0


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [02:21<00:00,  7.07it/s]


For algorithm LUCB_G_Kano, 
output time is [ 259.131  750.624 5505.389], mean stop time is 9946.794
var of output time is [ 3.68766292  8.20783532 51.33596657], var of stop time is 66.25230131522979
mean correctness rate is 1.0


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [01:50<00:00,  9.03it/s]

For algorithm APT_G_Kano, 
output time is [6812.782 7905.858 9874.331], mean stop time is 9958.161
var of output time is [49.43517016 52.7293842  63.59084519], var of stop time is 63.192374611807395
mean correctness rate is 1.0





## Threshold 3

In [4]:
# Threshold 3 setting: K = 10 Bernoulli arms, three at 0.55 and seven at 0.45,
# compared against threshold xi = 0.5.
K = 10
rlist = np.ones(K)
rlist[0:3] = 0.55
rlist[3:10] = 0.45
xi = 0.5
delta = 0.05
qualified_arm_num = np.sum(rlist > xi)
# Arms a correct run must output. The means above xi sit at 0-based positions
# 0..2 of rlist, so these hard-coded labels assume the agents report arms
# 1-based -- TODO confirm against Source.agent.
expected_arms = np.arange(1, 4)

n_exp = 1000

for agent_alg in [HDoC_Kano, LUCB_G_Kano, APT_G_Kano]:
    # Column j holds agent.t at the j-th arm output of each run. A correct run
    # fills only the first qualified_arm_num (= 3) columns; unused slots stay 0.
    output_time_ = np.zeros((n_exp, K))
    stop_time_ = np.zeros(n_exp)
    correctness_ = np.ones(n_exp)
    for exp_id in tqdm(range(n_exp)):
        # The run index doubles as the environment's random seed.
        env = Environment_Bernoulli(rlist=rlist, K=K, random_seed=exp_id)
        agent = agent_alg(K=K, delta=delta, xi=xi)
        count_stop = 0
        output_list = []
        while not agent.stop:
            arm = agent.action()
            reward = env.response(arm)
            output_arm = agent.observe(reward)
            if output_arm is not None:
                output_list.append(output_arm)
                output_time_[exp_id, count_stop] = agent.t
                count_stop += 1
        stop_time_[exp_id] = agent.t
        # np.array_equal also copes with a wrong *number* of outputs; an
        # elementwise `!=` on mismatched shapes raises on recent NumPy instead
        # of marking the run as incorrect.
        if not np.array_equal(np.sort(output_list), expected_arms):
            correctness_[exp_id] = 0

    mean_output_time = np.mean(output_time_, axis=0)[0:qualified_arm_num]
    # NOTE: despite the "var" names/labels, sqrt(var / n_exp) is the standard
    # error of the mean over the n_exp runs.
    var_output_time = np.sqrt(np.var(output_time_, axis=0) / n_exp)[0:qualified_arm_num]
    mean_stop_time = np.mean(stop_time_)
    var_stop_time = np.sqrt(np.var(stop_time_) / n_exp)
    mean_success = np.mean(correctness_)
    algname = type(agent).__name__
    print(f"For algorithm {algname}, ")
    print(f"output time is {mean_output_time}, mean stop time is {mean_stop_time}")
    print(f"var of output time is {var_output_time}, var of stop time is {var_stop_time}")
    print(f"mean correctness rate is {mean_success}")

100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [09:09<00:00,  1.82it/s]


For algorithm HDoC_Kano, 
output time is [ 9000.185 12827.323 15959.258], mean stop time is 46020.593
var of output time is [98.61865932 97.04073004 87.65625422], var of stop time is 146.28277242844078
mean correctness rate is 1.0


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [10:56<00:00,  1.52it/s]


For algorithm LUCB_G_Kano, 
output time is [10360.627 14094.633 17075.395], mean stop time is 46008.908
var of output time is [109.47195518 101.20323873  93.1899427 ], var of stop time is 146.31635826364734
mean correctness rate is 1.0


100%|██████████████████████████████████████████████████████████████████████████████| 1000/1000 [08:37<00:00,  1.93it/s]

For algorithm APT_G_Kano, 
output time is [44595.747 45492.469 45896.278], mean stop time is 46051.473
var of output time is [147.29192121 146.59083598 146.27782723], var of stop time is 146.12555674238166
mean correctness rate is 1.0



