# For Experiment Demo

In [5]:
# import packages
import numpy as np
import pandas as pd

from sklearn.model_selection import ParameterGrid
from tqdm import tqdm
import pandas as pd
import os

# project-local modules: run this from the folder that contains the Source package (or adjust your import path)
from Source.agent import *
from Source.env import *
from Source.utils import *
from Source.utils_trap import *

# Folder that receives the result files: the experiment loop below appends
# its records to "<agent class name>.csv" inside this folder.
# Please set up your own folder path.
filefolder = "./"

In [None]:
# Demo of experiment code

## experiment configuration
# Every combination of the entries below becomes one experiment setting.

# agent classes under comparison (handed to the experiment as agent_class)
agent_list = [
    Uniform_Agent,
    UCB_Agent,
    SequentialHalvingRR_Recycle_FailureFlag_History_Agent,
    DoublingSequentialHalving_Agent,
]

# environment classes (handed to the experiment as env_class)
env_list = [Env_Uncorrelated_Reward, Env_Correlated_Uniform]

# instance generators: each is called with K, dhigh and dlow and returns
# the (reward, demand) pair used by the experiment
parameter_list = [
    # Balanced_Trap family
    Balanced_Trap_HalfConsumption_Fix32_MultiR_HL,
    Balanced_Trap_HalfConsumption_Fix32_MultiR_Mixture,
    Balanced_Trap_HalfConsumption_Fix32_MultiR_HH,
    # One_suboptimal family
    One_suboptimal_MultiR_HL,
    One_suboptimal_MultiR_Mixture,
    One_suboptimal_MultiR_HH,
    # Geometry family
    Geometry_MultiR_HL,
    Geometry_MultiR_Mixture,
    Geometry_MultiR_HH,
    # Poly family
    Poly_MultiR_HL,
    Poly_MultiR_Mixture,
    Poly_MultiR_HH,
]

# values forwarded to the experiment as K, C and L
# (presumably number of arms, resource budgets and number of resources -- TODO confirm)
K_list = [256]
C_list = [np.array([1500., 1500.])]
L_list = [2]

# values forwarded to the instance generators as dhigh / dlow
dhigh_list = [0.9]
dlow_list = [0.1]

experiment_grid = ParameterGrid(
    dict(
        para=parameter_list,
        agent=agent_list,
        env=env_list,
        K=K_list,
        C=C_list,
        L=L_list,
        dhigh=dhigh_list,
        dlow=dlow_list,
    )
)

## run the sweep: one Experiment_MultiR call per setting in experiment_grid,
## each result appended as one row to "<agent class name>.csv" in filefolder

# layout of one saved record (one row per experiment setting)
record_columns = [
    "C", "K", "L", "n_experiment", "para-type", "env", "agent",
    "success", "std_success_rate",
    "reward_array", "demand_array", "mean_stop_times",
]

# forwarded to Experiment_MultiR as n_experiment; identical for every setting
n_experiment = 1000

for exp_setting_id, experiment in enumerate(experiment_grid):
    # derive the problem setting
    K = experiment["K"]
    L = experiment["L"]
    C = experiment["C"]

    # the instance generator returns the reward and demand handed to the experiment
    reward, demand = experiment["para"](K=K, dhigh=experiment["dhigh"], dlow=experiment["dlow"])
    r_or_p = False
    price = np.ones(K)
    env_class = experiment["env"]
    env_para = dict()  # no extra environment arguments
    agent_class = experiment["agent"]
    agent_para = dict()  # no extra agent arguments

    # fresh single-row frame for this setting
    numeric_record = pd.DataFrame(columns=record_columns)

    # conduct the experiment (the last two returned values are not used here)
    success_rate, std_success_rate, stop_times, _, _ = Experiment_MultiR(
        reward=reward,
        demand=demand,
        price=price,
        r_or_p=r_or_p,
        env_class=env_class,
        env_para=env_para,
        agent_class=agent_class,
        agent_para=agent_para,
        n_experiment=n_experiment,
        K=K,
        C=C,
        L=L,
        disable_tqdm=False, # for personal laptop
    )

    ## save experiment record
    ## NOTE(review): array-valued cells (C, reward, demand, ...) are written by
    ## to_csv in their str() form; confirm that eval(cell) can really parse them
    ## back, or convert them with .tolist() before saving.
    # os.path.join keeps the path valid even when filefolder has no trailing
    # separator; for the default "./" the result is the same as before.
    filename = os.path.join(filefolder, agent_class.__name__ + ".csv")
    # dtype=object keeps the mixed scalars/arrays as one flat row of cells
    numeric_record.loc[0] = np.array(
        [
            C, K, L, n_experiment, experiment['para'].__name__,
            env_class.__name__, agent_class.__name__,
            success_rate, std_success_rate,
            reward, demand,
            stop_times
        ],
        dtype=object,
    )
    # append to the agent's file; write the header only when the file is new
    numeric_record.to_csv(filename, mode='a', index=False, header=not os.path.exists(filename))
    print(f"{agent_class.__name__}, {experiment['para'].__name__}, {env_class.__name__}, {K}, {C}, {exp_setting_id+1}/{len(experiment_grid)} done, success {success_rate}, std {std_success_rate}")