In [95]:
import rats

# Set the rats logger to INFO; the "[info]" lines in the Orchestrator cell
# outputs further down are emitted at this level.
rats.set_log_level('INFO')

### Run episode: Low-level functions

In [96]:
def run_episode(e, a):
    """Play a single episode of agent ``a`` on environment ``e``.

    The environment is reset first, then the agent. The agent's handler is
    fetched once, and ``a.play()`` is called repeatedly until
    ``e.is_over()`` reports that the episode has finished.

    Args:
        e: environment exposing ``reset()`` and ``is_over()``.
        a: agent exposing ``reset()``, ``get_handler()`` and ``play()``.

    Returns:
        A ``(reward, penalty)`` tuple read from the agent's handler after
        the last step.
    """
    e.reset()
    a.reset()

    handler = a.get_handler()

    while True:
        if e.is_over():
            break
        a.play()
        # Debugging aid:
        # print(handler.get_current_state(), handler.get_penalty(), handler.get_reward())

    episode_reward = handler.get_reward()
    episode_penalty = handler.get_penalty()
    return episode_reward, episode_penalty

In [97]:
# One episode of a constant agent configured with action 1 on InvestorEnv(5, 20).
# `e` is kept as a notebook-level name because the following cells reuse it.
e = rats.InvestorEnv(5, 20)
c1 = rats.ConstantAgent(rats.EnvironmentHandler(e), action=1)

r, p = run_episode(e, c1)
print(f"Playing episode with {c1.name()}")
for label, value in (("reward:", r), ("penalty:", p)):
    print(label, value)

Playing episode with Constant Agent (1)
reward: 0.0
penalty: 0.0


In [98]:
# Same experiment with a constant agent configured with action 0.
# Reuses the environment `e` created in the previous cell and rebinds `c1`.
c1 = rats.ConstantAgent(rats.EnvironmentHandler(e), action=0)

r, p = run_episode(e, c1)
print(f"Playing episode with {c1.name()}")
for label, value in (("reward:", r), ("penalty:", p)):
    print(label, value)

Playing episode with Constant Agent (0)
reward: 0.0
penalty: 0.0


In [99]:
# One episode of a DualUCT agent on the environment `e` created in an
# earlier cell (this cell does not build its own environment).
dual_uct = rats.DualUCT(
    rats.EnvironmentHandler(e),
    max_depth=10,
    num_sim=100,
    risk_thd=0.5,
    gamma=0.9,
)

r, p = run_episode(e, dual_uct)
print(f"Playing episode with {dual_uct.name()}")
for label, value in (("reward:", r), ("penalty:", p)):
    print(label, value)


Playing episode with dual_uct
reward: 0.0
penalty: 0.0


### Run episode: C++ Orchestrator

In [100]:
# Evaluate a constant agent (action 0) through rats.Orchestrator instead of
# the hand-written run_episode loop. Results are reported via the logger.
rats.set_log_level('INFO')

# Fresh orchestrator and a fresh InvestorEnv(2, 20); note the first argument
# differs from the InvestorEnv(5, 20) used in the low-level cells.
o = rats.Orchestrator()
e = rats.InvestorEnv(2, 20)
c1 = rats.ConstantAgent(rats.EnvironmentHandler(e), action=0)

# Agent is loaded before the environment; the log below shows
# "Setting agent handler" right after the environment is loaded.
o.load_agent(c1)
o.load_environment(e)

# NOTE(review): 1000 is presumably the number of evaluation episodes - confirm.
o.run(1000)

[2023-08-28 14:49:48.241] [info] Load agent: Constant Agent (0)
[2023-08-28 14:49:48.241] [info] Load environment: InvestorEnv
[2023-08-28 14:49:48.241] [info] Setting agent handler
[2023-08-28 14:49:48.241] [info] Started
[2023-08-28 14:49:48.241] [info]   Agent: Constant Agent (0)
[2023-08-28 14:49:48.241] [info]   Environment: InvestorEnv
[2023-08-28 14:49:48.241] [info] Evaluation phase
[2023-08-28 14:49:48.242] [info] Evaluation results:
[2023-08-28 14:49:48.242] [info]   Mean reward: -5.202899
[2023-08-28 14:49:48.242] [info]   Mean penalty: 0.861992


In [101]:
# Evaluate DualUCT (risk_thd=0.6, gamma=0.9) through rats.Orchestrator.
# Results are reported via the logger.
rats.set_log_level('INFO')

# Fresh orchestrator and environment for this run.
o = rats.Orchestrator()
e = rats.InvestorEnv(2, 20)
uct = rats.DualUCT(rats.EnvironmentHandler(e), max_depth=10, num_sim=100, risk_thd=0.6, gamma=0.9)

# Agent is loaded before the environment.
o.load_agent(uct)
o.load_environment(e)

# NOTE(review): 1000 is presumably the number of evaluation episodes - confirm.
o.run(1000)

[2023-08-28 14:49:48.248] [info] Load agent: dual_uct
[2023-08-28 14:49:48.248] [info] Load environment: InvestorEnv
[2023-08-28 14:49:48.248] [info] Setting agent handler
[2023-08-28 14:49:48.248] [info] Started
[2023-08-28 14:49:48.248] [info]   Agent: dual_uct
[2023-08-28 14:49:48.248] [info]   Environment: InvestorEnv
[2023-08-28 14:49:48.248] [info] Evaluation phase
[2023-08-28 14:49:48.726] [info] Evaluation results:
[2023-08-28 14:49:48.726] [info]   Mean reward: -10.160714
[2023-08-28 14:49:48.726] [info]   Mean penalty: 0.831993


In [102]:
# Evaluate DualUCT again with risk_thd=0.4 and gamma=0.99 (max_depth and
# num_sim unchanged) through rats.Orchestrator. Results are reported via the logger.
rats.set_log_level('INFO')

# Fresh orchestrator and environment for this run.
o = rats.Orchestrator()
e = rats.InvestorEnv(2, 20)
uct = rats.DualUCT(rats.EnvironmentHandler(e), max_depth=10, num_sim=100, risk_thd=0.4, gamma=0.99)

# Agent is loaded before the environment.
o.load_agent(uct)
o.load_environment(e)

# NOTE(review): 1000 is presumably the number of evaluation episodes - confirm.
o.run(1000)

[2023-08-28 14:49:48.731] [info] Load agent: dual_uct
[2023-08-28 14:49:48.731] [info] Load environment: InvestorEnv
[2023-08-28 14:49:48.731] [info] Setting agent handler
[2023-08-28 14:49:48.731] [info] Started
[2023-08-28 14:49:48.731] [info]   Agent: dual_uct
[2023-08-28 14:49:48.731] [info]   Environment: InvestorEnv
[2023-08-28 14:49:48.731] [info] Evaluation phase
[2023-08-28 14:49:49.523] [info] Evaluation results:
[2023-08-28 14:49:49.523] [info]   Mean reward: -14.347221
[2023-08-28 14:49:49.523] [info]   Mean penalty: 0.783993
