# Bench

## Base sim (random input)

In [None]:
import random

import ray

from pokerl.env.pokemonblue import PokemonBlueEnv

In [None]:
# Start the Ray runtime for this notebook. The parent directory is shipped to
# the workers as their working directory (presumably so that `pokerl` is
# importable inside remote tasks -- confirm against the repo layout).
# ignore_reinit_error=True lets this cell be re-run in a live kernel without
# raising because Ray is already initialised.
ray.init(runtime_env={"working_dir": "../"}, ignore_reinit_error=True)

In [None]:
# Benchmark settings.
# Number of random environment steps performed by each simulated episode.
max_timesteps = 1_000

In [None]:
def simulate():
    """Play one episode of uniformly random actions on a fresh environment.

    A new ``PokemonBlueEnv`` is created, stepped ``max_timesteps`` times
    (module-level setting) with an action drawn at random from
    ``env.action_space_convertissor`` on every step, and then rendered.

    Returns:
        Whatever ``PokemonBlueEnv.render()`` returns after the last step.
    """
    emulator = PokemonBlueEnv()
    steps_done = 0
    while steps_done < max_timesteps:
        # The action table is looked up on every step, as the env owns it.
        chosen = random.choice(emulator.action_space_convertissor)
        emulator.step(chosen)
        steps_done += 1
    return emulator.render()


@ray.remote
def simulate_ray():
    """Ray task wrapper around ``simulate()``.

    Returns:
        The value returned by ``simulate()``, so that ``ray.get()`` on the
        task's object ref yields the episode result instead of ``None``.
    """
    return simulate()

In [None]:
%%time
# Sequential baseline: 20 episodes run one after another in this process.
# NOTE(review): the Ray cell launches 40 episodes, so compare time per
# episode rather than raw wall time.
for _episode in range(20):
    simulate()

In [None]:
%%time
# Parallel run: submit 40 episodes as Ray tasks, then block until every one
# of them has finished.
# NOTE(review): the sequential cell runs 20 episodes, so compare time per
# episode rather than raw wall time.
pending_episodes = [simulate_ray.remote() for _ in range(40)]
results = ray.get(pending_episodes)

## Bench successive appends: NumPy array vs deque

In [None]:
# Number of distinct synthetic transitions to generate.
amount_of_data = 100


def _random_frame():
    """Return a 100x100 nested list of random integers in [0, 255]."""
    return [[random.randint(0, 255) for _ in range(100)] for _ in range(100)]


# Synthetic (state, action, reward, next_state, done) columns, one entry per
# transition. They are generated in the same order as the tuple fields.
state = [_random_frame() for _ in range(amount_of_data)]
action = [random.randint(0, 10) for _ in range(amount_of_data)]
reward = [random.randint(0, 1) for _ in range(amount_of_data)]
next_state = [_random_frame() for _ in range(amount_of_data)]
done = [0] * amount_of_data

In [None]:
%%time
from collections import deque  # noqa: E402

# Bounded replay memory sized to hold every transition appended below.
capacity = amount_of_data * 1000
memory = deque(maxlen=capacity)
for step in range(capacity):
    # Cycle through the pre-generated transitions.
    src = step % amount_of_data
    transition = (state[src], action[src], reward[src], next_state[src], done[src])
    memory.append(transition)

In [None]:
%%time
import numpy as np  # noqa: E402

# Pre-allocated replay buffer: one row per transition, each row holding five
# 100x100 planes (state, action, reward, next_state, done). The scalar fields
# are broadcast across their whole plane by the assignments below.
#
# dtype=np.uint8 holds every value generated for this benchmark (0..255) and,
# with amount_of_data = 100, keeps the buffer at about 5 GB; the float64
# default would need about 40 GB once every row is written.
capacity = amount_of_data * 1000
memory = np.zeros((capacity, 5, 100, 100), dtype=np.uint8)
# Write as many transitions as the deque cell appends, cycling through the
# pre-generated data in the same way, so that the two timings are comparable.
for i in range(capacity):
    i_ = i % amount_of_data
    memory[i, 0] = state[i_]
    memory[i, 1] = action[i_]
    memory[i, 2] = reward[i_]
    memory[i, 3] = next_state[i_]
    memory[i, 4] = done[i_]

Conclusion: the deque is faster than the pre-allocated NumPy array.