# Training Notebook

Use this notebook to train and interact with an agent in real time, rather than having it run in the background.

In [1]:
%env WANDB_NOTEBOOK_NAME=/home/jovyan/work/notebooks/Traning Notebook.ipynb

import asyncio
import nest_asyncio
nest_asyncio.apply()

import os
import dotmap
import json
import importlib
import wandb

from poke_env.player.random_player import RandomPlayer
from poke_env.player.baselines import MaxBasePowerPlayer, SimpleHeuristicsPlayer
from poke_env.player.utils import cross_evaluate
from tabulate import tabulate
from poke_env.server_configuration import ServerConfiguration
from poke_env.player_configuration import PlayerConfiguration

from pokebot import BotPlayer
    
# Connection settings handed as `server_configuration` to every player this
# notebook creates (see main(), trainer() and train()).
# NOTE(review): the two positional values look like the server address
# (host:port) and the authentication endpoint URL -- confirm the field order
# against poke-env's ServerConfiguration.
server_config = ServerConfiguration(
    "ps:8000",
    "authentication-endpoint.com/action.php?"
)

async def main():
    """Cross-evaluate a RandomPlayer against a BotPlayer and print the outcome.

    Two players are created on the shared ``server_config`` and passed to
    ``cross_evaluate`` with ``n_challenges=20``. The returned mapping is
    rendered as a text table with ``tabulate``.
    """
    contenders = [
        RandomPlayer(max_concurrent_battles=10, server_configuration=server_config),
        BotPlayer(max_concurrent_battles=10, server_configuration=server_config),
    ]

    # Pit every contender against every other contender.
    scores = await cross_evaluate(contenders, n_challenges=20)

    # Header row: a placeholder corner cell followed by each player's username.
    header = ["-"]
    header.extend(contender.username for contender in contenders)
    rows = [header]

    # One row per player: its name, then the value recorded against each opponent.
    for name, versus in scores.items():
        row = [name]
        for opponent in versus:
            row.append(scores[name][opponent])
        rows.append(row)

    # Show the finished table.
    print(tabulate(rows))


# await main()



env: WANDB_NOTEBOOK_NAME=/home/jovyan/work/notebooks/Traning Notebook.ipynb


[34m[1mwandb[0m: W&B syncing is set to `offline` in this directory.  Run `wandb online` or set WANDB_MODE=online to enable cloud syncing.
2021-05-09 05:53:43,209 - RandomPlayer 1 - ERROR - Multiple exceptions: [Errno 111] Connect call failed ('127.0.0.1', 8000), [Errno 99] Cannot assign requested address
Traceback (most recent call last):
  File "/opt/conda/envs/pb_env/lib/python3.7/asyncio/base_events.py", line 949, in create_connection
    await self.sock_connect(sock, address)
  File "/opt/conda/envs/pb_env/lib/python3.7/asyncio/selector_events.py", line 473, in sock_connect
    return await fut
  File "/opt/conda/envs/pb_env/lib/python3.7/asyncio/futures.py", line 263, in __await__
    yield self  # This tells Task to wait for completion.
  File "/opt/conda/envs/pb_env/lib/python3.7/asyncio/tasks.py", line 318, in __wakeup
    future.result()
  File "/opt/conda/envs/pb_env/lib/python3.7/asyncio/futures.py", line 181, in result
    raise self._exception
  File "/opt/conda/envs/pb

In [2]:
#TODO: pipeline change, define output file in json

async def trainer(hparams, fp):
    """Assemble the trainer described by ``hparams`` and restore weights if present.

    The state-engine, model and trainer classes are resolved by name
    (``hparams.state_engine``, ``hparams.model``, ``hparams.trainer``) from
    ``pokebot.bots.state_engine``, ``pokebot.models`` and
    ``pokebot.trainers.trainer`` respectively.

    Args:
        hparams: Attribute-accessible hyperparameters. Reads ``policy``,
            ``agent``, ``state_engine``, ``se_params``, ``model``,
            ``model_params``, ``trainer`` and ``trainer_params``.
        fp: Path to a weights file. When it exists on disk, it is loaded
            into the trainer's agent.

    Returns:
        The constructed trainer object.
    """
    def _lookup(module_path, attr_name):
        # Import the module on demand and fetch the named attribute from it.
        return getattr(importlib.import_module(module_path), attr_name)

    policy_cfg = hparams.policy
    agent_cfg = hparams.agent

    # State engine: falsy/missing params fall back to no keyword arguments.
    engine_cls = _lookup('pokebot.bots.state_engine', hparams.state_engine)
    engine_kwargs = hparams.se_params or {}

    player = BotPlayer(
        player_configuration=PlayerConfiguration("JoeNextLine", None),
        server_configuration=server_config,
        state_engine=engine_cls(**engine_kwargs),
    )

    # Model wraps the player.
    model_cls = _lookup('pokebot.models', hparams.model)
    model_kwargs = hparams.model_params or {}
    model = model_cls(player, **model_kwargs)

    # Trainer ties player, model, policy and agent settings together.
    trainer_cls = _lookup('pokebot.trainers.trainer', hparams.trainer)
    trainer_kwargs = hparams.trainer_params or {}
    built = trainer_cls(player, model, policy_cfg, agent_cfg, **trainer_kwargs)

    # Resume from previously saved weights when the file is there.
    if os.path.exists(fp):
        built.agent.load_weights(fp)

    return built

async def train(trainer, fp):
    """Train, evaluate against three baseline players, then save the weights.

    Args:
        trainer: Object exposing awaitable ``train()`` and ``evaluate()``
            methods and an ``agent`` with ``save_weights``.
        fp: Destination path passed to ``trainer.agent.save_weights``.
    """
    await trainer.train()

    # Baseline opponents are created only after training has finished.
    baseline_classes = (RandomPlayer, MaxBasePowerPlayer, SimpleHeuristicsPlayer)
    opponents = [
        baseline(battle_format="gen8randombattle", server_configuration=server_config)
        for baseline in baseline_classes
    ]

    print("Beginning Eval")

    # TODO: Fix wandb logging
    await trainer.evaluate(opponents, logger=None)

    # Persist the weights once evaluation is done.
    trainer.agent.save_weights(fp)
    

In [3]:
wandb.init()

PIPELINE_PATH = os.path.join(os.curdir, "hparams.json")
hparams = dotmap.DotMap(json.load(open(PIPELINE_PATH, 'r')))
SAVE_PATH = os.path.join(wandb.run.dir, "test3_randbootstrap.h5")

wandb.config.update(hparams, allow_val_change=True)

save_path = SAVE_PATH

t = await trainer(hparams, save_path)

[34m[1mwandb[0m: You can sync this run to the cloud by running:
[34m[1mwandb[0m: [33mwandb sync /home/jovyan/work/notebooks/wandb/offline-run-20210509_055341-1rn94m6o[0m


[34m[1mwandb[0m: W&B syncing is set to `offline` in this directory.  Run `wandb online` or set WANDB_MODE=online to enable cloud syncing.


In [4]:
# Runs trainer.train(), evaluates against the baseline opponents built in
# train(), then saves the agent's weights to save_path.
await train(t, save_path)

Training for 10000 steps ...
Interval 1 (0 steps performed)
done, took 128.909 seconds
Beginning Eval
Results against player: RandomPlayer 3
DQN Evaluation: 95 victories out of 100 episodes
Results against player: MaxBasePowerPlay 1
DQN Evaluation: 57 victories out of 100 episodes
Results against player: SimpleHeuristics 1
DQN Evaluation: 9 victories out of 100 episodes


In [None]:
# # Load old model from wandb
# model_name = "betterthanrandom.h5"

# RUN_ID = "21wl53z8"

# MODEL_PATH = "notebooks/wandb/run-20200620_164732-3mmjfttq/test2.h5"

# ABS_PATH = "C:\\Users\\nacha\\Desktop\\PokeCode\\pokebot\\notebooks\\wandb\\run-20200620_214116-33ypcx1z\\test3_randbootstrap.h5"

# # weights_file = wandb.restore(model_name, run_path=MODEL_PATH, root=os.curdir)

# t.agent.load_weights(ABS_PATH)

In [5]:
# Test the agent by battling it yourself.
# NOTE(review): "meatout" is presumably the human opponent's username -- confirm
# against battle_human's signature.
asyncio.get_event_loop().run_until_complete(t.battle_human("meatout"))

DQN Evaluation: 1 victories out of 1 episodes
