# Training Notebook

Use this notebook to train an agent and interact with it in real time, rather than running training as a background job.

In [1]:
# Set the WANDB_NOTEBOOK_NAME environment variable for this kernel session.
%env WANDB_NOTEBOOK_NAME=Training_Loop

# Load IPython's autoreload extension and switch it to mode 2.
# NOTE(review): the saved outputs of later cells show autoreload failing on
# tensorflow.python.keras.layers.core; consider restricting which modules are
# reloaded if that noise becomes a problem.
%load_ext autoreload
%autoreload 2

# Imports
import dotmap
import json
import asyncio
import os
import importlib
import wandb
import nest_asyncio
nest_asyncio.apply()

from poke_env.player.random_player import RandomPlayer
from poke_env.player_configuration import PlayerConfiguration
from poke_env.player.baselines import MaxBasePowerPlayer, SimpleHeuristicsPlayer

from pokebot import BotPlayer

# Start the wandb run for this session; wandb.run.dir is used below to build
# the default weights path.
wandb.init(reinit=False)
wandb.run.save()
# Constants

# Hyperparameter JSON file, resolved relative to the current directory.
PIPELINE_PATH = os.path.join(os.curdir, "hparams.json")
# Default weights file inside the current wandb run directory.
# NOTE(review): SAVE_PATH is reassigned in a later cell before it is used.
SAVE_PATH = os.path.join(wandb.run.dir, "betterthanrandom.h5")

env: WANDB_NOTEBOOK_NAME=Training_Loop


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
#TODO: pipeline change, define output file in json

def _load_component(module_path, class_name, params):
    """Import ``module_path`` and return ``(class, kwargs)`` for ``class_name``.

    ``params`` is replaced by an empty dict when it is falsy, so a missing or
    empty parameter section results in the class being called with no extra
    keyword arguments.
    """
    module = importlib.import_module(module_path)
    return getattr(module, class_name), (params if params else {})


async def trainer(hparams, fp):
    """Build the player, model and trainer described by ``hparams``.

    Args:
        hparams: Attribute-style hyperparameter container (a ``DotMap`` in
            this notebook) providing ``policy``, ``agent``, ``state_engine``,
            ``se_params``, ``model``, ``model_params``, ``trainer`` and
            ``trainer_params``. The ``state_engine``, ``model`` and
            ``trainer`` entries are class names looked up in
            ``pokebot.bots.state_engine``, ``pokebot.models`` and
            ``pokebot.trainers.trainer`` respectively.
        fp: Path to a weights file. If it exists, the weights are loaded into
            the trainer's agent before returning.

    Returns:
        The constructed trainer instance.
    """
    policy_params = hparams.policy
    agent_params = hparams.agent

    se_class, se_kwargs = _load_component(
        'pokebot.bots.state_engine', hparams.state_engine, hparams.se_params
    )
    player = BotPlayer(
        player_configuration=PlayerConfiguration("JoeNextLine", "underground"),
        state_engine=se_class(**se_kwargs),
    )

    model_class, model_kwargs = _load_component(
        'pokebot.models', hparams.model, hparams.model_params
    )
    model = model_class(player, **model_kwargs)

    trainer_class, trainer_kwargs = _load_component(
        'pokebot.trainers.trainer', hparams.trainer, hparams.trainer_params
    )
    # Named bot_trainer so the local does not shadow this coroutine's own name.
    bot_trainer = trainer_class(
        player, model, policy_params, agent_params, **trainer_kwargs
    )

    # Resume from previously saved weights when a file is already present.
    if os.path.exists(fp):
        bot_trainer.agent.load_weights(fp)

    return bot_trainer

async def train(trainer, fp):
    """Train the agent, evaluate it against baseline opponents, and save weights.

    Args:
        trainer: Object exposing awaitable ``train()`` and
            ``evaluate(opponents)`` methods and an ``agent`` with
            ``save_weights(path)``.
        fp: Destination path for the agent's weights.

    The weights are written once training has completed, even when building
    the opponents or running the evaluation raises; the exception still
    propagates to the caller afterwards.
    """
    await trainer.train()

    try:
        opponents = [
            opponent_class(battle_format="gen8randombattle")
            for opponent_class in (
                RandomPlayer,
                MaxBasePowerPlayer,
                SimpleHeuristicsPlayer,
            )
        ]

        print("Beginning Eval")

        await trainer.evaluate(opponents)
    finally:
        # Evaluation failing or being interrupted must not discard the
        # weights produced by the training run that just finished.
        trainer.agent.save_weights(fp)
    

In [3]:
hparams = dotmap.DotMap(json.load(open(PIPELINE_PATH, 'r')))
SAVE_PATH = os.path.join(wandb.run.dir, "test3_randbootstrap.h5")

wandb.config.update(hparams, allow_val_change=True)

save_path = SAVE_PATH

t = await trainer(hparams, save_path)

In [4]:
# Load old model from wandb
# NOTE(review): model_name, RUN_ID and MODEL_PATH are not referenced by any
# active code in this cell; model_name and MODEL_PATH appear only in the
# commented-out wandb.restore call below.
model_name = "betterthanrandom.h5"

RUN_ID = "21wl53z8"

MODEL_PATH = "notebooks/wandb/run-20200620_164732-3mmjfttq/test2.h5"

# NOTE(review): machine-specific absolute path; this cell only works on the
# machine where this wandb run directory exists.
ABS_PATH = "C:\\Users\\nacha\\Desktop\\PokeCode\\pokebot\\notebooks\\wandb\\run-20200620_214116-33ypcx1z\\test3_randbootstrap.h5"

# weights_file = wandb.restore(model_name, run_path=MODEL_PATH, root=os.curdir)

# Overwrite the agent's current weights with the ones stored at ABS_PATH.
t.agent.load_weights(ABS_PATH)

[autoreload of tensorflow.python.keras.layers.core failed: Traceback (most recent call last):
  File "C:\Users\nacha\Miniconda3\envs\pk_env\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\Users\nacha\Miniconda3\envs\pk_env\lib\site-packages\IPython\extensions\autoreload.py", line 394, in superreload
    module = reload(module)
  File "C:\Users\nacha\Miniconda3\envs\pk_env\lib\imp.py", line 315, in reload
    return importlib.reload(module)
  File "C:\Users\nacha\Miniconda3\envs\pk_env\lib\importlib\__init__.py", line 147, in reload
    raise ImportError(msg.format(name), name=name)
ImportError: module rl.agents.dqn not in sys.modules
]


In [4]:
# Train, evaluate against the baseline opponents, and save weights to save_path.
await train(t, save_path)

Training for 10000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 12:18 - reward: 0.0000e+00

[autoreload of tensorflow.python.keras.layers.core failed: Traceback (most recent call last):
  File "C:\Users\nacha\Miniconda3\envs\pk_env\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\Users\nacha\Miniconda3\envs\pk_env\lib\site-packages\IPython\extensions\autoreload.py", line 394, in superreload
    module = reload(module)
  File "C:\Users\nacha\Miniconda3\envs\pk_env\lib\imp.py", line 315, in reload
    return importlib.reload(module)
  File "C:\Users\nacha\Miniconda3\envs\pk_env\lib\importlib\__init__.py", line 147, in reload
    raise ImportError(msg.format(name), name=name)
ImportError: module rl.agents.dqn not in sys.modules
]


done, took 101.608 seconds
Beginning Eval
Results against player: RandomPlayer 3
DQN Evaluation: 97 victories out of 100 episodes
Results against player: MaxBasePowerPlayer 1
DQN Evaluation: 63 victories out of 100 episodes
Results against player: SimpleHeuristicsPlayer 1
DQN Evaluation: 19 victories out of 100 episodes


In [5]:
# Test agent against yourself
asyncio.get_event_loop().run_until_complete(t.battle_human("meatout"))

DQN Evaluation: 0 victories out of 1 episodes


KeyboardInterrupt: 