In [1]:
import asyncio

import numpy as np
from gym.spaces import Box, Space
from gym.utils.env_checker import check_env
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy
from tabulate import tabulate
from keras.layers import Dense, Flatten
from keras.models import Sequential
from keras.optimizers.legacy import Adam

from poke_env.environment.abstract_battle import AbstractBattle
from poke_env.environment import Move
from poke_env.player import (
    Player,
    Gen8EnvSinglePlayer,
    MaxBasePowerPlayer,
    ObsType,
    RandomPlayer,
    SimpleHeuristicsPlayer,
    background_cross_evaluate,
    background_evaluate_player,
    wrap_for_old_gym_api,
)
from poke_env.data import GenData


In [2]:
# Switch the kernel's working directory so the relative paths used in later cells
# ("./python", "./data/team1.txt", "./data/team2.txt") resolve.
# NOTE(review): this path is specific to one machine; adjust it when running elsewhere.
%cd ~/cs/15888/poke
# List the directory contents as a quick check of where the kernel now is.
!ls

/Users/richardzhan/cs/15888/poke
[1m[36mdata[m[m                [1m[36mpokemon-showdown[m[m    [1m[36mscripts[m[m
poke.code-workspace [1m[36mpython[m[m              [1m[36msrc[m[m


In [3]:
import sys
# Add the relative "./python" directory to the module search path.
# The entry is relative, so it depends on the working directory set by the earlier %cd cell.
# Presumably this is where the `rzlib` package imported in the next cell lives; confirm.
sys.path.append("./python")

In [4]:
# Enable IPython's autoreload extension. Mode 2 reloads imported modules before
# each cell runs, so edits to modules such as rzlib are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
from rzlib.env import embed
from rzlib.env.simple_rl_player import SimpleRLPlayer

In [5]:
# Battle format string passed as `battle_format` to every player built below.
_battle_format = "gen8ou"
# Team file loaded for the opponent players (random and max-base-power).
_team1_fname = "./data/team1.txt"
# Team file loaded for the RL player.
_team2_fname = "./data/team2.txt"

In [6]:
def load_team(fname):
    """Return the full text of the team file at ``fname``.

    Args:
        fname: Path of the text file to read.

    Returns:
        The file contents as a single string (empty string for an empty file).

    Raises:
        OSError: If the file cannot be opened.
    """
    # read() returns the whole file at once; no need to join the individual lines.
    with open(fname, "r") as team_file:
        return team_file.read()

def create_random_player():
    """Build a ``RandomPlayer`` for ``_battle_format`` using the team in ``_team1_fname``."""
    team_text = load_team(_team1_fname)
    return RandomPlayer(team=team_text, battle_format=_battle_format)

def create_max_power_player():
    """Build a ``MaxBasePowerPlayer`` for ``_battle_format`` using the team in ``_team1_fname``."""
    team_text = load_team(_team1_fname)
    return MaxBasePowerPlayer(team=team_text, battle_format=_battle_format)

def create_rl_player(opponent, **kwargs):
    """Build a ``SimpleRLPlayer`` environment that battles ``opponent``.

    Args:
        opponent: Player the RL environment is constructed against.
        **kwargs: Extra keyword arguments forwarded unchanged to ``SimpleRLPlayer``.

    Returns:
        A ``SimpleRLPlayer`` created with ``start_challenging=True``, the
        module-level battle format, and the team stored in ``_team2_fname``.
    """
    fixed_options = dict(
        battle_format=_battle_format,
        opponent=opponent,
        start_challenging=True,
        team=load_team(_team2_fname),
    )
    return SimpleRLPlayer(**fixed_options, **kwargs)

def create_rl_env_random(sanity_check=True):
    """Create the training and evaluation environments, each facing a random player.

    Args:
        sanity_check: When true, first build a throwaway environment and run
            ``check_env`` on it to ensure the class is consistent with the
            OpenAI gym API.

    Returns:
        A ``(train_env, eval_env)`` tuple. Each is a separate RL player
        environment passed through ``wrap_for_old_gym_api``.

    Raises:
        Whatever ``check_env`` raises when the sanity check fails. The
        throwaway environment is closed before the error propagates.
    """
    print("create_rl_env_random()...")
    if sanity_check:
        # First test the environment to ensure the class is consistent
        # with the OpenAI API
        sanity_check_env = create_rl_player(opponent=create_random_player())
        try:
            check_env(sanity_check_env)
        finally:
            # Close the throwaway environment even when the check fails,
            # so a failed check does not leave it open.
            sanity_check_env.close()

    # Create one environment for training and one for evaluation
    train_env = wrap_for_old_gym_api(
        create_rl_player(opponent=create_random_player())
    )
    eval_env = wrap_for_old_gym_api(
        create_rl_player(opponent=create_random_player())
    )

    return train_env, eval_env


def create_model(train_env, nb_steps=10000):
    """Build and compile a double-DQN agent sized to ``train_env``.

    Args:
        train_env: Environment whose ``action_space.n`` and
            ``observation_space.shape`` determine the network dimensions.
        nb_steps: Used both as the replay-memory limit and as the number of
            steps over which epsilon is annealed from 1.0 to 0.05.

    Returns:
        A compiled ``DQNAgent``.
    """
    print("create_model()...")

    # Network dimensions come from the environment's spaces.
    # NOTE(review): the leading 1 presumably matches window_length=1 below; confirm.
    num_actions = train_env.action_space.n
    observation_shape = (1,) + train_env.observation_space.shape

    # Feed-forward Q-network: 128 -> flatten -> 64 -> one linear output per action
    network = Sequential(
        [
            Dense(128, activation="elu", input_shape=observation_shape),
            Flatten(),
            Dense(64, activation="elu"),
            Dense(num_actions, activation="linear"),
        ]
    )

    # Replay memory holding at most nb_steps entries
    replay_memory = SequentialMemory(limit=nb_steps, window_length=1)

    # Epsilon-greedy exploration, linearly annealed over nb_steps
    exploration_policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr="eps",
        value_max=1.0,
        value_min=0.05,
        value_test=0.0,
        nb_steps=nb_steps,
    )

    agent = DQNAgent(
        model=network,
        nb_actions=num_actions,
        policy=exploration_policy,
        memory=replay_memory,
        nb_steps_warmup=1000,
        gamma=0.99,
        target_model_update=1000,
        delta_clip=0.01,
        enable_double_dqn=True,
    )
    agent.compile(Adam(learning_rate=0.00025), metrics=["mae"])

    return agent

def train_model(*, dqn: DQNAgent, train_env):
    """Fit ``dqn`` on ``train_env`` and close the environment afterwards.

    Args:
        dqn: Agent to train. It runs for ``dqn.policy.nb_steps`` steps.
        train_env: Environment to train on. It is closed when training ends,
            whether ``fit`` returns normally or raises.

    Raises:
        Whatever ``dqn.fit`` raises. The environment is closed first.
    """
    print("train_model()...", dqn.policy.nb_steps)
    try:
        dqn.fit(train_env, nb_steps=dqn.policy.nb_steps)
    finally:
        # Close the environment even if training fails part-way through.
        train_env.close()

def eval_model(*, agent: DQNAgent, eval_env, opponent: Player, num_eval_episodes=100):
    """Run ``agent`` against ``opponent`` on ``eval_env`` and print the win count.

    Args:
        agent: Agent to evaluate via ``agent.test``.
        eval_env: Evaluation environment. It is reset with ``restart=True``
            and the new opponent before testing.
        opponent: Player to evaluate against; its ``username`` is printed.
        num_eval_episodes: Number of test episodes to run.
    """
    print("eval_model()...")

    # Point the environment at the requested opponent before testing.
    eval_env.reset_env(opponent=opponent, restart=True)

    header = f"Results against {opponent.username}:"
    print(header)

    agent.test(
        eval_env,
        visualize=False,
        verbose=False,
        nb_episodes=num_eval_episodes,
    )

    # The counters are read from the environment only after testing has finished.
    summary = f"DQN Evaluation: {eval_env.n_won_battles} victories out of {eval_env.n_finished_battles} episodes"
    print(summary)


In [7]:

# Build the training and evaluation environments (sanity check on by default)
train_env, eval_env = create_rl_env_random()

# Create the model
dqn = create_model(train_env=train_env, nb_steps=100000)

# Train the model (train_model also closes train_env)
train_model(dqn=dqn, train_env=train_env)

# Evaluate the model against each opponent in turn: random first, then max base power.
# Each opponent is created just before its own evaluation run.
for make_opponent in (create_random_player, create_max_power_player):
    eval_model(
        agent=dqn,
        eval_env=eval_env,
        opponent=make_opponent(),
        num_eval_episodes=100,
    )

create_rl_env_random()...


  logger.warn(
  logger.warn(
  if not isinstance(terminated, (bool, np.bool8)):


create_model()...
train_model()... 100000
Training for 100000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 8:30 - reward: 0.0000e+00

2023-11-29 10:02:15.793919: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2023-11-29 10:02:15.803550: W tensorflow/c/c_api.cc:305] Operation '{name:'dense_2_1/bias/Assign' id:184 op device:{requested: '', assigned: ''} def:{{{node dense_2_1/bias/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_2_1/bias, dense_2_1/bias/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
  updates=self.state_updates,
2023-11-29 10:02:15.938470: W tensorflow/c/c_api.cc:305] Operation '{name:'dense_2/BiasAdd' id:95 op device:{requested: '', assigned: ''} def:{{{node dense_2/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_2/MatMul, dense_2/BiasAdd/ReadVariableOp)}

 1002/10000 [==>...........................] - ETA: 3:01 - reward: -0.9116

2023-11-29 10:02:35.990326: W tensorflow/c/c_api.cc:305] Operation '{name:'dense_2_1/BiasAdd' id:189 op device:{requested: '', assigned: ''} def:{{{node dense_2_1/BiasAdd}} = BiasAdd[T=DT_FLOAT, _has_manual_control_dependencies=true, data_format="NHWC"](dense_2_1/MatMul, dense_2_1/BiasAdd/ReadVariableOp)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-11-29 10:02:36.053861: W tensorflow/c/c_api.cc:305] Operation '{name:'loss_3/AddN' id:411 op device:{requested: '', assigned: ''} def:{{{node loss_3/AddN}} = AddN[N=2, T=DT_FLOAT, _has_manual_control_dependencies=true](loss_3/mul, loss_3/mul_1)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2023-11-29 10:02:3



In [None]:
# Manual cleanup of the training environment.
# train_model() also calls train_env.close() itself, so this cell is only
# needed when training did not reach that call.
train_env.close()

In [None]:
# Close the evaluation environment; none of the visible cells above close it.
eval_env.close()

KeyboardInterrupt: 