In [1]:
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import logging

logging.getLogger("tensorflow").setLevel(logging.ERROR)  # this goes *before* tf import

import numpy as np
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPool2D, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError, MSE

import tensorflow as tf

os.environ['AUTOGRAPH_VERBOSITY'] = '1'
tf.autograph.set_verbosity(0)

from tqdm import tqdm

from kaggle_environments import make
from deep_q_learning import DDQN
from my_utils import my_train, my_evaluate, mean_reward

Loading environment lux_ai_s2 failed: No module named 'pettingzoo'


In [2]:
# ConnectX environment instance created through kaggle_environments.
env = make("connectx", debug=False)

# Shape of the processed observation (see process_state) and the number of
# discrete actions handed to the DDQN agent.
state_space_shape, num_actions = (6, 7), 7

# Training hyperparameters.
num_episodes = 1000
learning_rate, discount_factor = 0.01, 0.99
batch_size, memory_size = 256, 2048

# Exploration schedule constants forwarded to my_train.
# NOTE(review): the 1.5 factor presumably makes epsilon reach epsilon_min after
# about two thirds of num_episodes, assuming a linear per-episode decay --
# confirm against my_train.
epsilon, epsilon_min = 1, 0.1
epsilon_decay = (epsilon - epsilon_min) * 1.5 / num_episodes

In [17]:
# Use the public Keras namespace. The private ``tensorflow.python.keras`` path
# is a separate legacy implementation and must not be mixed with the
# ``tensorflow.keras`` Input/Dense/Model objects used below.
from tensorflow.keras.layers import Conv2D


def build_model():
    """Build and compile the Q-network.

    Reads the module-level ``state_space_shape``, ``num_actions`` and
    ``learning_rate`` settings.

    The network reshapes the board to a single-channel image, applies four
    unpadded convolutions that shrink a 6x7 board to a 1x1x8 feature map,
    flattens it to 8 features, and finishes with two dense layers. Hidden
    layers use ReLU so the network is not a purely linear function of the
    board; the output layer stays linear because it regresses one unbounded
    Q-value per action.

    Returns:
        A Keras ``Model`` compiled with Adam and mean-squared-error loss.
    """
    board_input = Input(shape=state_space_shape)
    # Append a channel axis so the board can be fed to Conv2D.
    features = Reshape(target_shape=(*state_space_shape, 1))(board_input)

    # Spatial size with default 'valid' padding on a 6x7 board:
    # (6, 7) -> (5, 6) -> (4, 5) -> (3, 3) -> (1, 1)
    features = Conv2D(8, kernel_size=(2, 2), activation="relu")(features)
    features = Conv2D(8, kernel_size=(2, 2), activation="relu")(features)
    features = Conv2D(8, kernel_size=(2, 3), activation="relu")(features)
    features = Conv2D(8, kernel_size=(3, 3), activation="relu")(features)

    # The 1x1x8 feature map is flattened to a vector of 8 values.
    features = Reshape(target_shape=(8,))(features)
    hidden = Dense(14, activation="relu")(features)
    q_values = Dense(num_actions)(hidden)

    network = Model(inputs=board_input, outputs=q_values)
    network.compile(Adam(learning_rate=learning_rate), loss=MeanSquaredError())

    return network

In [18]:
# Two independently initialised networks with the same architecture: one is
# passed to DDQN as the model, the other as the target model.
model, target_model = build_model(), build_model()

agent = DDQN(
    state_space_shape,
    num_actions,
    model,
    target_model,
    learning_rate,
    discount_factor,
    batch_size,
    memory_size,
)

In [19]:
def my_agent(observation, _):
    """Agent callable wrapping the module-level DDQN ``agent``.

    The observation is converted with ``process_state``, the agent is queried
    with ``0`` as the second argument to ``get_action`` (presumably epsilon,
    i.e. no exploration -- confirm against DDQN), and the result is passed
    through ``postprocess_action``. The second parameter is ignored.
    """
    board = process_state(observation)
    return postprocess_action(agent.get_action(board, 0))


def fixed_agent(observation, _):
    """Baseline agent that ignores both arguments and always answers 3."""
    constant_action = 3
    return constant_action

In [20]:
def process_state(_state):
    """Return ``_state.board`` as a NumPy array reshaped to 6 rows by 7 columns."""
    flat_board = np.asarray(_state.board)
    return np.reshape(flat_board, (6, 7))


def preprocess_reward(_reward):
    """Map a raw reward onto the training reward.

    Mapping:
        None            -> -100
        equal to 1      -> 10
        equal to 0      -> 1
        any other value -> -10
    """
    if _reward is None:
        return -100
    if _reward == 1:
        return 10
    if _reward == 0:
        return 1
    return -10


def postprocess_action(_action):
    """Round the agent's action and return it as a built-in ``int``.

    ``round()`` returns whatever the argument's ``__round__`` produces, which
    is not always a plain ``int`` (for example some NumPy scalar types). The
    explicit ``int(...)`` guarantees a built-in integer.

    NOTE(review): presumably the environment validates actions as plain
    integers -- confirm against kaggle_environments.
    """
    return int(round(_action))

In [21]:
# Keyword arguments forwarded to my_train.
# NOTE(review): this trains for 200 episodes, while epsilon_decay was derived
# from num_episodes (1000) and agent.save below is called with 1000 -- confirm
# that the mismatch is intentional.
configuration = dict(
    num_episodes=200,
    process_state=process_state,
    process_reward=preprocess_reward,
    process_action=postprocess_action,
    epsilon=epsilon,
    epsilon_decay=epsilon_decay,
    epsilon_min=epsilon_min,
    save_iter=200,
    save_name="connectx-DDQN",
    prev_count=0,
)

my_train(agent, opponent="random", **configuration)
agent.save("connectx-DDQN", 1000)

# Evaluate the trained agent against each opponent in turn and print the mean reward.
for _label, _opponent in (
    ("My Agent vs Random Agent:", "random"),
    ("My Agent vs Negamax Agent:", "negamax"),
    ("My Agent vs Fixed Agent", fixed_agent),
):
    print(_label, mean_reward(my_evaluate("connectx", [my_agent, _opponent])))

  2%|▎         | 5/200 [00:27<17:43,  5.46s/it]


KeyboardInterrupt: 