In [1]:
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import logging

logging.getLogger("tensorflow").setLevel(logging.ERROR)  # this goes *before* tf import

import numpy as np
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPool2D, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError, MSE

import tensorflow as tf

os.environ['AUTOGRAPH_VERBOSITY'] = '1'
tf.autograph.set_verbosity(0)

from tqdm import tqdm

from kaggle_environments import make
from deep_q_learning import DQN
from my_utils import my_train, my_evaluate, mean_reward

Loading environment lux_ai_s2 failed: No module named 'pettingzoo'


In [2]:
# Board geometry and action count used to size the network and the agent.
# (presumably one action per board column — confirm against the environment)
state_space_shape = (6, 7)
num_actions = 7

# Optimiser / replay settings handed to Adam and to the DQN wrapper.
learning_rate, discount_factor = 0.01, 0.975
batch_size, memory_size = 128, 2048

# Exploration schedule.  The decrement is sized so that going from `epsilon`
# down to `epsilon_min` takes num_episodes / 1.25 decrements
# (assumes my_train subtracts epsilon_decay linearly — TODO confirm).
num_episodes = 1000
epsilon, epsilon_min = 1, 0.01
epsilon_decay = 1.25 * (epsilon - epsilon_min) / num_episodes

# ConnectX environment instance from kaggle_environments.
env = make("connectx", debug=False)

In [3]:
# NOTE: Conv2D is deliberately NOT re-imported in this cell.  The notebook's
# first cell already imports it from the public `tensorflow.keras.layers`
# namespace.  The former `from tensorflow.python.keras.layers import Conv2D`
# shadowed that name with a class from TensorFlow's private `tensorflow.python`
# tree, which mixed two Keras implementations inside one model.


def build_model(hidden_activation=None):
    """Build and compile the three-branch convolutional Q-network.

    The (6, 7) board gets a trailing channel axis and is fed to three parallel
    stacks of un-padded convolutions.  Each stack shrinks the board until it
    can be reshaped to a flat vector:

    * ``cv`` - width-1 kernels (2x1, 3x1, 3x1), flattened to 7 values
    * ``ch`` - includes a 3x7 kernel spanning the full board width,
      flattened to 16 values
    * ``cd`` - 2x3 and 3x3 kernels, flattened to 7 values

    Every branch is projected to 7 units, the three projections are summed,
    and two more Dense(7) layers produce the output.

    Args:
        hidden_activation: Activation passed to every Conv2D/Dense layer
            except the final output layer.  The default ``None`` reproduces
            the original network exactly.  With ``None`` no layer applies a
            non-linearity, so the whole model is an affine function of the
            board; pass e.g. ``"relu"`` when training a fresh model to give
            it non-linear capacity.  Weight shapes do not depend on this
            argument.

    Returns:
        A Keras ``Model`` compiled with Adam (using the notebook-level
        ``learning_rate``) and a mean-squared-error loss.
    """
    board = Input(shape=state_space_shape)
    grid = Reshape(target_shape=(6, 7, 1))(board)

    # Branch 1: width-1 kernels.  (6,7,1) -> (5,7,16) -> (3,7,16) -> (1,7,1)
    cv = Conv2D(16, kernel_size=(2, 1), activation=hidden_activation)(grid)
    cv = Conv2D(16, kernel_size=(3, 1), activation=hidden_activation)(cv)
    cv = Conv2D(1, kernel_size=(3, 1), activation=hidden_activation)(cv)
    cv = Reshape(target_shape=(7,))(cv)
    cv = Dense(7, activation=hidden_activation)(cv)

    # Branch 2: full-width kernel.  (6,7,1) -> (5,7,16) -> (3,1,16) -> (1,1,16)
    ch = Conv2D(16, kernel_size=(2, 1), activation=hidden_activation)(grid)
    ch = Conv2D(16, kernel_size=(3, 7), activation=hidden_activation)(ch)
    ch = Conv2D(16, kernel_size=(3, 1), activation=hidden_activation)(ch)
    ch = Reshape(target_shape=(16,))(ch)
    ch = Dense(7, activation=hidden_activation)(ch)

    # Branch 3: square-ish kernels.  (6,7,1) -> (5,5,7) -> (3,3,7) -> (1,1,7)
    cd = Conv2D(7, kernel_size=(2, 3), activation=hidden_activation)(grid)
    cd = Conv2D(7, kernel_size=(3, 3), activation=hidden_activation)(cd)
    cd = Conv2D(7, kernel_size=(3, 3), activation=hidden_activation)(cd)
    cd = Reshape(target_shape=(7,))(cd)
    cd = Dense(7, activation=hidden_activation)(cd)

    # Merge the three 7-unit projections element-wise, then map to the output.
    merged = cv + ch + cd
    merged = Dense(7, activation=hidden_activation)(merged)
    # The last layer stays linear: it is the regression output trained with MSE.
    q_values = Dense(7)(merged)

    m = Model(inputs=board, outputs=q_values)
    m.compile(Adam(learning_rate=learning_rate), loss=MeanSquaredError())

    return m

In [4]:
# Two separately built (hence separately initialised) copies of the same
# architecture: one passed as the model, one as the target model.
model = build_model()
target_model = build_model()

# Arguments stay positional; DQN's parameter names are not visible from here.
agent = DQN(
    state_space_shape,
    num_actions,
    model,
    target_model,
    learning_rate,
    discount_factor,
    batch_size,
    memory_size,
)

In [5]:
def process_state(_state):
    """Return the observation's ``board`` as a 6x7 NumPy array.

    ``_state`` only needs a ``board`` attribute holding 42 values; they are
    laid out row by row in the result.
    """
    flat_board = np.asarray(_state.board)
    return flat_board.reshape(6, 7)


def preprocess_reward(_reward):
    """Map an environment reward onto the training reward scale.

    Mapping: ``None`` -> -4, ``1`` -> 4, ``0`` -> 1, anything else -> -1.
    (``None`` presumably signals an invalid move or agent error — confirm
    against the environment.)
    """
    if _reward is None:
        return -4
    if _reward == 1:
        return 4
    if _reward == 0:
        return 1
    return -1


def postprocess_action(_action):
    """Round the agent's raw action with the built-in ``round``."""
    nearest = round(_action)
    return nearest

In [None]:
# First training run against the "negamax" opponent.
configuration = {
    # Use the notebook-level constant rather than a second literal:
    # epsilon_decay was derived from num_episodes, so the two must stay in sync.
    "num_episodes": num_episodes,
    "process_state": process_state, "process_reward": preprocess_reward, "process_action": postprocess_action,
    "epsilon": epsilon, "epsilon_decay": epsilon_decay, "epsilon_min": epsilon_min,
    # presumably: checkpoint every 100 episodes, counting from 0 — confirm in my_train
    "save_iter": 100, "save_name": "connectx-conv", "prev_count": 0
}

my_train(agent, opponent="negamax", **configuration)

In [None]:
# Save the agent under the name 'connectx-conv' with tag 1000
# (presumably the number of episodes trained so far — confirm against DQN.save).
agent.save('connectx-conv', 1000)

In [None]:
# Restore the agent from the 'connectx-conv' save tagged 1000, so the cells
# below can run without repeating the training cell.
agent.load('connectx-conv', 1000)

In [6]:
def my_agent(observation, _):
    """Agent callable for the evaluation matches, backed by the DQN ``agent``.

    The observation is converted with ``process_state``, the agent is asked
    for an action with a second argument of 0 (presumably epsilon, i.e. no
    random exploration — confirm against DQN.get_action), and the result is
    passed through ``postprocess_action``.  The second parameter is ignored.
    """
    board = process_state(observation)
    raw_action = agent.get_action(board, 0)
    return postprocess_action(raw_action)


def fixed_agent(observation, _):
    """Baseline agent that ignores its inputs and always returns action 3."""
    constant_action = 3
    return constant_action

In [None]:
# Reference match-ups that do not involve the trained agent.
baseline_matchups = (
    ("Random Agent vs Negamax Agent:", ["random", "negamax"]),
    ("Fixed Agent vs Random Agent:", [fixed_agent, "random"]),
    ("Fixed Agent vs Negamax Agent:", [fixed_agent, "negamax"]),
)
for caption, players in baseline_matchups:
    print(caption, mean_reward(my_evaluate("connectx", players)))

In [9]:
# Score the trained agent against each opponent in turn.
agent_matchups = (
    ("My Agent vs Random Agent:", "random"),
    ("My Agent vs Negamax Agent:", "negamax"),
    ("My Agent vs Fixed Agent", fixed_agent),
)
for caption, rival in agent_matchups:
    print(caption, mean_reward(my_evaluate("connectx", [my_agent, rival])))

100%|██████████| 100/100 [00:38<00:00,  2.61it/s]


My Agent vs Random Agent: W: 60; L: 14; M: 26; O: 0


100%|██████████| 100/100 [02:01<00:00,  1.21s/it]


My Agent vs Negamax Agent: W: 0; L: 43; M: 57; O: 0


100%|██████████| 100/100 [00:21<00:00,  4.65it/s]

My Agent vs Fixed Agent W: 100; L: 0; M: 0; O: 0





In [7]:
# Second training phase: resume from the save tagged 1000, train for another
# `num_episodes` episodes against "negamax", then evaluate.
# The tag is named once so the loaded save and `prev_count` cannot disagree.
resume_from = 1000
agent.load('connectx-conv', resume_from)

configuration = {
    # Same constant that epsilon_decay was derived from (was a separate literal).
    "num_episodes": num_episodes,
    "process_state": process_state, "process_reward": preprocess_reward, "process_action": postprocess_action,
    # NOTE(review): epsilon is passed at its initial value again, so exploration
    # restarts from the top for the resumed agent — confirm this is intended.
    "epsilon": epsilon, "epsilon_decay": epsilon_decay, "epsilon_min": epsilon_min,
    "save_iter": 250, "save_name": "connectx-conv", "prev_count": resume_from
}

my_train(agent, opponent="negamax", **configuration)

# Same three match-ups as the earlier evaluation cell, in the same order.
for caption, rival in (
    ("My Agent vs Random Agent:", "random"),
    ("My Agent vs Negamax Agent:", "negamax"),
    ("My Agent vs Fixed Agent", fixed_agent),
):
    print(caption, mean_reward(my_evaluate("connectx", [my_agent, rival])))

 48%|████▊     | 475/1000 [08:23<07:57,  1.10it/s]

In [7]:
# Load the save tagged 2000 and run the three evaluation match-ups on it.
agent.load("connectx-conv", 2000)

final_matchups = (
    ("My Agent vs Random Agent:", "random"),
    ("My Agent vs Negamax Agent:", "negamax"),
    ("My Agent vs Fixed Agent", fixed_agent),
)
for caption, rival in final_matchups:
    print(caption, mean_reward(my_evaluate("connectx", [my_agent, rival])))

100%|██████████| 100/100 [00:24<00:00,  4.11it/s]


My Agent vs Random Agent: W: 75; L: 1; M: 24; O: 0


100%|██████████| 100/100 [01:16<00:00,  1.30it/s]


My Agent vs Negamax Agent: W: 0; L: 9; M: 91; O: 0


100%|██████████| 100/100 [00:21<00:00,  4.65it/s]

My Agent vs Fixed Agent W: 100; L: 0; M: 0; O: 0



