In [1]:
import os
from game import Game
import logging
from board import Direction
import random
import math

import numpy as np

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras import layers



print(f"GPU {'available' if tf.config.list_physical_devices('GPU') else 'not available'}")

physical_devices = tf.config.experimental.list_physical_devices("GPU")
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)

2022-08-31 02:10:06.877139: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


GPU available


2022-08-31 02:10:07.553206: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-08-31 02:10:07.553659: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-08-31 02:10:07.612246: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-31 02:10:07.612453: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.83GHz coreCount: 30 deviceMemorySize: 5.79GiB deviceMemoryBandwidth: 312.97GiB/s
2022-08-31 02:10:07.612468: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2022-08-31 02:10:07.613404: I tensorflow/stream_executor/platform/def

In [2]:
def board_to_tensor(input_data):
    return tf.expand_dims(
        tf.expand_dims(
            tf.convert_to_tensor(
                input_data.repr("int") / 4
            ),
            axis=-1
        ),
        axis=0
    )

def to_key(digit):
    return Direction(digit)


class EpochDots(tf.keras.callbacks.Callback):
    """Makes 'model.fit' print dots at each epoch and print the
    current epoch number in real time
    Eg.\n
    10   .......... \n
    14   ....
    NOTE: Make sure to set verbose == 0
    """

    def __init__(self, dots_per_line=10, epochs_per_dot=1):
        """
        Args:
            dots: dots per line
        """
        super().__init__()
        self.dots_per_line = dots_per_line
        self.epochs_per_dot = epochs_per_dot


    def on_epoch_end(self, epoch, logs={}):
        lr = self.model.optimizer.lr.numpy()
        padding = round(math.log(self.params['epochs'], 10)) + 1
        epoch = epoch + 1  # epochs start counting at 0 for computer, this adds 1 for human
        if epoch % self.epochs_per_dot == 0:
            if epoch % (self.dots_per_line * self.epochs_per_dot) == 0:  # Every 100 epochs
                print(f"\r{epoch:{padding}} " + "." * self.dots_per_line, logs, f"lr: {lr}", end="\n")
            else:
                dots = int(epoch / self.epochs_per_dot) % self.dots_per_line
                print(f"\r{epoch:{padding}} "+ f"{'.'*dots:{self.dots_per_line}}", logs, f"lr: {lr}",  end="")

class EarlyStopping(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if logs.get('accuracy') >= 0.95:
            print("\nReached >= 95% accuracy so cancelling training!")
            self.model.stop_training = True


In [3]:
logging.basicConfig(filename="logs.txt",
                    level=logging.CRITICAL,
                    filemode="w",
                    format="%(levelname) -9s %(module)s:%(lineno)s %(funcName) -10s %(message)s")

# Game parameters
HEIGHT = 20
WIDTH = 20



# TODO: add convolutions
# TODO: get rid of magic number 4
model = Sequential([
    # Flatten(input_shape=(HEIGHT, WIDTH)),
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(HEIGHT, WIDTH, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    # layers.Conv2D(64, (3, 3), activation='relu'),

    layers.Flatten(),
    layers.Dense(4096, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(4096, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(4096, activation='relu'),

    layers.Dense(4)
])

model.summary()
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 18, 18, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 9, 9, 32)          0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 7, 7, 64)          18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 3, 3, 64)          0         
_________________________________________________________________
flatten (Flatten)            (None, 576)               0         
_________________________________________________________________
dense (Dense)                (None, 4096)              2363392   
_________________________________________________________________
dropout (Dropout)            (None, 4096)              0

2022-08-31 02:10:07.648802: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-31 02:10:07.648994: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-08-31 02:10:07.649109: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-31 02:10:07.649308: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2060 computeCapability: 7.5
coreClock: 1.83GHz coreCount: 30 deviceMemorySize: 5.79GiB deviceMemoryBandwidth: 312.97Gi

In [6]:
GENERATIONS = 3
GAMES = 100  # Number of games to play before updating the model
KEEP = 3  # games to keep for training
MAX_STEPS = 20 # Maximum number of steps per game

max_reward = -100
for generation in range(GENERATIONS):
    randomness = 1 * math.exp(-2*generation/GENERATIONS)

    observations = np.full(
        (GAMES, MAX_STEPS, HEIGHT, WIDTH, 1),
        -1,
        np.float32
    )
    predictions = np.full(
        (GAMES, MAX_STEPS, 1),
        -1,
        dtype=np.int8
    )

    # HACK: give a better default value
    rewards = np.full(GAMES, -10000, dtype=np.int32)
    print(f"Generation: {generation}")
    print(f"Randomness: {randomness}")

    # Loop over games
    for game_idx in range(GAMES):
        # Initialize game
        game = Game(HEIGHT, WIDTH)

        # Initial step information
        observation = board_to_tensor(game.board)
        terminated = False
        truncated = False
        total_reward = 0

        # Play game
        step = 0
        while step < MAX_STEPS and not (terminated or truncated):
            model_prediction = tf.argmax(
                tf.math.softmax(model(observation)),
                axis=1
            )

            random_prediction = random.randint(0, 3)

            prediction = random_prediction if random.random() < randomness else model_prediction

            observations[game_idx, step] = observation
            predictions[game_idx, step] = prediction

            key = to_key(prediction)
            observation, reward, terminated, truncated, info = game.step(key)
            observation = board_to_tensor(observation)

            total_reward += reward
            step += 1

        rewards[game_idx] = total_reward





    max_args = np.argsort(rewards)[-KEEP:]
    min_args = np.argsort(rewards)[:KEEP]
    print(f"Max reward: {rewards[max_args]}")
    print(f"Min reward: {rewards[min_args]}")

    # if rewards[max_args][-1] <= max_reward:
    #     print("Skipping")
    #     continue
    # else:
    #     max_reward = rewards[max_args][-1]



    # Only use the best games for training
    observations = observations[max_args]
    predictions = predictions[max_args]

    # Concatenate games
    observations = observations.reshape(-1, HEIGHT, WIDTH, 1)
    predictions = predictions.reshape(-1, 1)



    # Remove any steps that were not played
    good_indices = (predictions > 0).reshape(-1)
    observations = observations[good_indices]
    predictions = predictions[good_indices]


    GEN_DIR = "generations"
    os.makedirs(GEN_DIR, exist_ok=True)
    plays = os.path.join(GEN_DIR, f"gen_{generation}")
    np.savez(plays, observations)



    print(f"Training on {len(observations)} steps")

    model.fit(
        observations,
        predictions,
        batch_size=1024,
        epochs=10000,
        callbacks=[
            EpochDots(dots_per_line=80, epochs_per_dot=10),
            EarlyStopping()
        ],
        verbose=0
    )



Generation: 0
Randomness: 1.0
[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25h[?25hMax reward: [-100 -100 -100]
Min reward: [-100 -100 -100]
Training on 9 steps
   10 .                                                                                {'loss': 0.38676849007606506, 'accuracy': 0.8888888955116272} lr: 0.0010000000474974513
Reached >= 95% accuracy so cancelling training!
Generation: 1
Randomness: 0.513417119032592
[?25h[?25h[?25h[?25h

In [5]:

import time
game = Game(HEIGHT, WIDTH)
observation = game.board.tensor()
terminated = False
truncated = False
step = 0
while step < MAX_STEPS and not (terminated or truncated):
    model_prediction = tf.argmax(
        tf.math.softmax(model(observation)),
        axis=1
    )
    key = to_key(model_prediction)
    observation, reward, terminated, truncated, info = game.step(key)
    game.render()
    time.sleep(0.1)
    step += 1





[?25h

AttributeError: 'Game' object has no attribute 'render'