# Model customisation & experiments

This notebook is an alternative to running `./main.py`: it lets you plug in a custom model and a custom training loop.

In [1]:
# if the server isn't running already, start it
#!make -C ../ server

## Custom model

In [2]:
import os

TICK = 100 # game speed in milliseconds; handed to control.set_tick() when training starts
TETRIS_SERVER_URL = "http://localhost:8888" # address of the locally running Tetris server

# Suppress TensorFlow's informational start-up messages, such as:
#   "This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA"
#   "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags."
# Level '1' filters out INFO-level messages from the TensorFlow C++ backend.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' # Note: needs to be set before importing tensorflow

# In Jupyter notebooks we need the async version of the Playwright-based controller.
from tetris_control_async import control as ctrl
from model import Model


num_iterations = 1 # number of games to play per training session
off_policy = True # True: play the highest-reward move (off-policy); False: play the model's own top prediction (on-policy)

In [3]:
import numpy as np
import tensorflow as tf

# One output per candidate placement.
numOutputs = 4 * 10 # 4 rotations × 10 positions

# No Input layer is declared on purpose: the input shape stays unspecified
# until the model is first called on an encoded game state.
# (Previously: myShape = encode_state(get_state()).shape, followed by an
# explicit tf.keras.layers.Input layer built from myShape.)
my_model = tf.keras.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(numOutputs, activation='softmax'),
])

# Compile the model.
my_model.compile(optimizer='adam', loss='mse')

## Custom model trainer

Note: Use `self.maybeLoadWeights()` and `self.maybeSaveWeights()` to persist the model state across invocations.

In [4]:
# Context manager used to silence the verbose console output of model.fit() and model.predict()
import sys
class stdout_redirected(object):
    """Context manager that temporarily points ``sys.stdout`` at a file.

    Only the Python-level ``sys.stdout`` object is swapped; output written
    directly to file descriptor 1 is not affected.

    Args:
        to: path of the file that receives the output while the block runs.
            The file is opened in write mode, so existing content is
            truncated. Defaults to "/dev/null", i.e. the output is discarded.

    Usage:
        with stdout_redirected():
            noisy_call()
    """

    def __init__(self, to="/dev/null"):
        self.to = to
        self.sys_stdout = None  # stream to restore on exit
        self._target = None     # file opened by __enter__

    def __enter__(self):
        # Open first: if this raises, sys.stdout has not been touched.
        target = open(self.to, 'w')
        self.sys_stdout = sys.stdout
        self._target = target
        sys.stdout = target
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Close the file *we* opened rather than whatever sys.stdout happens
        # to be now: code inside the block may have replaced sys.stdout, and
        # closing that object would both break it and leak our own handle.
        try:
            self._target.close()
        finally:
            # Restore the original stream even if close() fails.
            sys.stdout = self.sys_stdout
            self._target = None
        return False  # never swallow exceptions raised inside the block


In [6]:
import time
from move import Move
from reward import Reward

def my_trainer(self, num_iterations, offPolicy=True):
    """Run the training loop until `num_iterations` games have finished.

    Each pass through the loop does one of two things:
    * if the current game is over, start a new one and bump the game counter;
    * otherwise score every candidate placement with `Reward`, play one of
      them, and fit the model once towards the softmax of the reward vector.

    Args:
        num_iterations: number of finished games after which training stops.
        offPolicy: when true, play the placement with the highest reward;
            otherwise play the placement the model itself ranks highest.
    """
    def announce(game_number):
        """Print the progress line for the game that is about to be played."""
        print ("Iteration: " + str(game_number) + "/" + str(num_iterations))

    def play_and_learn():
        """Play a single move and run one fit step on it."""
        # Snapshot the game before moving.
        snapshot = self.state.get_state()
        encoded = self.state.encode_state(snapshot)

        # Score every reachable end state of the current piece.
        mover = Move(self.control)
        candidates = mover.all_possible_end_states()
        rewards = [
            Reward(snapshot, candidate["board_after"]).get_reward()
            for candidate in candidates
        ]
        # One row holding one target value per candidate (40 in practice).
        targets = self.softmax(np.array(rewards).reshape(1, len(rewards)))

        # Query the model. This call also lets the model learn its input
        # shape, which is why the weights can only be loaded afterwards.
        with stdout_redirected("/dev/null"):
            prediction = self.model.predict(encoded)
        self.maybeLoadWeights()

        if not offPolicy:
            choice = np.argmax(prediction)
        else:
            choice = np.argmax(rewards)
            assert choice == np.argmax(targets)

        #print("piece:", state["piece"]["type"], "position:", possible_plays[actionChoice]["position"], "rotation:", possible_plays[actionChoice]["rotation"], "reward:", rewards[actionChoice], "(" + str(rewards[np.argmax(prediction)] - rewards[np.argmax(rewards)]) + ")")
        print(snapshot["piece"]["type"], end="")

        # Carry out the chosen placement.
        mover.perform_motion(candidates[choice]["motion"], True)

        # The state is not collected after the tick, so the pause is mostly
        # for show -- XXX but there seems to be some race condition: without
        # the sleep() the training moves are weird.
        time.sleep(self.control.get_tick()/1000.0)

        # One fit step towards the reward distribution, then persist.
        with stdout_redirected("/dev/null"):
            self.model.fit(encoded, targets, epochs=1, batch_size=1, verbose=0)
        self.maybeSaveWeights()

    # Track the elapsed time.
    self.start_time = time.time()

    games_played = 0
    announce(games_played + 1)
    while games_played < num_iterations:
        if not self.control.is_game_over():
            play_and_learn()
            continue

        # The game just ended: start the next one and report progress.
        self.control.new_game()
        games_played += 1
        if games_played >= num_iterations:
            print ("\nTraining complete.")
        else:
            announce(games_played + 1)

In [7]:
# Open a controller session against the Tetris server and run one training
# session with the custom model and trainer defined in the cells above.
# NOTE: a top-level `async with` relies on the notebook kernel's support for
# top-level await; it is not valid in a plain Python script.
async with ctrl(TETRIS_SERVER_URL) as control:
    # Patch the class attribute before instantiating, so the instance picks up
    # the custom trainer. This replaces train_model for every Model instance.
    Model.train_model = my_trainer # override Model.train_model()
    model = Model(control)
    control.set_tick(TICK)
    model.model = my_model # override Model.model
    model.train_model(num_iterations, off_policy)

self.playwright: <playwright._impl._playwright.Playwright object at 0x7f8bc400f5b0>
Iteration: 1/1
Loaded weights from file autopilot-model-weights.h5
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               58112     
                                                                 
 dense_1 (Dense)             (None, 40)                10280     
                                                                 
Total params: 68,392
Trainable params: 68,392
Non-trainable params: 0
_________________________________________________________________
Model summary: None (None, 226) (None, 40)
OTSSJOIIZLLOTOIJSSTZTOJTISLIOJJLJZOZTOILTIZSZSOLJTJSSTOJOJTTISZJIJZLOILTTLSSTLTTILSSTSOILLSSITLJJJISISSTJZIZJZLLZZSJJSJIOJIJJLZZOLLZJZTITraining complete.
