In [None]:
from Player import Player
from Trainer import Trainer
from TetrisModel import TetrisModel
from Pretrainer import Pretrainer
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
import pickle
import glob
import time

In [None]:
# Model sizes shared by every TetrisModel built in this notebook.
piece_dim, key_dim, depth = 8, 12, 16

# Values handed to the Trainer: gamma and lam are also the arguments its
# GAE helper is called with further down; temperature is passed through as-is.
gamma, lam, temperature = 0.99, 0.95, 1.0

In [None]:
# Use lambda instead of gamma to imitate the shape of GAE without value predictions
# pretrainer = Pretrainer(gamma=lam)

In [None]:
# players_data = pretrainer._load_data()

In [None]:
# pretrainer._load_dset(players_data)

In [None]:
# max_len = pretrainer._max_len
# Hard-coded while the pretrainer cells are disabled. Used as `max_length` for
# every TetrisModel and as `max_len` for the Trainer below.
max_len = 10

In [None]:
# gt_dset = pretrainer._cache_dset()

In [None]:
# Actor model. Its output width (out_dim) is set to key_dim; the critic built
# later uses the same arguments with out_dim=1.
agent = TetrisModel(
    piece_dim=piece_dim,
    key_dim=key_dim,
    depth=depth,
    num_heads=4,
    num_layers=4,
    max_length=max_len,
    out_dim=key_dim,
)

In [None]:
# Attach an Adam optimizer (learning rate 1e-4) to the agent.
agent_optimizer = keras.optimizers.Adam(learning_rate=1e-4)
agent.compile(optimizer=agent_optimizer)

In [None]:
# One forward pass on random inputs -- presumably to create the model's
# variables so that summary() and load_weights() work afterwards.
# The three inputs, in order: a float (32, 28, 10, 1) tensor, a (32, 7) int
# tensor with values in [0, 8), and a (32, max_len) int tensor with values in
# [0, key_dim).
logits, piece_scores, key_scores = agent(
    (
        tf.random.uniform((32, 28, 10, 1)),
        tf.random.uniform((32, 7), minval=0, maxval=8, dtype=tf.int32),
        tf.random.uniform((32, max_len), minval=0, maxval=key_dim, dtype=tf.int32),
    ),
    return_scores=True,
)
# Print the layer summary and display the shapes of the three outputs.
agent.summary(), tf.shape(logits), tf.shape(piece_scores), tf.shape(key_scores)

In [None]:
# Critic model: same constructor arguments as the agent, except that
# out_dim is 1.
critic = TetrisModel(
    piece_dim=piece_dim,
    key_dim=key_dim,
    depth=depth,
    num_heads=4,
    num_layers=4,
    max_length=max_len,
    out_dim=1,
)

In [None]:
# Attach a separate Adam optimizer (learning rate 1e-4) to the critic.
critic_optimizer = keras.optimizers.Adam(learning_rate=1e-4)
critic.compile(optimizer=critic_optimizer)

In [None]:
# Same random-input forward pass as for the agent, this time through the
# critic. Note that piece_scores / key_scores are rebound to the critic's
# outputs here.
values, piece_scores, key_scores = critic(
    (
        tf.random.uniform((32, 28, 10, 1)),
        tf.random.uniform((32, 7), minval=0, maxval=8, dtype=tf.int32),
        tf.random.uniform((32, max_len), minval=0, maxval=key_dim, dtype=tf.int32),
    ),
    return_scores=True,
)
# Print the layer summary and display the shapes of the three outputs.
critic.summary(), tf.shape(values), tf.shape(piece_scores), tf.shape(key_scores)

In [None]:
# Restore actor and critic from previously saved fine-tuned checkpoints.
# The `_1803` suffix follows the `..._finetuned_{trainer.wandb_run.step}`
# naming used by the save cells further down.
agent.load_weights('agent_weights_small/agent_finetuned_1803')
critic.load_weights('critic_weights_small/critic_finetuned_1803')

In [None]:
# Second model built with exactly the agent's constructor arguments; it is
# handed to the Trainer as `ref_model`.
ref_agent = TetrisModel(
    piece_dim=piece_dim,
    key_dim=key_dim,
    depth=depth,
    num_heads=4,
    num_layers=4,
    max_length=max_len,
    out_dim=key_dim,
)

In [None]:
# Forward pass on a single random example (batch size 1) through ref_agent --
# presumably to create its variables before load_weights(). Rebinds logits,
# piece_scores and key_scores.
logits, piece_scores, key_scores = ref_agent(
    (
        tf.random.uniform((1, 28, 10, 1)),
        tf.random.uniform((1, 7), minval=0, maxval=8, dtype=tf.int32),
        tf.random.uniform((1, max_len), minval=0, maxval=key_dim, dtype=tf.int32),
    ),
    return_scores=True,
)
# Display the shapes of the three outputs.
tf.shape(logits), tf.shape(piece_scores), tf.shape(key_scores)

In [None]:
# Load the reference model from the same checkpoint path the agent was
# restored from, so both start with identical weights.
ref_agent.load_weights('agent_weights_small/agent_finetuned_1803')

In [None]:
# epochs = 10

In [None]:
# actor_losses, critic_losses, accs = pretrainer.train(agent, critic, gt_dset, epochs)

In [None]:
# plt.plot(actor_losses)
# plt.plot(critic_losses)
# plt.plot(accs)

In [None]:
# if 'y' in input('YOU SURE?????'):
#     agent.save_weights('agent_weights_small/agent')
#     critic.save_weights('critic_weights_small/critic')

In [None]:
# Select matplotlib's Qt backend: figures open in separate GUI windows
# instead of being rendered inline in the notebook.
%matplotlib qt

In [None]:
# Wire the actor, critic and reference model into the Trainer together with
# the hyperparameters defined at the top of the notebook.
trainer = Trainer(
    agent=agent,
    critic=critic,
    ref_model=ref_agent,
    max_len=max_len,
    gamma=gamma,
    lam=lam,
    temperature=temperature,
    max_episode_steps=100,
    buffer_cap=1000,
)

In [None]:
# Run before the training loop below. Presumably collects experience into the
# Trainer's replay buffer (capacity set via buffer_cap) -- see Trainer.
trainer.fill_replay_buffer()

In [None]:
# Endless training loop: it only stops when the kernel is interrupted.
# Each pass calls trainer.train (gens=100, train_steps=10) and then writes
# actor and critic checkpoints tagged with the current wandb run step.
while True:
    trainer.train(
        gens=100,
        train_steps=10,
        training_actor=True,
    )
    agent.save_weights(
        f'agent_weights_small/agent_finetuned_{trainer.wandb_run.step}'
    )
    critic.save_weights(
        f'critic_weights_small/critic_finetuned_{trainer.wandb_run.step}'
    )

In [56]:
# Manual checkpoint, e.g. after interrupting the training loop above.
# The reply is normalised (whitespace stripped, lower-cased) and must be
# exactly "y" or "yes". A substring test such as `'y' in reply` would also
# fire on answers like "no way" and overwrite the checkpoints by accident.
if input('YOU SURE?????').strip().lower() in ('y', 'yes'):
    agent.save_weights(f'agent_weights_small/agent_finetuned_{trainer.wandb_run.step}')
    critic.save_weights(f'critic_weights_small/critic_finetuned_{trainer.wandb_run.step}')

# Disabled: additionally pickle the agent optimizer's weights.
# symbolic_weights = getattr(agent.optimizer, 'weights')
# weight_values = K.batch_get_value(symbolic_weights)
# with open('agent_weights_small/optimizer.pkl', 'wb') as f:
#     pickle.dump(weight_values, f)

# Last expression of the cell: display the step used in the checkpoint names.
trainer.wandb_run.step

YOU SURE????? y


254

In [42]:
# Play one episode of at most 100 steps with the current actor and critic,
# with greedy=False and drawing through the trainer's renderer.
episode_data = trainer.player.run_episode(
    agent,
    critic,
    max_steps=100,
    greedy=False,
    renderer=trainer.renderer,
)

In [43]:
# Split the episode result into its six components, in the order returned
# by run_episode.
(
    episode_boards,
    episode_pieces,
    episode_inputs,
    episode_probs,
    episode_values,
    episode_rewards,
) = episode_data

In [44]:
# Advantages and returns for the episode, computed by the trainer's GAE
# helper from the critic values and rewards with the trainer's gamma / lam.
episode_advantages, episode_returns = trainer._compute_gae(
    episode_values,
    episode_rewards,
    trainer.gamma,
    trainer.lam,
)

In [48]:
# Compare the computed returns with the critic's value estimates.
fig, ax = plt.subplots()
for series, series_label in (
    (episode_returns, 'Returns'),
    (episode_values, 'Values'),
):
    ax.plot(series, label=series_label)
ax.legend()
# Last expression of the cell: total reward collected in the episode.
tf.reduce_sum(episode_rewards)

<tf.Tensor: shape=(), dtype=float32, numpy=9.999999>

In [49]:
# Compare the per-step rewards with the GAE advantages.
fig, ax = plt.subplots()
for series, series_label in (
    (episode_rewards, 'Rewards'),
    (episode_advantages, 'Advantages'),
):
    ax.plot(series, label=series_label)
ax.legend()

<matplotlib.legend.Legend at 0x25e44a32ca0>

In [44]:
import numpy as np

In [59]:
# Load the saved board array as float32, then add a leading and a trailing
# axis of size 1 so it has the 4-D layout of the float input used in the
# random forward passes above.
board = tf.cast(np.load('tki.npy'), tf.float32)
board = board[tf.newaxis, ..., tf.newaxis]

In [60]:
# One row of 7 integer ids (presumably piece ids), used as the model's second
# input. Same (batch, 7) layout as the random ints in the build cells.
queue = tf.constant([[2, 0, 1, 4, 3, 7, 6]])

In [77]:
# One row of 5 integer ids (all below key_dim), used as the model's third
# input. Shorter than max_len.
inp_seq = tf.constant([[11, 4, 6, 5, 6]])

In [79]:
# Inference-mode forward pass on the hand-built example; the displayed value
# is the highest-scoring output index at each sequence position.
tf.argmax(
    agent((board, queue, inp_seq), training=False),
    axis=-1,
)

<tf.Tensor: shape=(1, 5), dtype=int64, numpy=array([[4, 6, 5, 6, 8]], dtype=int64)>

In [95]:
# Single figure/axes pair reused by the imshow cells below; each imshow call
# draws onto this same `ax`.
fig, ax = plt.subplots()

In [88]:
# Output of the first layer of agent.feature_extraction for `board`, averaged
# over the last axis. [0] drops the leading axis (board holds one example).
first_conv = tf.reduce_mean(agent.feature_extraction.layers[0](board), axis=-1)[0]
# Min-max scale to [0, 1] for display. divide_no_nan returns 0 where the
# denominator is 0, so a constant map becomes all zeros instead of NaN;
# non-constant maps give the same result as a plain division.
first_conv = tf.math.divide_no_nan(
    first_conv - tf.reduce_min(first_conv),
    tf.reduce_max(first_conv) - tf.reduce_min(first_conv),
)

In [93]:
# Convert the TensorFlow tensor to a NumPy array before plotting and saving.
first_conv = np.array(first_conv)

In [96]:
# Draw the normalised first-layer map on the shared axes.
ax.imshow(first_conv)

<matplotlib.image.AxesImage at 0x156f40bfac0>

In [97]:
# np.save appends the ".npy" extension, so this writes conv1.npy.
np.save('conv1', first_conv)

In [98]:
# Feed `board` through the first two layers of agent.feature_extraction,
# average over the last axis, and drop the leading axis with [0].
second_conv = agent.feature_extraction.layers[0](board)
second_conv = agent.feature_extraction.layers[1](second_conv)
second_conv = tf.reduce_mean(second_conv, axis=-1)[0]
# Min-max scale to [0, 1] for display. divide_no_nan returns 0 where the
# denominator is 0, so a constant map becomes all zeros instead of NaN;
# non-constant maps give the same result as a plain division.
second_conv = tf.math.divide_no_nan(
    second_conv - tf.reduce_min(second_conv),
    tf.reduce_max(second_conv) - tf.reduce_min(second_conv),
)

In [100]:
# Draw the normalised second-layer map on the shared axes.
ax.imshow(second_conv)

<matplotlib.image.AxesImage at 0x156f4350340>

In [101]:
# np.save appends the ".npy" extension, so this writes conv2.npy.
np.save('conv2', second_conv)

In [102]:
# Feed `board` through the first three layers of agent.feature_extraction,
# average over the last axis, and drop the leading axis with [0].
third_conv = agent.feature_extraction.layers[0](board)
third_conv = agent.feature_extraction.layers[1](third_conv)
third_conv = agent.feature_extraction.layers[2](third_conv)
third_conv = tf.reduce_mean(third_conv, axis=-1)[0]
# Min-max scale to [0, 1] for display. divide_no_nan returns 0 where the
# denominator is 0, so a constant map becomes all zeros instead of NaN;
# non-constant maps give the same result as a plain division.
third_conv = tf.math.divide_no_nan(
    third_conv - tf.reduce_min(third_conv),
    tf.reduce_max(third_conv) - tf.reduce_min(third_conv),
)

In [103]:
# Draw the normalised third-layer map on the shared axes.
ax.imshow(third_conv)

<matplotlib.image.AxesImage at 0x156f43672b0>

In [104]:
# np.save appends the ".npy" extension, so this writes conv3.npy.
np.save('conv3', third_conv)