In [1]:
from Player import Player
from Trainer import Trainer
from TetrisModel import TetrisModel
from Pretrainer import Pretrainer
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
import pickle
import glob
import time

In [2]:
# Model sizes forwarded to every TetrisModel built in this notebook.
piece_dim, key_dim, depth = 8, 12, 16

# Settings forwarded to Trainer (gamma and lam are also the values handed to
# its GAE computation further down).
gamma, lam = 0.99, 0.95
temperature = 0.5

In [3]:
# Use lambda instead of gamma to imitate the shape of GAE without value predictions
# pretrainer = Pretrainer(gamma=lam)

In [4]:
# players_data = pretrainer._load_data()

In [5]:
# pretrainer._load_dset(players_data)

In [6]:
# Sequence length passed to the models as `max_length` and used for the key
# input of the dummy forward passes. It is hard-coded because the pretrainer
# that would otherwise supply it is commented out in this notebook.
# max_len = pretrainer._max_len
max_len = 22

In [7]:
# gt_dset = pretrainer._cache_dset()

In [8]:
# Policy model: with out_dim=key_dim the forward pass below yields an output of
# shape (1, max_len, key_dim), i.e. one logit per key at each sequence position.
agent = TetrisModel(
    piece_dim=piece_dim,
    key_dim=key_dim,
    depth=depth,
    num_heads=4,
    num_layers=4,
    max_length=max_len,
    out_dim=key_dim,
)

In [9]:
# Adam with its default settings; the optimizer is kept in its own variable so
# it stays reachable outside the model (see the disabled optimizer-state code
# in the load/save cells).
agent_optimizer = keras.optimizers.Adam()
# Only the optimizer is supplied to compile(); no loss or metrics are passed here.
agent.compile(optimizer=agent_optimizer)

In [10]:
# Run one forward pass on random dummy inputs before summary() and before the
# checkpoint is loaded; presumably this is what creates the model's variables.
# The input is a 3-tuple: a (1, 28, 10, 1) float tensor, 7 integer ids and
# max_len integer ids -- presumably (board, pieces, key inputs), matching the
# order of the episode data unpacked later in this notebook. TODO confirm.
logits = agent((tf.random.uniform((1, 28, 10, 1)),
                # Ids are drawn from [0, piece_dim) rather than a hard-coded 8 so the
                # dummy input keeps following the config cell if piece_dim changes.
                tf.random.uniform((1, 7), minval=0, maxval=piece_dim, dtype=tf.int32),
                tf.random.uniform((1, max_len), minval=0, maxval=key_dim, dtype=tf.int32)))
# summary() prints the table and returns None, hence the leading None in the
# displayed tuple; the second element is the shape of the logits.
agent.summary(), tf.shape(logits)

Model: "tetris_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (1, 70, 16)               4800      
                                                                 
 seq_embedding (SeqEmbedding  multiple                 128       
 )                                                               
                                                                 
 seq_embedding_1 (SeqEmbeddi  multiple                 192       
 ng)                                                             
                                                                 
 piece_dec_0 (DecoderLayer)  multiple                  9776      
                                                                 
 piece_dec_1 (DecoderLayer)  multiple                  9776      
                                                                 
 piece_dec_2 (DecoderLayer)  multiple                 

(None, <tf.Tensor: shape=(3,), dtype=int32, numpy=array([ 1, 22, 12])>)

In [11]:
# Critic model: same configuration as the agent except out_dim=1, so the
# forward pass below yields one scalar per sequence position, shape (1, max_len, 1).
critic = TetrisModel(
    piece_dim=piece_dim,
    key_dim=key_dim,
    depth=depth,
    num_heads=4,
    num_layers=4,
    max_length=max_len,
    out_dim=1,
)

In [12]:
# The critic gets its own default-configured Adam instance, separate from the agent's.
critic_optimizer = keras.optimizers.Adam()
# Only the optimizer is supplied to compile(); no loss or metrics are passed here.
critic.compile(optimizer=critic_optimizer)

In [13]:
# Run one forward pass on random dummy inputs before summary() and before the
# checkpoint is loaded; presumably this is what creates the critic's variables.
# The input is a 3-tuple: a (1, 28, 10, 1) float tensor, 7 integer ids and
# max_len integer ids -- presumably (board, pieces, key inputs). TODO confirm.
values = critic((tf.random.uniform((1, 28, 10, 1)),
                 # Ids are drawn from [0, piece_dim) rather than a hard-coded 8 so the
                 # dummy input keeps following the config cell if piece_dim changes.
                 tf.random.uniform((1, 7), minval=0, maxval=piece_dim, dtype=tf.int32),
                 tf.random.uniform((1, max_len), minval=0, maxval=key_dim, dtype=tf.int32)))
# summary() prints the table and returns None, hence the leading None in the
# displayed tuple; the second element is the shape of the value output.
critic.summary(), tf.shape(values)

Model: "tetris_model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_9 (Sequential)   (1, 70, 16)               4800      
                                                                 
 seq_embedding_2 (SeqEmbeddi  multiple                 128       
 ng)                                                             
                                                                 
 seq_embedding_3 (SeqEmbeddi  multiple                 192       
 ng)                                                             
                                                                 
 piece_dec_0 (DecoderLayer)  multiple                  9776      
                                                                 
 piece_dec_1 (DecoderLayer)  multiple                  9776      
                                                                 
 piece_dec_2 (DecoderLayer)  multiple               

(None, <tf.Tensor: shape=(3,), dtype=int32, numpy=array([ 1, 22,  1])>)

In [14]:
# Restore both networks from their saved checkpoints. Only the critic's load
# status object is displayed, because that call is the cell's last expression.
agent.load_weights('agent_weights_small/agent')
critic.load_weights('critic_weights_small/critic')
# Restoring the agent optimizer's state is currently disabled.
# NOTE(review): if this is re-enabled, pickle.load must only be used on files
# this project wrote itself -- unpickling untrusted data can execute arbitrary code.
# with open('agent_weights_small/optimizer.pkl', 'rb') as f:
#     weight_values = pickle.load(f)
# agent_optimizer.set_weights(weight_values)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1ec42df7fd0>

In [15]:
# Second model built with exactly the agent's configuration; it is handed to
# the Trainer as `ref_model`.
ref_agent = TetrisModel(
    piece_dim=piece_dim,
    key_dim=key_dim,
    depth=depth,
    num_heads=4,
    num_layers=4,
    max_length=max_len,
    out_dim=key_dim,
)

In [16]:
# Run one forward pass on random dummy inputs before the checkpoint is loaded;
# presumably this is what creates ref_agent's variables.
# NOTE: this rebinds `logits` from the agent cell; the result is not read again
# in the visible part of the notebook.
logits = ref_agent((tf.random.uniform((1, 28, 10, 1)),
                    # Ids are drawn from [0, piece_dim) rather than a hard-coded 8 so the
                    # dummy input keeps following the config cell if piece_dim changes.
                    tf.random.uniform((1, 7), minval=0, maxval=piece_dim, dtype=tf.int32),
                    tf.random.uniform((1, max_len), minval=0, maxval=key_dim, dtype=tf.int32)))

In [17]:
# Loads the same checkpoint path as the agent, so ref_agent starts from the
# agent's saved weights (the separate 'agent_reference' checkpoint is only
# mentioned in a commented-out cell below).
ref_agent.load_weights('agent_weights_small/agent')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1ec6c97dd00>

In [18]:
# Number of pretraining epochs; in the visible notebook it is only referenced
# by the commented-out pretrainer.train(...) call.
epochs = 10

In [19]:
# actor_losses, critic_losses, accs = pretrainer.train(agent, critic, gt_dset, epochs)

In [20]:
# plt.plot(actor_losses)
# plt.plot(critic_losses)
# plt.plot(accs)

In [21]:
# if 'y' in input('YOU SURE?????'):
#     ref_agent.save_weights('agent_weights_small/agent_reference')

In [22]:
# Switch matplotlib to the Qt backend: figures open in separate GUI windows
# instead of inline, so this cell needs a Qt binding and a display.
%matplotlib qt

In [23]:
# Wire the three models and the config-cell hyperparameters into the Trainer.
# Constructing it also produces the wandb log lines shown in this cell's output.
trainer = Trainer(
    agent=agent,
    critic=critic,
    ref_model=ref_agent,
    max_len=max_len,
    gamma=gamma,
    lam=lam,
    temperature=temperature,
    max_episode_steps=100,
    buffer_cap=500,
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmichaelsherrick[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [24]:
# Populate the trainer's replay buffer before training starts; the call prints
# "Done filling replay buffer" when it finishes.
trainer.fill_replay_buffer()

Done filling replay buffer

In [None]:
# Long-running training call (gens=10000, train_steps=5).
# NOTE(review): training_actor=False presumably means only the critic is
# updated -- the progress line reports only a critic loss. Confirm in Trainer.train.
trainer.train(gens=10000, train_steps=5, training_actor=False)

Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.
Critic Loss: 0.84	|	vg Reward: 0.1	|	Total Reward: 5.5	||

In [35]:
# Overwrite the saved checkpoints only after an explicit confirmation.
# The answer must start with "y" (case-insensitive, surrounding whitespace
# ignored). The previous substring test `'y' in input(...)` also accepted
# replies such as "nay" or "not yet", which would silently overwrite the
# weights on disk.
if input('YOU SURE?????').strip().lower().startswith('y'):
    agent.save_weights('agent_weights_small/agent')
    critic.save_weights('critic_weights_small/critic')

    # Saving the agent optimizer's state is currently disabled.
    # symbolic_weights = getattr(agent.optimizer, 'weights')
    # weight_values = K.batch_get_value(symbolic_weights)
    # with open('agent_weights_small/optimizer.pkl', 'wb') as f:
    #     pickle.dump(weight_values, f)

YOU SURE????? y


In [28]:
# Play a single greedy episode (max_steps=100) with the current agent and
# critic, passing the trainer's renderer along.
episode_data = trainer.player.run_episode(
    agent,
    critic,
    max_steps=100,
    greedy=True,
    renderer=trainer.renderer,
)

In [29]:
# Split the six-element episode record into individually named sequences.
(episode_boards,
 episode_pieces,
 episode_inputs,
 episode_probs,
 episode_values,
 episode_rewards) = episode_data

In [30]:
# Derive advantages and returns for the episode with the trainer's own GAE
# routine, using the gamma/lam stored on the trainer.
episode_advantages, episode_returns = trainer._compute_gae(
    episode_values,
    episode_rewards,
    trainer.gamma,
    trainer.lam,
)

In [31]:
# Overlay the four per-episode sequences on one axis so the critic's values can
# be compared with the returns, rewards and advantages from the greedy episode.
fig, ax = plt.subplots()
ax.plot(episode_returns, label='Returns')
ax.plot(episode_rewards, label='Rewards')
ax.plot(episode_values, label='Values')
ax.plot(episode_advantages, label='Advantages')
# Title and x-label let the figure stand on its own; x is the position within
# each sequence (presumably the episode step -- confirm against run_episode).
ax.set_title('Greedy episode diagnostics')
ax.set_xlabel('Step')
ax.legend()
# Last expression: total reward collected over the episode, shown as the cell output.
tf.reduce_sum(episode_rewards)

<tf.Tensor: shape=(), dtype=float32, numpy=5.499999>