In [1]:
from Player import Player
from Trainer import Trainer
from TetrisModel import TetrisModel
from Pretrainer import Pretrainer
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend as K
import matplotlib.pyplot as plt
import pickle
import glob
import time

In [2]:
# Model dimensions; forwarded to every TetrisModel under the same keyword names.
piece_dim, key_dim, depth = 8, 12, 16

# Reinforcement-learning settings handed to the Trainer; gamma and lam are also the
# two coefficients given to the GAE computation later in the notebook.
gamma, lam, temperature = 0.99, 0.95, 1.0

In [3]:
# Use lambda instead of gamma to imitate the shape of GAE without value predictions
# pretrainer = Pretrainer(gamma=lam)

In [4]:
# players_data = pretrainer._load_data()

In [5]:
# pretrainer._load_dset(players_data)

In [6]:
# max_len = pretrainer._max_len
# Hard-coded sequence length used in place of the pretrainer's value above: it is passed
# as max_length to every TetrisModel, as max_len to the Trainer, and it sets the length
# of the dummy key-token inputs used for the shape-check forward passes.
max_len = 10

In [7]:
# gt_dset = pretrainer._cache_dset()

In [8]:
# Policy network. Its output width is key_dim, so it emits one logit per key token.
agent_kwargs = dict(
    piece_dim=piece_dim,
    key_dim=key_dim,
    depth=depth,
    num_heads=4,
    num_layers=4,
    max_length=max_len,
    out_dim=key_dim,
)
agent = TetrisModel(**agent_kwargs)

In [38]:
# Adam optimizer for the policy network; the step size is named so it is easy to find and tune.
agent_lr = 1e-4
agent_optimizer = keras.optimizers.Adam(learning_rate=agent_lr)
agent.compile(optimizer=agent_optimizer)

In [39]:
# Run one random batch through the agent so that summary() has concrete shapes to report
# (presumably this call is also what creates the model's variables -- confirm).
# NOTE(review): the three inputs are assumed to be board, piece tokens and key tokens,
# in that order; the literal maxval=8 happens to equal piece_dim -- confirm that is intended.
dummy_board = tf.random.uniform((1, 28, 10, 1))
dummy_pieces = tf.random.uniform((1, 7), minval=0, maxval=8, dtype=tf.int32)
dummy_keys = tf.random.uniform((1, max_len), minval=0, maxval=key_dim, dtype=tf.int32)
logits, piece_scores, key_scores = agent((dummy_board, dummy_pieces, dummy_keys),
                                         return_scores=True)
# Print the layer summary and display the shapes of the three outputs.
agent.summary(), tf.shape(logits), tf.shape(piece_scores), tf.shape(key_scores)

Model: "tetris_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (None, 70, 16)            4800      
                                                                 
 seq_embedding (SeqEmbedding  multiple                 128       
 )                                                               
                                                                 
 seq_embedding_1 (SeqEmbeddi  multiple                 192       
 ng)                                                             
                                                                 
 piece_dec_0 (DecoderLayer)  multiple                  9776      
                                                                 
 piece_dec_1 (DecoderLayer)  multiple                  9776      
                                                                 
 piece_dec_2 (DecoderLayer)  multiple                 

(None,
 <tf.Tensor: shape=(3,), dtype=int32, numpy=array([ 1, 10, 12])>,
 <tf.Tensor: shape=(5,), dtype=int32, numpy=array([ 4,  1,  4,  7, 70])>,
 <tf.Tensor: shape=(5,), dtype=int32, numpy=array([ 4,  1,  4, 10,  7])>)

In [40]:
# Value network: same architecture settings as the agent, but with a single output unit.
critic_kwargs = dict(
    piece_dim=piece_dim,
    key_dim=key_dim,
    depth=depth,
    num_heads=4,
    num_layers=4,
    max_length=max_len,
    out_dim=1,
)
critic = TetrisModel(**critic_kwargs)

In [41]:
# Adam optimizer for the value network; the step size is named so it is easy to find and tune.
critic_lr = 1e-4
critic_optimizer = keras.optimizers.Adam(learning_rate=critic_lr)
critic.compile(optimizer=critic_optimizer)

In [42]:
# Run one random batch through the critic so that summary() has concrete shapes to report
# (presumably this call is also what creates the model's variables -- confirm).
# NOTE(review): the three inputs are assumed to be board, piece tokens and key tokens,
# in that order -- confirm against TetrisModel.
dummy_board = tf.random.uniform((1, 28, 10, 1))
dummy_pieces = tf.random.uniform((1, 7), minval=0, maxval=8, dtype=tf.int32)
dummy_keys = tf.random.uniform((1, max_len), minval=0, maxval=key_dim, dtype=tf.int32)
values, piece_scores, key_scores = critic((dummy_board, dummy_pieces, dummy_keys),
                                          return_scores=True)
# Print the layer summary and display the shapes of the three outputs.
critic.summary(), tf.shape(values), tf.shape(piece_scores), tf.shape(key_scores)

Model: "tetris_model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_27 (Sequential)  (1, 70, 16)               4800      
                                                                 
 seq_embedding_6 (SeqEmbeddi  multiple                 128       
 ng)                                                             
                                                                 
 seq_embedding_7 (SeqEmbeddi  multiple                 192       
 ng)                                                             
                                                                 
 piece_dec_0 (DecoderLayer)  multiple                  9776      
                                                                 
 piece_dec_1 (DecoderLayer)  multiple                  9776      
                                                                 
 piece_dec_2 (DecoderLayer)  multiple               

(None,
 <tf.Tensor: shape=(3,), dtype=int32, numpy=array([ 1, 10,  1])>,
 <tf.Tensor: shape=(5,), dtype=int32, numpy=array([ 4,  1,  4,  7, 70])>,
 <tf.Tensor: shape=(5,), dtype=int32, numpy=array([ 4,  1,  4, 10,  7])>)

In [44]:
# Restore the fine-tuned ("lowlr") checkpoints into the policy and the value network.
# The second call is the cell's last expression, so its load-status object is displayed.
agent.load_weights('agent_weights_small/agent_finetuned_lowlr')
critic.load_weights('critic_weights_small/critic_finetuned_lowlr')
# Disabled: restoring the agent optimizer's state from a pickle file.
# NOTE(review): if this is re-enabled, only unpickle files you trust -- pickle.load can
# execute arbitrary code.
# with open('agent_weights_small/optimizer.pkl', 'rb') as f:
#     weight_values = pickle.load(f)
# agent_optimizer.set_weights(weight_values)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1dab5991cd0>

In [45]:
# Reference model handed to the Trainer as ref_model; built with the same settings as the agent.
ref_agent_kwargs = dict(
    piece_dim=piece_dim,
    key_dim=key_dim,
    depth=depth,
    num_heads=4,
    num_layers=4,
    max_length=max_len,
    out_dim=key_dim,
)
ref_agent = TetrisModel(**ref_agent_kwargs)

In [46]:
# Run one random batch through the reference model and display the output shapes
# (presumably this call is also what creates the model's variables before weights are loaded -- confirm).
# NOTE(review): the three inputs are assumed to be board, piece tokens and key tokens,
# in that order -- confirm against TetrisModel.
dummy_board = tf.random.uniform((1, 28, 10, 1))
dummy_pieces = tf.random.uniform((1, 7), minval=0, maxval=8, dtype=tf.int32)
dummy_keys = tf.random.uniform((1, max_len), minval=0, maxval=key_dim, dtype=tf.int32)
logits, piece_scores, key_scores = ref_agent((dummy_board, dummy_pieces, dummy_keys),
                                             return_scores=True)
tf.shape(logits), tf.shape(piece_scores), tf.shape(key_scores)



(<tf.Tensor: shape=(3,), dtype=int32, numpy=array([ 1, 10, 12])>,
 <tf.Tensor: shape=(5,), dtype=int32, numpy=array([ 4,  1,  4,  7, 70])>,
 <tf.Tensor: shape=(5,), dtype=int32, numpy=array([ 4,  1,  4, 10,  7])>)

In [47]:
# The reference model is restored from the 'agent' checkpoint, not from the fine-tuned
# checkpoint that `agent` itself was loaded from.
ref_agent.load_weights('agent_weights_small/agent')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x1daba564850>

In [48]:
# epochs = 10

In [49]:
# actor_losses, critic_losses, accs = pretrainer.train(agent, critic, gt_dset, epochs)

In [50]:
# plt.plot(actor_losses)
# plt.plot(critic_losses)
# plt.plot(accs)

In [51]:
# if 'y' in input('YOU SURE?????'):
#     ref_agent.save_weights('agent_weights_small/agent_reference')

In [52]:
# Switch matplotlib to the Qt backend for the figures created from here on.
# NOTE(review): presumably also needed by the trainer's renderer -- confirm.
%matplotlib qt

In [55]:
# Wire the policy, the critic and the reference model into the Trainer. The two numeric
# limits are named here so they are easy to find and change.
episode_step_cap = 100
replay_capacity = 2000
trainer = Trainer(agent=agent, critic=critic, ref_model=ref_agent,
                  max_len=max_len,
                  gamma=gamma, lam=lam, temperature=temperature,
                  max_episode_steps=episode_step_cap,
                  buffer_cap=replay_capacity)

VBox(children=(Label(value='0.012 MB of 0.012 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [56]:
# Populate the trainer's replay buffer before the training loop starts.
trainer.fill_replay_buffer()

Done filling replay buffer

In [None]:
# Train indefinitely; the loop has no exit condition, so it only stops when the kernel is
# interrupted. After each trainer.train() call, both networks are checkpointed under a
# file name tagged with the current wandb run step.
while True:
    trainer.train(gens=500, train_steps=20, training_actor=True)
    agent.save_weights(f'agent_weights_small/agent_finetuned_{trainer.wandb_run.step}')
    critic.save_weights(f'critic_weights_small/critic_finetuned_{trainer.wandb_run.step}')

PPO Loss: 0.03	|	KL Penalty: 0.08	|	Critic Loss: 0.42	|		

In [41]:
# Manual checkpoint: write both networks to disk only after an explicit confirmation.
# The answer must be exactly "y" or "yes" (case-insensitive, surrounding whitespace
# ignored). An exact match is used because a substring test would also accept answers
# such as "no way" or "sorry" and overwrite checkpoints by accident.
answer = input('YOU SURE?????').strip().lower()
if answer in ('y', 'yes'):
    agent.save_weights(f'agent_weights_small/agent_finetuned_{trainer.wandb_run.step}')
    critic.save_weights(f'critic_weights_small/critic_finetuned_{trainer.wandb_run.step}')
    # Disabled: also dump the agent optimizer's state next to the weights.
    # symbolic_weights = getattr(agent.optimizer, 'weights')
    # weight_values = K.batch_get_value(symbolic_weights)
    # with open('agent_weights_small/optimizer.pkl', 'wb') as f:
    #     pickle.dump(weight_values, f)
# Display the wandb step that tags the checkpoint file names.
trainer.wandb_run.step

YOU SURE????? y


3840

In [58]:
# Roll out a single episode with greedy=True, capped at 100 steps, using the current
# agent and critic; the trainer's renderer is passed along to the player.
episode_data = trainer.player.run_episode(agent, critic, max_steps=100, greedy=True, renderer=trainer.renderer)

In [59]:
# Split the episode result into its six components, in the order run_episode returns them.
episode_boards, episode_pieces, episode_inputs, episode_probs, episode_values, episode_rewards = episode_data

In [60]:
# Compute advantages and returns for the episode with the trainer's own GAE routine,
# using the trainer's gamma and lam. Note that _compute_gae is a private method.
episode_advantages, episode_returns = trainer._compute_gae(episode_values, episode_rewards, trainer.gamma, trainer.lam)

In [61]:
# Overlay the GAE returns and the recorded episode values on one axis.
fig, ax = plt.subplots()
for series, name in ((episode_returns, 'Returns'), (episode_values, 'Values')):
    ax.plot(series, label=name)
ax.legend()
# Sum of all rewards in the episode; shown as the cell's output.
tf.reduce_sum(episode_rewards)

<tf.Tensor: shape=(), dtype=float32, numpy=6.5>

In [62]:
# Overlay the raw rewards and the GAE advantages on one axis.
fig, ax = plt.subplots()
for series, name in ((episode_rewards, 'Rewards'), (episode_advantages, 'Advantages')):
    ax.plot(series, label=name)
ax.legend()

<matplotlib.legend.Legend at 0x1db3e783a90>