In [1]:
from Player import Player
from Trainer import Trainer
from TetrisModel import TetrisModel
from Pretrainer import Pretrainer
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import glob
import time

In [2]:
piece_dim = 8
key_dim = 12
depth = 16
gamma = 0.99
lam = 0.95

In [3]:
# Use lambda instead of gamma to immitate shape of gae without value predictions
pretrainer = Pretrainer(gamma=lam)

In [7]:
players_data = pretrainer._load_data()

In [8]:
pretrainer._load_dset(players_data)

1.00

In [4]:
# max_len = pretrainer._max_len
max_len = 7

In [9]:
gt_dset = pretrainer._cache_dset()

Done Caching


In [5]:
agent = TetrisModel(piece_dim=piece_dim,
                    key_dim=key_dim,
                    depth=depth,
                    num_heads=4,
                    num_layers=4,
                    max_length=max_len)

In [6]:
logits, values = agent((tf.random.uniform((1, 28, 10, 1)),
                        tf.random.uniform((1, 7), minval=0, maxval=8, dtype=tf.int32),
                        tf.random.uniform((1, max_len-1), minval=0, maxval=key_dim, dtype=tf.int32)))
agent.summary(), tf.shape(logits), tf.shape(values)

Model: "tetris_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential (Sequential)     (1, 70, 16)               4800      
                                                                 
 seq_embedding (SeqEmbedding  multiple                 128       
 )                                                               
                                                                 
 seq_embedding_1 (SeqEmbeddi  multiple                 192       
 ng)                                                             
                                                                 
 pdec_0 (DecoderLayer)       multiple                  9776      
                                                                 
 pdec_1 (DecoderLayer)       multiple                  9776      
                                                                 
 pdec_2 (DecoderLayer)       multiple                 

(None,
 <tf.Tensor: shape=(3,), dtype=int32, numpy=array([ 1,  6, 12])>,
 <tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 6, 1])>)

In [7]:
agent_optimizer = keras.optimizers.Adam(learning_rate=1e-6)
agent.compile(optimizer=agent_optimizer)

In [8]:
epochs = 10

In [9]:
agent.load_weights('agent_weights/agent_finetuned')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x25da08c9a90>

In [None]:
losses, accs = pretrainer.train(agent, gt_dset, epochs)

In [None]:
plt.plot(losses)
plt.plot(accs)

In [15]:
if 'y' in input('YOU SURE?????'):
    agent.save_weights('agent_weights/agent_finetuned')

YOU SURE????? y


In [10]:
%matplotlib qt

In [11]:
optimizers = keras.optimizers.Adam(learning_rate=1e-6), keras.optimizers.Adam(learning_rate=1e-6)

In [17]:
trainer = Trainer(model=agent,
                  optimizers=optimizers,
                  max_len=max_len,
                  gamma=gamma,
                  lam=lam,
                  max_episode_steps=1000,
                  buffer_cap=2000)

VBox(children=(Label(value='0.013 MB of 0.013 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

In [18]:
trainer.fill_replay_buffer()

Done filling replay buffer


In [None]:
trainer.train(gens=10000, train_steps=10, training_actor=True)

Current Gen: 1	|	Avg Reward: 0.0	|	Total Reward: 6.3	|
Current Gen: 2	|	Avg Reward: 0.0	|	Total Reward: 8.9	|
Current Gen: 3	|	Avg Reward: 0.0	|	Total Reward: 8.9	|
Current Gen: 4	|	Avg Reward: 0.0	|	Total Reward: 8.9	|
Current Gen: 5	|	Avg Reward: 0.0	|	Total Reward: 8.1	|
Current Gen: 6	|	Avg Reward: 0.0	|	Total Reward: 9.1	|
Current Gen: 7	|	Avg Reward: 0.0	|	Total Reward: 7.8	|
Current Gen: 8	|	Avg Reward: 0.0	|	Total Reward: 6.0	|
Current Gen: 9	|	Avg Reward: 0.0	|	Total Reward: 9.0	|
Current Gen: 10	|	Avg Reward: 0.0	|	Total Reward: 6.1	|
Current Gen: 11	|	Avg Reward: 0.0	|	Total Reward: 11.1	|
Current Gen: 12	|	Avg Reward: 0.0	|	Total Reward: 10.1	|
Current Gen: 13	|	Avg Reward: 0.0	|	Total Reward: 6.8	|
Current Gen: 14	|	Avg Reward: 0.0	|	Total Reward: 4.4	|
Current Gen: 15	|	Avg Reward: 0.0	|	Total Reward: 5.4	|
Current Gen: 16	|	Avg Reward: 0.0	|	Total Reward: 8.4	|
Current Gen: 17	|	Avg Reward: 0.0	|	Total Reward: 12.2	|
Current Gen: 18	|	Avg Reward: 0.0	|	Total Reward: 5.4	

In [23]:
episode_data = trainer.player.run_episode(agent, max_steps=1000, greedy=True, renderer=trainer.renderer)

In [24]:
episode_boards, episode_pieces, episode_inputs, episode_actions, episode_valid, episode_probs, episode_values, episode_rewards = episode_data

In [25]:
episode_advantages, episode_returns = trainer._compute_gae(episode_values, episode_rewards, trainer.gamma, trainer.lam)

In [33]:
fig, ax = plt.subplots()
ax.plot(episode_returns, label='Returns')
ax.plot(episode_rewards, label='Rewards')
ax.plot(episode_values, label='Values')
ax.plot(episode_advantages, label='Advantages')
ax.legend()

<matplotlib.legend.Legend at 0x261ae120520>