In [1]:
from model import DeepQNetwork
from env import visualize_episodes, Environment
import tensorflow as tf
import numpy as np

In [2]:
# Hyperparameters shared by every configuration in this notebook; they are merged
# with one of the STATIC_* dicts to form `params`.
# NOTE(review): only 'learning_rate' is read in the cells visible here; the other
# keys are presumably consumed by training code elsewhere — confirm before pruning.
DEFAULT_PARAMS = dict(
    batches_per_epoch=1000,
    learning_rate=0.001,
    gamma=0.9,
    epsilon=0.9,
    epsilon_decay=0.97,
    buffer_size_in_batches=10000,
    batch_size=16,
    replay_ratio=0.1,
    polyak_avg_fac=0.995,
)

# Architecture / run settings for the variant with visual=False.
STATIC_MLP = dict(
    epochs=50,
    visual=False,
    rgb=False,
    cnn_depth=2,
    mlp_layers=2,
    head_layers=1,
    filters=1,
    kernel_size=3,
    k_init='glorot_uniform',
    b_init='zeros',
)

# The CNN variant is identical to STATIC_MLP except that 'visual' is True.
# Overriding an existing key keeps its position, so the key order is unchanged.
STATIC_CNN = {**STATIC_MLP, 'visual': True}

In [3]:
# Merge the shared hyperparameters with the static settings of the chosen variant;
# on a key clash the right-hand dict wins.
# NOTE(review): to use the other variant, presumably swap STATIC_MLP for STATIC_CNN — confirm.
params = {**DEFAULT_PARAMS, **STATIC_MLP}

In [4]:
# Build the Q-network: an SGD optimizer using the configured learning rate is passed
# positionally, and the architecture settings are forwarded from `params` as
# keyword arguments under their own names.
model = DeepQNetwork(
    tf.keras.optimizers.SGD(learning_rate=params['learning_rate']),
    **{
        key: params[key]
        for key in (
            'visual', 'rgb', 'cnn_depth', 'mlp_layers', 'head_layers',
            'filters', 'kernel_size', 'k_init', 'b_init',
        )
    }
)

In [5]:
# Create the environment with the same `rgb` setting that was passed to the model.
env = Environment(rgb=params['rgb'])

In [6]:
# Restore previously saved model state; the arguments are presumably
# (directory, file-name prefix) — confirm against DeepQNetwork.load.
# NOTE(review): the directory is an absolute, machine-specific path; consider a
# configurable data directory so the notebook runs elsewhere.
# NOTE(review): the prefix starts with 'cnn' while `params` is built from
# STATIC_MLP (visual=False) — confirm the checkpoint matches the constructed model.
model.load('D:/experiment_data_DREPL/','cnn_id1_ep5_')

In [9]:
def get_example_episodes(env, model, n=20, max_steps=None):
    """Roll out `n` greedy episodes and record the action chosen at every step.

    For each episode the action with the highest predicted Q-value is selected
    for the current observation and applied with ``env.step``; this repeats
    until ``env.terminal`` is truthy or, if given, ``max_steps`` steps have
    been executed.

    Args:
        env: Environment exposing ``reset()``, ``observation``, ``terminal``
            and ``step(action)``; index 0 of the value returned by ``step`` is
            used as the next observation.
        model: Callable mapping a batched observation (a leading axis of size
            1 is added here) to Q-values.
        n: Number of episodes to record.
        max_steps: Optional upper bound on ``env.step`` calls per episode.
            ``None`` (the default) keeps the original unbounded behaviour.
            Set it to guard against a greedy policy that never reaches a
            terminal state, which would otherwise loop forever.

    Returns:
        A list with `n` entries, each a list of ``int`` actions for one
        episode. The last action of every list was selected (from the final
        observation) but never executed, matching the original behaviour.

    Side effects:
        ``env`` is reset before the first episode and after every episode, so
        it is left freshly reset when the function returns.
    """

    def greedy_action(observation):
        # Add a batch axis, query the network, and pick the arg-max Q-value.
        q_vals = model(tf.expand_dims(observation, axis=0))
        return int(np.argmax(tf.squeeze(q_vals).numpy()))

    episodes = []
    env.reset()

    for _ in range(n):
        actions = [greedy_action(env.observation)]
        steps_taken = 0

        # Stop on a terminal state, or once the optional step budget is used up.
        while not env.terminal and (max_steps is None or steps_taken < max_steps):
            next_observation = env.step(actions[-1])[0]
            steps_taken += 1
            actions.append(greedy_action(next_observation))

        episodes.append(actions)
        env.reset()

    return episodes

In [10]:
# Record greedy (arg-max Q-value) episodes with the loaded model, using the
# function's default episode count.
example_episodes = get_example_episodes(env, model)

In [9]:
# Hand the recorded per-episode action lists to `visualize_episodes` (imported from env).
visualize_episodes(example_episodes)

In [12]:
# example_episodes

In [23]:
# Sanity check: reset the environment, take action 1, and display everything after
# the first element of step()'s return value. The recorded output shows two
# shape-(1,) tensors (float32, then bool) — presumably reward and terminal flag;
# confirm against Environment.step.
env.reset()
env.step(1)[1:]

(<tf.Tensor: shape=(1,), dtype=float32, numpy=array([-2.3663082], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=bool, numpy=array([False])>)

In [10]:
# from PIL import Image

# env = Environment(rgb=False)

# env.reset()
# img = Image.fromarray(np.squeeze(env.observation.numpy()), 'L')
# img.show()