In [1]:
# Copyright 2022 Mathias Lechner
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
import tensorflow as tf

# import gymnasium
import ale_py
from ray.rllib.env.wrappers.atari_wrappers import wrap_deepmind
from ncps.tf import CfC
import numpy as np
from ncps.datasets.tf import AtariCloningDatasetTF
import gymnasium as gym
import os 
from tensorflow.keras.callbacks import ModelCheckpoint
# Single source of truth for the seed; each RNG is seeded exactly once
# (the original seeded TensorFlow twice with the same value).
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Environment switches asking TensorFlow for deterministic op implementations.
os.environ["TF_DETERMINISTIC_OPS"] = "1"
os.environ["TF_CUDNN_DETERMINISTIC"] = "1"
# Limit both op-level thread pools to a single thread.
tf.config.threading.set_inter_op_parallelism_threads(1)
tf.config.threading.set_intra_op_parallelism_threads(1)

# Note: ConvBlock (defined in the next cell) is not used in this example





  from pkg_resources import packaging
  VALID_NP_HPARAMS = (np.bool8, np.float32, np.float64, np.int32, np.int64)


In [2]:
class ConvBlock(tf.keras.models.Sequential):
    """Plain convolutional encoder for inputs of shape (84, 84, 4).

    The stack casts the input to float32 and divides by 255, applies four
    stride-2 5x5 ReLU convolutions (64, 128, 128, 256 filters) and finishes
    with global average pooling.

    No other definition in this notebook references this class.
    """

    def __init__(self):
        stack = [
            tf.keras.Input((84, 84, 4)),
            # normalize input
            tf.keras.layers.Lambda(lambda x: tf.cast(x, tf.float32) / 255.0),
        ]
        # All convolutions share every setting except the filter count.
        for n_filters in (64, 128, 128, 256):
            stack.append(
                tf.keras.layers.Conv2D(
                    n_filters, 5, padding="same", activation="relu", strides=2
                )
            )
        stack.append(tf.keras.layers.GlobalAveragePooling2D())
        super().__init__(stack)


class ImpalaConvLayer(tf.keras.models.Sequential):
    """Conv2D -> BatchNormalization -> ReLU building block used by ImpalaConvBlock.

    Args:
        filters: Number of output filters of the convolution.
        kernel_size: Convolution kernel size.
        strides: Convolution stride.
        padding: Padding mode forwarded to ``Conv2D`` (default ``"valid"``).
        first: Optional batch input shape. When it is not ``None`` it is
            forwarded to the convolution as ``batch_input_shape``; otherwise
            no input shape is declared.
        use_bias: Whether the convolution has a bias term (default ``False``).
    """

    def __init__(self, filters, kernel_size, strides, padding="valid", first=None, use_bias=False):
        conv_kwargs = dict(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            use_bias=use_bias,
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=2.0, mode="fan_out", distribution="truncated_normal"
            ),
        )
        # Identity check instead of `== None`: equality would be evaluated
        # element-wise (and be ambiguous) for array-like shape arguments.
        if first is not None:
            conv_kwargs["batch_input_shape"] = first

        super().__init__(
            [
                tf.keras.layers.Conv2D(**conv_kwargs),
                tf.keras.layers.BatchNormalization(momentum=0.99, epsilon=0.001),
                tf.keras.layers.ReLU(),
            ]
        )


class ImpalaConvBlock(tf.keras.models.Sequential):
    """Image encoder: three ImpalaConvLayer stages, then Flatten and a 256-unit ReLU Dense layer."""

    def __init__(self):
        # (filters, kernel_size, strides) for each convolutional stage, in order.
        conv_specs = ((16, 8, 4), (32, 4, 2), (32, 3, 1))
        stack = [
            ImpalaConvLayer(filters=n_filters, kernel_size=kernel, strides=stride, first=None)
            for n_filters, kernel, stride in conv_specs
        ]
        stack.append(tf.keras.layers.Flatten())
        stack.append(tf.keras.layers.Dense(units=256, activation="relu"))
        super().__init__(stack)


class ConvCFC(tf.keras.Model):
    """Per-frame convolutional encoder followed by a CfC recurrent layer and a linear head.

    Every time step of the input is encoded with ``ImpalaConvBlock`` (wrapped
    in ``TimeDistributed``), the resulting sequence is fed to a ``CfC`` layer
    created with ``return_sequences=True`` and ``return_state=True``, and a
    ``Dense(n_actions)`` layer is applied to the recurrent outputs.

    Args:
        n_actions: Number of units of the final Dense layer.
        units, mixed_memory, go_backwards, stateful, backbone_units,
        backbone_layers, backbone_dropout: Forwarded unchanged to ``CfC``.
    """

    def __init__(self, n_actions, units = 4, mixed_memory=False, go_backwards=False, stateful=False,backbone_units=128, backbone_layers=1, backbone_dropout=0):
        super().__init__()
        self.mixed_memory = mixed_memory
        self.conv_block = ImpalaConvBlock()
        self.td_conv = tf.keras.layers.TimeDistributed(self.conv_block)
        self.rnn = CfC(units=units,
                       mixed_memory=mixed_memory,
                       go_backwards=go_backwards,
                       stateful=stateful,
                       backbone_units=backbone_units,
                       backbone_layers=backbone_layers,
                       backbone_dropout=backbone_dropout,
                       return_sequences=True,
                       return_state=True)
        self.linear = tf.keras.layers.Dense(n_actions)

    def get_initial_states(self, batch_size=1):
        """Return the RNN cell's initial state for ``batch_size`` sequences (float32)."""
        return self.rnn.cell.get_initial_state(batch_size=batch_size, dtype=tf.float32)

    def call(self, x, training=None, **kwargs):
        """Run the model.

        Args:
            x: Either the input sequence alone, or a ``(sequence, initial_state)``
                list/tuple when the caller wants to carry recurrent state.
            training: Forwarded to the time-distributed encoder.

        Returns:
            The Dense outputs alone when ``x`` was a bare sequence, otherwise
            ``(outputs, next_state)``. ``next_state`` is the single state
            tensor when the RNN returns one state, or the list of all state
            tensors when it returns several, so it can be passed back in as
            ``initial_state`` on the next call.
        """
        has_hx = isinstance(x, (list, tuple))
        initial_state = None
        if has_hx:
            x, initial_state = x

        x = self.td_conv(x, training=training)

        # With return_state=True the layer returns the outputs followed by its
        # state tensor(s). The mixed-memory variant yields two of them; keep
        # them all rather than dropping the first, otherwise the returned
        # state no longer matches what the cell expects as initial_state.
        x, *states = self.rnn(x, initial_state=initial_state)
        next_state = states[0] if len(states) == 1 else states

        x = self.linear(x)
        if has_hx:
            return (x, next_state)
        return x


In [3]:
def _reset_env(env):
    """Reset ``env`` and return only the observation.

    Accepts both reset conventions: a bare observation, or an
    ``(observation, info_dict)`` pair.
    """
    result = env.reset()
    if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
        return result[0]
    return result


def _step_env(env, action):
    """Step ``env`` and return ``(observation, reward, done)``.

    Accepts both step conventions: the 4-tuple ``(obs, r, done, info)`` and
    the 5-tuple ``(obs, r, terminated, truncated, info)``.
    """
    result = env.step(action)
    if len(result) == 5:
        obs, reward, terminated, truncated, _ = result
        return obs, reward, bool(terminated or truncated)
    obs, reward, done, _ = result
    return obs, reward, done


def run_closed_loop(model, env, num_episodes=None):
    """Let ``model`` act greedily in ``env`` and collect per-episode returns.

    Args:
        model: Object exposing ``get_initial_states()`` and
            ``predict((obs, hx), verbose=0)`` returning ``(pred, next_hx)``.
        env: Environment exposing ``reset()`` and ``step(action)``.
        num_episodes: Number of episodes to play. With ``None`` the loop never
            returns and prints running statistics after every episode.

    Returns:
        List with the summed reward of each finished episode (empty when
        ``num_episodes`` is zero or negative).
    """
    if num_episodes is not None and num_episodes <= 0:
        return []

    # The first observation must exist before the loop body reads it.
    obs = _reset_env(env)
    hx = model.get_initial_states()
    returns = []
    total_reward = 0
    while True:
        # add batch and time dimension (with a single element in each)
        obs = np.expand_dims(np.expand_dims(obs, 0), 0)
        pred, hx = model.predict((obs, hx), verbose=0)
        # remove time and batch dimension -> then argmax
        action = pred[0, 0].argmax()
        obs, r, done = _step_env(env, action)
        total_reward += r
        if done:
            returns.append(total_reward)
            total_reward = 0
            obs = _reset_env(env)
            # Reset RNN hidden states when episode is over
            hx = model.get_initial_states()
            if num_episodes is not None:
                # Count down the number of episodes
                num_episodes = num_episodes - 1
                if num_episodes == 0:
                    return returns
            else:
                print(
                    f"Return {returns[-1]:0.2f} [{np.mean(returns):0.2f} +- {np.std(returns):0.2f}]"
                )



In [4]:

def run_test(LR=0.0001, epochs=10, 
             units=4, 
             mixed_memory=False, 
             go_backwards=False, 
             stateful=False,
             backbone_units=128, 
             backbone_layers=1, 
             backbone_dropout=0,
             batch_size = 32):
    """Train a ConvCFC on the "breakout" cloning dataset, then visualize it.

    Args:
        LR: Learning rate passed to the Adam optimizer.
        epochs: Number of epochs passed to ``model.fit``.
        units, mixed_memory, go_backwards, stateful, backbone_units,
        backbone_layers, backbone_dropout: Forwarded unchanged to ``ConvCFC``.
        batch_size: Batch size used for both the train and validation loaders.

    Note:
        The final ``visualize(model)`` call runs the closed loop without an
        episode limit, so this function does not return on its own.
    """
    # The environment is only needed to read the size of the action space,
    # so release it as soon as that number is known.
    env = gym.make("ALE/Breakout-v5")
    env = wrap_deepmind(env)
    n_actions = env.action_space.n
    env.close()

    data = AtariCloningDatasetTF("breakout")
    trainloader = data.get_dataset(batch_size, split="train")
    valloader = data.get_dataset(batch_size, split="val")

    model = ConvCFC(n_actions, units=units, mixed_memory=mixed_memory, go_backwards=go_backwards,
                    stateful=stateful, backbone_units=backbone_units, backbone_layers=backbone_layers,
                    backbone_dropout=backbone_dropout)
    # The model's last layer is a Dense without activation, hence from_logits=True.
    loss_fxn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        loss=loss_fxn,
        optimizer=tf.keras.optimizers.Adam(LR),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )
    # Batch and time dimensions are left unspecified; frames are 84x84x4.
    model.build((None, None, 84, 84, 4))

    model.summary()
    model.fit(
        trainloader,
        epochs=epochs,
        validation_data=valloader,
        verbose=1,
    )
    visualize(model)



def visualize(model):
    """Render Breakout in a window and let ``model`` play it without an episode limit."""
    rendered_env = gym.make("ALE/Breakout-v5", render_mode="human")
    run_closed_loop(model, wrap_deepmind(rendered_env))


In [5]:
# Train for a single epoch, leaving every other hyperparameter at its default.
run_test(epochs=1)



Downloading data ... [done]


















Model: "conv_cfc"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 impala_conv_block (ImpalaC  (None, 256)               423488    
 onvBlock)                                                       
                                                                 
 time_distributed (TimeDist  multiple                  423488    
 ributed)                                                        
                                                                 
 cf_c (CfC)                  multiple                  35472     
                                                                 
 dense_1 (Dense)             multiple                  20        
                                                                 
Total params: 458980 (1.75 MB)
Trainable params: 458820 (1.75 MB)
Non-trainable params: 160 (640.00 Byte)
_________________________________________________________________






 52/938 [>.............................] - ETA: 21:17 - loss: 0.6773 - sparse_categorical_accuracy: 0.7977

: 