In [1]:

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
import tensorflow as tf

# import gymnasium
import ale_py
from ray.rllib.env.wrappers.atari_wrappers import wrap_deepmind
from ncps.tf import CfC
import numpy as np
from ncps.datasets.tf import AtariCloningDatasetTF
import gym
import os 
from tensorflow.keras.callbacks import ModelCheckpoint
# Environment flags requesting deterministic TensorFlow / cuDNN ops. They are
# set before any seeding or op execution happens in this notebook.
os.environ["TF_DETERMINISTIC_OPS"] = "1"
os.environ["TF_CUDNN_DETERMINISTIC"] = "1"

# Seed every random number generator used in this notebook exactly once.
np.random.seed(42)
tf.random.set_seed(42)

# Restrict TensorFlow to one thread for both inter-op and intra-op parallelism.
tf.config.threading.set_inter_op_parallelism_threads(1)
tf.config.threading.set_intra_op_parallelism_threads(1)

# Not used in this example

2024-03-17 12:44:07.569428: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-17 12:44:07.624044: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-17 12:44:07.624090: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-17 12:44:07.625508: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-17 12:44:07.634583: I tensorflow/core/platform/cpu_feature_guar

In [2]:
class ConvBlock(tf.keras.models.Sequential):
    """Plain convolutional feature extractor for inputs of shape (84, 84, 4).

    The input is cast to float32 and divided by 255, passed through four
    stride-2 5x5 ReLU convolutions with "same" padding (64, 128, 128 and 256
    filters) and finally reduced with global average pooling.
    """

    def __init__(self):
        stack = [
            tf.keras.Input((84, 84, 4)),
            # normalize input
            tf.keras.layers.Lambda(lambda x: tf.cast(x, tf.float32) / 255.0),
        ]
        for n_filters in (64, 128, 128, 256):
            stack.append(
                tf.keras.layers.Conv2D(
                    n_filters, 5, padding="same", activation="relu", strides=2
                )
            )
        stack.append(tf.keras.layers.GlobalAveragePooling2D())
        super(ConvBlock, self).__init__(stack)


class ImpalaConvLayer(tf.keras.models.Sequential):
    """Conv2D -> BatchNormalization -> ReLU stack.

    Args:
        filters: number of convolution filters.
        kernel_size: convolution kernel size.
        strides: convolution stride.
        padding: convolution padding mode (default "valid").
        first: optional value forwarded to the convolution as
            ``batch_input_shape``. When None the argument is omitted.
        use_bias: whether the convolution has a bias term (default False).
    """

    def __init__(self, filters, kernel_size, strides, padding="valid",first = None, use_bias=False):
        conv_kwargs = dict(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            use_bias=use_bias,
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=2.0, mode="fan_out", distribution="truncated_normal"
            ),
        )
        # Identity check instead of `== None`: equality is ambiguous for
        # array-like shapes and is not the idiomatic way to test for None.
        if first is not None:
            conv_kwargs["batch_input_shape"] = first

        super(ImpalaConvLayer, self).__init__(
            [
                tf.keras.layers.Conv2D(**conv_kwargs),
                tf.keras.layers.BatchNormalization(momentum=0.99, epsilon=0.001),
                tf.keras.layers.ReLU(),
            ]
        )


class ImpalaConvBlock(tf.keras.models.Sequential):
    """Three ImpalaConvLayer stages followed by Flatten and a 256-unit ReLU Dense layer."""

    def __init__(self):
        # (filters, kernel_size, strides) of the successive convolution stages.
        conv_specs = ((16, 8, 4), (32, 4, 2), (32, 3, 1))
        conv_stages = [
            ImpalaConvLayer(filters=n_filters, kernel_size=kernel, strides=stride, first=None)
            for n_filters, kernel, stride in conv_specs
        ]
        head = [
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(units=256, activation="relu"),
        ]
        super(ImpalaConvBlock, self).__init__(conv_stages + head)


class ConvCFC(tf.keras.Model):
    """Convolutional encoder applied per time step, followed by a CfC RNN and a linear head.

    Args:
        n_actions: number of outputs of the final Dense layer.
        units, mixed_memory, go_backwards, stateful, backbone_units,
        backbone_layers, backbone_dropout: forwarded unchanged to ``CfC``.

    The RNN is built with ``return_sequences=True`` and ``return_state=True``,
    so one output is produced per time step and the final state(s) are
    available to the caller.
    """

    def __init__(self, n_actions, units = 4, mixed_memory=False, go_backwards=False, stateful=False,backbone_units=128, backbone_layers=1, backbone_dropout=0):
        super().__init__()
        self.mixed_memory = mixed_memory
        self.conv_block = ImpalaConvBlock()
        self.td_conv = tf.keras.layers.TimeDistributed(self.conv_block)
        self.rnn = CfC(units=units,
                       mixed_memory=mixed_memory,
                       go_backwards=go_backwards,
                       stateful=stateful,
                       backbone_units=backbone_units,
                       backbone_layers=backbone_layers,
                       backbone_dropout=backbone_dropout,
                       return_sequences=True,
                       return_state=True)
        self.linear = tf.keras.layers.Dense(n_actions)


    def get_initial_states(self, batch_size=1):
        """Return the RNN cell's initial state for ``batch_size`` sequences (float32)."""
        return self.rnn.cell.get_initial_state(batch_size=batch_size, dtype=tf.float32)

    def call(self, x, training=None, **kwargs):
        """Run the model on ``x`` or on an ``(x, initial_state)`` pair.

        Returns:
            The Dense-layer outputs when ``x`` is a single tensor, or
            ``(outputs, next_state)`` when a list/tuple was passed.
            ``next_state`` is a single tensor when the RNN has one state and a
            list of tensors when it has several (mixed-memory mode), so it can
            be passed back as ``initial_state`` on the next call.
        """
        has_hx = isinstance(x, (list, tuple))
        initial_state = None
        if has_hx:
            # additional inputs are passed as a tuple
            x, initial_state = x

        x = self.td_conv(x, training=training)

        # With return_state=True the RNN returns the output followed by every
        # state tensor. Mixed-memory mode yields two of them; all are kept so
        # the returned state matches what `initial_state` expects.
        x, *states = self.rnn(x, initial_state=initial_state)
        next_state = states[0] if len(states) == 1 else states

        x = self.linear(x)
        if has_hx:
            return (x, next_state)
        return x


In [3]:

def run_closed_loop(model, env, num_episodes=None):
    """Let ``model`` act greedily in ``env`` and collect the episode returns.

    Both environment API flavours are supported: ``reset()`` may return either
    the observation or an ``(observation, info)`` pair, and ``step()`` may
    return either ``(obs, reward, done, info)`` or
    ``(obs, reward, terminated, truncated, info)``.

    Args:
        model: object providing ``get_initial_states()`` and
            ``predict((obs, state), verbose=0) -> (prediction, next_state)``.
        env: the environment to interact with.
        num_episodes: number of episodes to play. When None the loop never
            ends and the running return statistics are printed after each
            episode.

    Returns:
        The list of per-episode returns (only when ``num_episodes`` is given).
    """
    if num_episodes is not None and num_episodes <= 0:
        # Nothing to play; the original countdown would never have hit zero.
        return []

    def _reset_env():
        # Newer gym/gymnasium versions return (obs, info) from reset().
        out = env.reset()
        if isinstance(out, tuple) and len(out) == 2 and isinstance(out[1], dict):
            return out[0]
        return out

    obs = _reset_env()
    hx = model.get_initial_states()
    returns = []
    total_reward = 0
    while True:
        # add batch and time dimension (with a single element in each)
        obs = np.expand_dims(np.expand_dims(obs, 0), 0)
        pred, hx = model.predict((obs, hx), verbose=0)
        # remove time and batch dimension -> then argmax
        action = pred[0, 0].argmax()

        step_out = env.step(action)
        if len(step_out) == 5:
            obs, r, terminated, truncated, _ = step_out
            done = terminated or truncated
        else:
            obs, r, done, _ = step_out

        total_reward += r
        if done:
            returns.append(total_reward)
            total_reward = 0
            obs = _reset_env()
            # Reset RNN hidden states when episode is over
            hx = model.get_initial_states()
            if num_episodes is not None:
                # Count down the number of episodes
                num_episodes = num_episodes - 1
                if num_episodes <= 0:
                    return returns
            else:
                print(
                    f"Return {returns[-1]:0.2f} [{np.mean(returns):0.2f} +- {np.std(returns):0.2f}]"
                )


In [4]:

def register_metrics(LR,
                    batch_size, 
                    epochs, 
                    epoch, 
                    mixed_memory, 
                    go_backwards, 
                    stateful, 
                    hidden_size, 
                    backbone_units, 
                    backbone_layers, 
                    backbone_dropout,
                    loss, 
                    accuracy, 
                    precision_score, 
                    f1, 
                    recall,
                    file_path="cfc_multi_exp.csv"):
    """Append one row of hyper-parameters and metrics to a CSV log.

    If ``file_path`` does not exist yet, it is first created with a header row
    and a notice is printed. The values are then appended as one row, in the
    same order as the header columns.
    """
    import os
    import csv

    header = (
        "Learing rate", "batch_size", "epochs", "current_epoch", "mixed_memory",
        "go_backwards", "stateful", "hidden_size", "backbone_units",
        "backbone_layers", "backbone_dropout", "loss",
        "accuracy", "precision_score", "f1", "recall",
    )
    record = (
        LR, batch_size, epochs, epoch, mixed_memory, go_backwards, stateful,
        hidden_size, backbone_units, backbone_layers, backbone_dropout,
        loss, accuracy, precision_score, f1, recall,
    )

    def _write_row(mode, cells):
        # Open, write exactly one CSV row, close.
        with open(file_path, mode, newline='') as handle:
            csv.writer(handle).writerow(list(cells))

    if not os.path.exists(file_path):
        # First use: create the file and give it its header.
        _write_row('w', header)
        print(f'CSV file "{file_path}" created.')

    _write_row('a', record)


In [5]:

# TODO
class EvalCSVCallback(tf.keras.callbacks.Callback):
    """Save the model and log validation metrics to CSV after every epoch.

    Args:
        model: the model to save and evaluate.
        valloader: iterable yielding ``(inputs, labels)`` validation batches.
        inp_data: dict of run settings; the keys 'LR', 'batch_size', 'epochs',
            'mixed_memory', 'go_backwards', 'stateful', 'units',
            'backbone_units', 'backbone_layers' and 'backbone_dropout' are read.
        loss_fxn: callable ``loss_fxn(labels, outputs)`` returning a tensor.
    """

    def __init__(self,model,valloader,inp_data,loss_fxn):
        super().__init__()
        self.model = model
        self.valloader = valloader
        self.inp_data = inp_data
        self.loss_fxn = loss_fxn

    def on_epoch_end(self,epoch,logs=None):
        """Save the model, evaluate it on the validation batches and append a CSV row."""
        self.model.save('my_model', save_format='tf')

        pred_chunks = []
        true_chunks = []
        total_loss = 0
        num_batches = 0
        for inputs, labels in self.valloader:
            outputs = self.model.predict(inputs,verbose=0)
            loss = self.loss_fxn(labels, outputs)

            # Store predictions and true labels, flattened per batch so that
            # batches with different sizes or sequence lengths can be combined.
            pred_chunks.append(tf.argmax(outputs, axis=-1).numpy().reshape(-1))
            true_chunks.append(labels.numpy().reshape(-1))

            # Accumulate the total loss
            total_loss += loss.numpy()
            num_batches += 1

        if num_batches == 0:
            # Nothing to evaluate: avoid dividing by zero / scoring empty arrays.
            print("EvalCSVCallback: validation loader yielded no batches; metrics skipped.")
            return

        all_pred_labels = np.concatenate(pred_chunks)
        all_true_labels = np.concatenate(true_chunks)

        # Calculate metrics. The weighted scores are restricted to the classes
        # that were actually predicted, as in the original implementation.
        predicted_classes = np.unique(all_pred_labels)
        precision = precision_score(all_true_labels, all_pred_labels, average='weighted', labels=predicted_classes)
        recall = recall_score(all_true_labels, all_pred_labels, average='weighted', labels=predicted_classes)
        f1 = f1_score(all_true_labels, all_pred_labels, average='weighted', labels=predicted_classes)
        accuracy = accuracy_score(all_true_labels, all_pred_labels)
        # Batches are counted while iterating instead of calling len() on the
        # loader, which is not available for every kind of dataset.
        average_loss = total_loss / num_batches

        # Register metrics
        register_metrics(
                    self.inp_data['LR'],
                    self.inp_data['batch_size'],
                    self.inp_data['epochs'],
                    epoch,
                    self.inp_data['mixed_memory'],
                    self.inp_data['go_backwards'],
                    self.inp_data['stateful'],
                    self.inp_data['units'],
                    self.inp_data['backbone_units'],
                    self.inp_data['backbone_layers'],
                    self.inp_data['backbone_dropout'],
                    average_loss,
                    accuracy,
                    precision,
                    f1,
                    recall
                )


In [6]:

def run_test(LR=0.0001, epochs=10, 
             units=4, 
             mixed_memory=False, 
             go_backwards=False, 
             stateful=False,
             backbone_units=128, 
             backbone_layers=1, 
             backbone_dropout=0,
             batch_size = 32,
             visualize_after=True):
    """Train a ConvCFC model on the Breakout cloning dataset for one configuration.

    Args:
        LR: learning rate passed to the Adam optimizer.
        epochs: number of training epochs.
        units, mixed_memory, go_backwards, stateful, backbone_units,
        backbone_layers, backbone_dropout: forwarded to ``ConvCFC``.
        batch_size: batch size of the training and validation datasets.
        visualize_after: when True (the default, matching the previous
            behaviour) ``visualize(model)`` is called after training. Pass
            False to return right after training.
    """
    # The environment is only needed to look up the size of the action space,
    # so it is closed immediately instead of being leaked on every run.
    env = gym.make("ALE/Breakout-v5")
    env = wrap_deepmind(env)
    try:
        n_actions = env.action_space.n
    finally:
        env.close()

    data = AtariCloningDatasetTF("breakout")
    trainloader = data.get_dataset(batch_size, split="train")
    valloader = data.get_dataset(batch_size, split="val")

    model = ConvCFC(n_actions, units=units, mixed_memory=mixed_memory, go_backwards=go_backwards,
                    stateful=stateful, backbone_units=backbone_units, backbone_layers=backbone_layers,
                    backbone_dropout=backbone_dropout)
    loss_fxn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(
        loss=loss_fxn,
        optimizer=tf.keras.optimizers.Adam(LR),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )
    model.build((None, None, 84, 84, 4))

    # Settings recorded next to the metrics by EvalCSVCallback.
    inp_data = {
        'batch_size':batch_size,
        'epochs':epochs,
        'LR':LR,
        'units':units,
        'mixed_memory':mixed_memory,
        'go_backwards':go_backwards,
        'stateful':stateful,
        'backbone_units':backbone_units,
        'backbone_layers':backbone_layers,
        'backbone_dropout':backbone_dropout
    }
    model.summary()
    model.fit(
        trainloader,
        epochs=epochs,
        validation_data=valloader,
        callbacks=[EvalCSVCallback(model,valloader=valloader,inp_data=inp_data,loss_fxn = loss_fxn)],
    )

    if visualize_after:
        visualize(model)



def visualize(model, num_episodes=None):
    """Render Breakout while ``model`` plays it.

    Args:
        model: model handed to ``run_closed_loop``.
        num_episodes: number of episodes to play; None (the default) plays
            endlessly, as before.

    Returns:
        Whatever ``run_closed_loop`` returns (the episode returns when
        ``num_episodes`` is given).
    """
    # Visualize Atari game and play endlessly
    # env = gymnasium.make("ALE/Breakout-v5", render_mode="human")
    env = gym.make("ALE/Breakout-v5", render_mode="human")
    env = wrap_deepmind(env)
    try:
        return run_closed_loop(model, env, num_episodes)
    finally:
        # Release the environment even when the loop is interrupted.
        env.close()


In [8]:
import time

# running tests
#
# NOTE(review): run_test() ends by calling visualize(model), which starts
# run_closed_loop() without an episode limit. It looks like the first run
# therefore never returns and the sweep cannot advance -- confirm.

# Learning-rate-only runs first, then the full grid. The nesting order is
# LR -> batch size -> backbone units -> backbone layers.
sweep_configs = [dict(epochs=5, units=4, LR=lr) for lr in [0.1, 0.01]]
sweep_configs += [
    dict(epochs=5, units=4, batch_size=bs, LR=lr, backbone_layers=n_layers, backbone_units=n_units)
    for lr in [0.001, 0.00001, 0.000001]
    for bs in [16, 64, 128]
    for n_units in (64, 128, 256)
    for n_layers in (1, 2)
]

for config in sweep_configs:
    t = time.time()
    run_test(**config)
    print(f"\n\n\n\ntest time :-{time.time()-t}\n\n\n\n")






















Model: "conv_ltc"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 impala_conv_block (ImpalaC  (None, 256)               423488    
 onvBlock)                                                       
                                                                 
 time_distributed (TimeDist  multiple                  423488    
 ributed)                                                        
                                                                 
 cf_c (CfC)                  multiple                  33540     
                                                                 
 dense_1 (Dense)             multiple                  8         
                                                                 
Total params: 457036 (1.74 MB)
Trainable params: 456876 (1.74 MB)
Non-trainable params: 160 (640.00 Byte)
_________________________________________________________________
Epoch 1/5






  9/938 [..............................] - ETA: 20:50 - loss: 1.2479 - sparse_categorical_accuracy: 0.7701