In [1]:
import numpy as np
import os
import cv2
import einops
import random
import time
from typing import Any, List, Sequence, Tuple
from IPython import display
from matplotlib import pyplot as plt
from matplotlib import animation
from IPython.display import HTML

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_probability as tfp
import tensorflow as tf

# Short alias for the TensorFlow Probability distributions module.
tfd = tfp.distributions

# Allocate GPU memory on demand instead of reserving it all up front.
# The list is empty on CPU-only hosts, so iterate rather than index [0];
# TensorFlow also requires memory growth to be configured identically on
# every visible GPU, so apply it to all of them.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, enable=True)

2024-09-07 21:06:48.138830: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-07 21:06:48.138864: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-07 21:06:48.140470: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-07 21:06:48.285586: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-09-07 21:06:51.592484: I tensorflow/compiler/

In [2]:
class CoinRunDataset:
    """Random fixed-length clips sampled from recorded episode files.

    Every file in ``data_dir`` whose name ends with ``npz`` is treated as one
    episode archive holding the arrays ``obs``, ``action``, ``reward``,
    ``done`` and ``N``, all indexed by frame along axis 0.

    Iterating draws ``len(self)`` episodes at random (with replacement) and
    yields one random window of ``max_frames`` consecutive frames from each.
    Episodes shorter than ``max_frames`` are skipped, so an epoch can yield
    fewer than ``len(self)`` clips.

    Args:
        data_dir: Directory containing the episode archives.
        set: Name of the split; stored on the instance and used in the
            start-up message.
        batch_size: Stored on the instance; not used by the iterator itself.
    """

    def __init__(self, data_dir, set='train', batch_size=32):
        self.data_dir = data_dir
        self.set = set
        self.batch_size = batch_size
        # Keep only episode archives so that __len__ matches what can
        # actually be sampled; sorted for a stable file order.
        self.filenames = sorted(
            name for name in os.listdir(data_dir) if name.endswith('npz')
        )
        self.max_frames = 20
        self.a_width = 1

        self.data_length = len(self.filenames)
        print("Length of %s data: " % set, self.data_length)

    def __len__(self):
        """Return the number of episode archives found in ``data_dir``."""
        return self.data_length

    def __iter__(self):
        """Yield ``(img, action, reward, done, N)`` clips of ``max_frames`` frames."""
        for _ in range(self.data_length):
            fname = random.choice(self.filenames)
            file_path = os.path.join(self.data_dir, fname)

            # The context manager closes the archive's file handle once the
            # arrays have been read into memory.
            with np.load(file_path) as data:
                img = data['obs']
                video_length = img.shape[0]

                # Too short to cut a full window from: skip this draw.
                if video_length < self.max_frames:
                    continue

                action = np.reshape(data['action'], newshape=[-1, self.a_width])
                reward = data['reward']
                done = data['done']
                N = data['N']

            # Example shapes for a 554-frame episode:
            #   img (554, 64, 64, 3), action (554, 1),
            #   reward (554,), done (554,), N (554,)

            # randint is inclusive on both ends, so the largest valid start is
            # video_length - max_frames (the window ending on the last frame).
            offset = random.randint(0, video_length - self.max_frames)
            window = slice(offset, offset + self.max_frames)

            yield img[window], action[window], reward[window], done[window], N[window]

    # tf.data.Dataset.from_generator needs a callable that returns a fresh
    # iterator, so calling the instance behaves the same as iter(instance).
    __call__ = __iter__

# Directory holding the recorded episode archives.
data_dir = "record"

# The generator object is callable, so tf.data can restart it for every pass.
ds_gen = CoinRunDataset(data_dir, 'train', 32)

# Element order matches the generator's yield: img, action, reward, done, N.
ds = tf.data.Dataset.from_generator(
    generator=ds_gen,
    output_types=(tf.float32, tf.float32, tf.float32, tf.bool, tf.int16),
)

Length of train data:  5456


2024-09-07 21:06:51.710703: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-07 21:06:51.710916: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-09-07 21:06:51.711073: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [3]:
%matplotlib inline

from matplotlib import pyplot as plt
from matplotlib import animation
import random
import cv2
from IPython.display import HTML
from IPython import display

display.clear_output(wait=False)

# Pull a single clip from the dataset to preview.
for idx, data in enumerate(ds):
    video, action, reward, done, N = data
    break

# The dataset delivers frames as float32. imshow clips float RGB data to
# [0, 1], so frames stored on a 0-255 scale would render saturated; rescale
# only when the values actually exceed 1.
# NOTE(review): assumes pixel values are either 0-1 or 0-255 - confirm
# against the recorder that writes the 'obs' array.
preview_frames = np.asarray(video)
if preview_frames.size and preview_frames.max() > 1.0:
    preview_frames = preview_frames / 255.0

fig = plt.figure()
plt.title('real (left), reconstruction (right)')
plt.axis('off')
im = plt.imshow(preview_frames[0])
# Close the figure so the notebook shows only the rendered animation below,
# not an extra static image.
plt.close()

def init():
    """Reset the image artist to the first frame of the clip."""
    im.set_data(preview_frames[0])
    # FuncAnimation expects an iterable of artists when blitting is enabled.
    return (im,)

def animate(i):
    """Show frame ``i`` of the clip and return the updated artist."""
    im.set_data(preview_frames[i])
    return (im,)

anim = animation.FuncAnimation(
    fig,
    animate,
    init_func=init,
    frames=preview_frames.shape[0],
    interval=200,  # 200 ms per frame = 5 fps
)
HTML(anim.to_html5_video())

In [4]:
class InverseActionPolicy(tf.keras.Model):
  """Inverse dynamics network: predicts per-frame action logits from a video clip.

  The clip goes through three Conv3D stages (each followed by normalisation
  and ReLU), is flattened per time step, fed through an LSTM and a dense
  layer, and finally projected to ``num_actions`` logits per time step.
  """
  def __init__(self, num_actions: int, num_hidden_units: int):
    """Create all layers.

    Args:
      num_actions: Size of the output logit vector per time step.
      num_hidden_units: Width of the dense layer between the LSTM and the
        action head.
    """
    super().__init__()

    self.num_actions = num_actions
    # Stored so get_config() can report it.
    self.num_hidden_units = num_hidden_units

    # obs
    self.conv3d_1 = tf.keras.layers.Conv3D(filters=12, kernel_size=(5, 5, 5), padding="same")
    self.conv3d_2 = tf.keras.layers.Conv3D(filters=24, kernel_size=(3, 3, 3), padding="same")
    self.conv3d_3 = tf.keras.layers.Conv3D(filters=48, kernel_size=(1, 1, 1), padding="same")

    # Normalisation layers are created once here so that their weights are
    # tracked by the model, trained by the optimizer and saved in checkpoints.
    # Creating them inside call() would build fresh, untrained layers on every
    # forward pass.
    self.layer_norm_1 = tf.keras.layers.LayerNormalization()
    self.layer_norm_2 = tf.keras.layers.LayerNormalization()
    self.layer_norm_3 = tf.keras.layers.LayerNormalization()
    self.batch_norm_3 = tf.keras.layers.BatchNormalization()
    self.relu = tf.keras.layers.ReLU()

    self.lstm = tf.keras.layers.LSTM(512, return_sequences=True, return_state=True, kernel_regularizer='l2')
    self.common = layers.Dense(num_hidden_units, activation="relu", kernel_regularizer='l2')

    self.actor = layers.Dense(num_actions, kernel_regularizer='l2')

  def get_config(self):
    """Return the base config extended with this model's constructor arguments."""
    config = super().get_config().copy()
    config.update({'num_actions': self.num_actions, 'num_hidden_units': self.num_hidden_units})

    return config

  def call(self, state: tf.Tensor, memory_state: tf.Tensor, carry_state: tf.Tensor, training=None) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """Run the network on a batch of clips.

    Args:
      state: Video clips; cast to float32. The first two axes are read as
        batch and time and must be statically known.
      memory_state: First element of the LSTM initial state.
      carry_state: Second element of the LSTM initial state.
      training: Forwarded to batch normalisation and the LSTM.

    Returns:
      A tuple ``(pi_latent, final_memory_state, final_carry_state)``: the
      action logits for every time step and the LSTM state after the last
      time step.
    """
    state = tf.cast(state, tf.float32)

    batch_size = state.shape[0]
    time_step = state.shape[1]

    conv3d_1 = self.conv3d_1(state)
    conv3d_1 = self.layer_norm_1(conv3d_1)
    conv3d_1 = self.relu(conv3d_1)

    conv3d_2 = self.conv3d_2(conv3d_1)
    conv3d_2 = self.layer_norm_2(conv3d_2)
    conv3d_2 = self.relu(conv3d_2)

    conv3d_3 = self.conv3d_3(conv3d_2)
    conv3d_3 = self.layer_norm_3(conv3d_3)
    # Batch norm behaves differently in training and inference, so the flag
    # must be passed through explicitly.
    conv3d_3 = self.batch_norm_3(conv3d_3, training=training)
    conv3d_3 = self.relu(conv3d_3)

    # Flatten everything after (batch, time) into one feature vector per step.
    conv3d_reshaped = tf.reshape(conv3d_3, [batch_size, time_step, -1])

    initial_state = (memory_state, carry_state)
    lstm_output, final_memory_state, final_carry_state = self.lstm(
        conv3d_reshaped, initial_state=initial_state, training=training)

    X_input = self.common(lstm_output)

    pi_latent = self.actor(X_input)

    # Return the updated recurrent state, not the state that was passed in,
    # so callers can carry it over to the next clip.
    return pi_latent, final_memory_state, final_carry_state

# Model hyper-parameters: size of the action logit vector and width of the
# dense layer that sits between the LSTM and the action head.
num_actions = 7
num_hidden_units = 1024

model = InverseActionPolicy(
    num_actions=num_actions,
    num_hidden_units=num_hidden_units,
)

In [None]:
cce_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(0.0001)

# Make sure the checkpoint directory exists before the first save.
os.makedirs("model", exist_ok=True)

for epoch in range(0, 1000000):
    start_time = time.time()

    # Running mean of the per-clip loss, reported at the end of the epoch.
    mean_loss = tf.keras.metrics.Mean()
    for idx, data in enumerate(ds):
        video, action, reward, done, N = data

        # Targets: drop the trailing width-1 axis explicitly (a bare squeeze
        # would also collapse any other size-1 axis) and one-hot encode.
        action = tf.cast(tf.squeeze(action, axis=-1), tf.int32)
        action_onehot = tf.one_hot(action, num_actions)

        # Each clip starts from a zero LSTM state; batch size is 1.
        memory_state = tf.zeros([1, 512], dtype=tf.float32)
        carry_state = tf.zeros([1, 512], dtype=tf.float32)

        # Only the differentiable forward pass and loss live inside the tape;
        # sampling, logging and metric updates happen afterwards.
        with tf.GradientTape() as tape:
            act_pi, memory_state, carry_state = model(
                tf.expand_dims(video, 0), memory_state, carry_state, training=True)

            act_loss = cce_loss(action_onehot, act_pi[0])

            # Sum of the layers' L2 regularisation penalties.
            regularization_loss = tf.reduce_sum(model.losses)

            total_loss = act_loss + 1e-5 * regularization_loss

        grads = tape.gradient(total_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        mean_loss(total_loss)

        # Sample actions from the predicted distribution purely for logging.
        act_dist = tfd.Categorical(logits=act_pi)
        pre_action = act_dist.sample()[0]

        print("action: ", action)
        print("pre_action: ", pre_action)
        print('epoch: {}, idx: {}, total_loss: {}'.format(epoch, idx, total_loss))
        print("")

        # Keep the notebook output from growing without bound.
        if idx % 100 == 0:
            display.clear_output(wait=False)

    print('epoch: {}, mean_loss: {}, elapsed: {:.1f}s'.format(
        epoch, mean_loss.result(), time.time() - start_time))

    if epoch % 10 == 0:
        model.save_weights("model/IDM_Model_{0}".format(epoch))

action:  tf.Tensor([1 5 3 6 4 0 3 3 6 4 4 5 0 5 6 6 3 6 4 4], shape=(20,), dtype=int32)
pre_action:  tf.Tensor([1 6 6 6 2 4 2 1 6 5 5 4 0 5 4 6 2 6 3 0], shape=(20,), dtype=int32)
epoch: 617, idx: 4701, total_loss: 1.1928611993789673

action:  tf.Tensor([6 0 0 2 6 4 5 3 2 1 1 5 2 5 4 6 5 5 5 2], shape=(20,), dtype=int32)
pre_action:  tf.Tensor([2 0 0 1 6 3 3 4 4 0 1 5 6 3 4 4 4 2 0 4], shape=(20,), dtype=int32)
epoch: 617, idx: 4702, total_loss: 1.4617618322372437

action:  tf.Tensor([6 1 3 6 0 5 6 4 4 6 3 3 2 3 6 5 0 4 3 1], shape=(20,), dtype=int32)
pre_action:  tf.Tensor([6 0 2 6 1 5 3 5 4 3 4 4 4 3 2 2 1 0 3 0], shape=(20,), dtype=int32)
epoch: 617, idx: 4703, total_loss: 1.3158818483352661

action:  tf.Tensor([5 3 5 2 2 3 0 5 3 3 4 1 1 4 3 0 5 4 2 5], shape=(20,), dtype=int32)
pre_action:  tf.Tensor([6 2 2 2 2 6 1 5 4 4 4 1 2 2 1 2 6 3 3 0], shape=(20,), dtype=int32)
epoch: 617, idx: 4704, total_loss: 1.2960988283157349

action:  tf.Tensor([1 6 2 6 4 3 0 5 1 6 0 2 4 3 4 2 1 6 1 1]