In [1]:
# Create GAN for independent and identically distributed data

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import secrets
from tqdm import tqdm
import os

os.environ["KERAS_BACKEND"] = "tensorflow"

import math
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds

import keras
from keras import layers
from keras import ops
import gym
import d4rl


class DataGenerator:
    def __init__(self, seq_len=32):

        self.seq_len = seq_len
        self.env = gym.make("maze2d-large-dense-v1")
        self.dataset = self.env.get_dataset()
        self.bias_observation = np.min(self.dataset["observations"], axis=0)
        self.mult_observation = np.max(self.dataset["observations"], axis=0) - self.bias_observation
        print(f"bias_observation: {self.bias_observation} and mult_observation: {self.mult_observation}")

    def generate_real_samples(self, n_samples):
        X = []
        labels = []
        total_len = self.dataset["observations"].shape[0]
        for _ in tqdm(range(n_samples)):

            i = np.random.randint(0, total_len - self.seq_len)
            norm_obs, norm_act, norm_rew = self.normalize_values(i)
            labels.append(norm_obs[0])
            x = np.concatenate([norm_obs, norm_act, norm_rew], axis=1)
            X.append(x)
        return np.array(X), np.array(labels)

    def normalize_values(self, i):
        norm_obs = (self.dataset["observations"][i : i + self.seq_len] - self.bias_observation) / self.mult_observation
        norm_act = (self.dataset["actions"][i : i + self.seq_len] + 1.0) / 2.0
        norm_rew = self.dataset["rewards"][i : i + self.seq_len].reshape(-1, 1)
        return norm_obs, norm_act, norm_rew

    def denormalize_values(self, X):
        X = np.array(X)
        obs = X[:, :, :4] * self.mult_observation + self.bias_observation
        act = X[:, :, 4:6] * 2.0 - 1.0
        rew = X[:, :, 6].reshape(-1, 32, 1)
        return np.concatenate([obs, act, rew], axis=2)
    def normalize_observation(self, x):
        return (x - self.bias_observation) / self.mult_observation

datagen = DataGenerator()
data = datagen.generate_real_samples(50000)
data2 = datagen.generate_real_samples(50000)

2024-05-06 05:03:43.565807: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-06 05:03:45.956066: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/TensorRT-8.4.0.6//lib:/usr/local/cuda-11.6/lib64:/usr/local/apps/python-3.10.2/lib:/home2/shivam.sood/.mujoco/mujoco210/bin:/home2/shivam.sood/.mujoco/mujoco210/bin:/usr/local/apps/cuDNN/8.4.0-cuda-11.6/lib
2024-05-06 05:03:45.957001: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.

bias_observation: [ 0.39643136  0.44179875 -5.2262554  -5.2262554 ] and mult_observation: [ 6.819953  9.777689 10.452511 10.452511]


100%|██████████████████████████████████| 50000/50000 [00:01<00:00, 44744.20it/s]
100%|██████████████████████████████████| 50000/50000 [00:01<00:00, 46296.28it/s]


In [2]:
data = tf.data.Dataset.from_tensor_slices(data)
data2 = tf.data.Dataset.from_tensor_slices(data2)

2024-05-06 05:04:00.469594: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-05-06 05:04:01.071781: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9626 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 2080 Ti, pci bus id: 0000:02:00.0, compute capability: 7.5


In [3]:
# data
# data = tf.data.Dataset.from_tensor_slices(data)
# data2 = tf.data.Dataset.from_tensor_slices(data2)
num_epochs = 1  # train for at least 50 epochs for good results
image_size = 32


# sampling
min_signal_rate = 0.02
max_signal_rate = 0.95

# architecture
embedding_dims = 32
embedding_max_frequency = 1000.0
widths = [64, 128, 256, 512]
has_attention = [False, False, True, True]
block_depth = 2

# optimization
batch_size = 512
ema = 0.999
learning_rate = 1e-4
weight_decay = 1e-4

In [4]:

def preprocess_image(data, labels):
    # center crop image
    # height = ops.shape(data["image"])[0]
    # width = ops.shape(data["image"])[1]
    # crop_size = ops.minimum(height, width)
    # image = tf.image.crop_to_bounding_box(
    #     data["image"],
    #     (height - crop_size) // 2,
    #     (width - crop_size) // 2,
    #     crop_size,
    #     crop_size,
    # )

    # resize and clip
    # for image downsampling it is important to turn on antialiasing
    # image = tf.image.grayscale_to_rgb(data["image"])
    # image = tf.image.resize(image, size=[image_size, image_size], antialias=True)
    return ops.clip(data, 0.0, 1.0), ops.clip(labels,0.0,1.0)


def prepare_dataset(dataset_name):
    # the validation dataset is shuffled as well, because data order matters
    # for the KID estimation
    return (
        dataset_name
        .map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
        .cache()
        .repeat(1)
        .shuffle(10 * batch_size)
        .batch(batch_size, drop_remainder=True)
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )


# load dataset
train_dataset = prepare_dataset(data)
val_dataset = prepare_dataset(data)

In [5]:
@keras.saving.register_keras_serializable()
def sinusoidal_embedding(x):
    embedding_min_frequency = 1.0
    frequencies = ops.exp(
        ops.linspace(
            ops.log(embedding_min_frequency),
            ops.log(embedding_max_frequency),
            embedding_dims // 2,
        )
    )
    angular_speeds = ops.cast(2.0 * math.pi * frequencies, "float32")
    embeddings = ops.concatenate(
        [ops.sin(angular_speeds * x), ops.cos(angular_speeds * x)], axis=2
    )
    return embeddings


def ResidualBlock(width):
    def apply(x, temb):
        input_width = x.shape[2]
        if input_width == width:
            residual = x
        else:
            residual = layers.Conv1D(width, kernel_size=1)(x)
        x = layers.BatchNormalization(center=False, scale=False)(x)
        x = layers.Conv1D(width, kernel_size=3, padding="same", activation="swish")(x)
        temb = layers.Dense(width, activation="swish")(temb)
        x = layers.Add()([x, temb])
        x = layers.Conv1D(width, kernel_size=3, padding="same")(x)
        x = layers.Add()([x, residual])
        return x

    return apply


def DownBlock(i, width, block_depth):
    def apply(x, temb):
        x, skips = x
        for _ in range(block_depth):
            x = ResidualBlock(width)(x, temb)
            skips.append(x)
        # x = layers.AveragePooling1D(pool_size=2)(x)
        x = layers.Conv1D(width, kernel_size=3, strides=2, padding="same")(x)
        return x

    return apply


def UpBlock(i, width, block_depth):
    def apply(x, temb):
        x, skips = x
        x = layers.UpSampling1D(size=2)(x)
        x = layers.Conv1D(width, kernel_size=3, padding="same")(x)
        for _ in range(block_depth):
            x = layers.Concatenate()([x, skips.pop()])
            x = ResidualBlock(width)(x, temb)
        return x

    return apply


def get_network(image_size, widths, block_depth):
    noisy_images = keras.Input(shape=(image_size, 7))
    noise_variances = keras.Input(shape=(1, 1))
    labels = keras.Input(shape=(4,))
    # print("DASDA")
    e = layers.Lambda(sinusoidal_embedding, output_shape=(1, 32))(noise_variances)
    # l = layers.Embedding(2, 32)(labels)
    
    # print(e.shape,l.shape)
    temb = e
    # temb = keras.layers.Dense(32)(e)
    # temb = keras.layers.Dense(32)(temb)
    # l = keras.layers.Dense(32)(labels)
    l = keras.layers.Dense(32)(labels)
    l = keras.layers.Dense(32 * 4)(l)
    l = keras.layers.Dense(32, activation="sigmoid")(l)
    l = layers.Reshape((1,-1))(l)
    temb = keras.layers.Concatenate()([temb, l])
    e = layers.UpSampling1D(size=image_size)(temb)
    
    

    x = layers.Conv1D(widths[0], kernel_size=1)(noisy_images)
    x = layers.Concatenate()([x, e])

    # skips = []
    skips = [x]
    for i, width in enumerate(widths):
        x = DownBlock(i, width, block_depth)([x, skips], temb)
        


    for i in range(block_depth):
        x = ResidualBlock(width)(x, temb)
        # if i != block_depth - 1:
        #     x = layers.MultiHeadAttention(
        #         num_heads=8, key_dim=widths[-1] 
        #     )(x, x)

    for i, width in enumerate(reversed(widths)):
        x = UpBlock(i, width, block_depth)([x, skips], temb)
        
    x = layers.Concatenate()([x, skips.pop()])
    x = layers.Conv1D(7, kernel_size=1, kernel_initializer="zeros")(x)
    # x2 = layers.Conv1D(3, kernel_size=1, kernel_initializer="zeros", activation='softmax')(x)
    # x3 = layers.Conv1D(1, kernel_size=1, kernel_initializer="zeros", activation='sigmoid')(x)
    # x = layers.Concatenate()([x1,x2,x3])

    return keras.Model([noisy_images, noise_variances, labels], x, name="residual_unet")

In [6]:
@keras.saving.register_keras_serializable()
class DiffusionModel(keras.Model):
    def __init__(self, image_size, widths, block_depth):
        super().__init__()

        self.normalizer = layers.Normalization()
        self.network = get_network(image_size, widths, block_depth)
        self.ema_network = keras.models.clone_model(self.network)

    def compile(self, **kwargs):
        super().compile(**kwargs)

        self.noise_loss_tracker = keras.metrics.Mean(name="n_loss")
        self.image_loss_tracker = keras.metrics.Mean(name="i_loss")

    @property
    def metrics(self):
        return [self.noise_loss_tracker, self.image_loss_tracker]

    def denormalize(self, images):
        # convert the pixel values back to 0-1 range
        images = self.normalizer.mean + images * self.normalizer.variance**0.5
        return ops.clip(images, 0.0, 1.0)

    def diffusion_schedule(self, diffusion_times):
        # diffusion times -> angles
        start_angle = ops.cast(ops.arccos(max_signal_rate), "float32")
        end_angle = ops.cast(ops.arccos(min_signal_rate), "float32")

        diffusion_angles = start_angle + diffusion_times * (end_angle - start_angle)

        # angles -> signal and noise rates
        signal_rates = ops.cos(diffusion_angles)
        noise_rates = ops.sin(diffusion_angles)
        # note that their squared sum is always: sin^2(x) + cos^2(x) = 1

        return noise_rates, signal_rates

    def denoise(self, noisy_images, noise_rates, signal_rates, training, labels):
        # the exponential moving average weights are used at evaluation
        if training:
            network = self.network
        else:
            network = self.ema_network

        # predict noise component and calculate the image component using it
        # print(noisy_images.shape,noise_rates.shape, labels.shape)
        pred_noises = network([noisy_images, noise_rates**2, labels], training=training)
        pred_images = (noisy_images - noise_rates * pred_noises) / signal_rates

        return pred_noises, pred_images

    def reverse_diffusion(self, initial_noise, diffusion_steps, labels):
        # reverse diffusion = sampling
        num_images = initial_noise.shape[0]
        step_size = 1.0 / diffusion_steps

        # important line:
        # at the first sampling step, the "noisy image" is pure noise
        # but its signal rate is assumed to be nonzero (min_signal_rate)
        next_noisy_images = initial_noise
        for step in range(diffusion_steps):
            noisy_images = next_noisy_images

            # separate the current noisy image to its components
            diffusion_times = ops.ones((num_images, 1, 1)) - step * step_size
            noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
            pred_noises, pred_images = self.denoise(
                noisy_images, noise_rates, signal_rates, training=False, labels=labels
            )
            # network used in eval mode

            # remix the predicted components using the next signal and noise rates
            next_diffusion_times = diffusion_times - step_size
            next_noise_rates, next_signal_rates = self.diffusion_schedule(
                next_diffusion_times
            )
            next_noisy_images = (
                next_signal_rates * pred_images + next_noise_rates * pred_noises
            )
            # this new noisy image will be used in the next step

        return pred_images

    def fill_reverse_diffusion(self, initial_image, initial_noise, diffusion_steps, labels):
        # reverse diffusion = sampling
        num_images = initial_noise.shape[0]
        step_size = 1.0 / diffusion_steps
        determined_len = initial_image.shape[0]
        fill_len = image_size - determined_len
        initial_images = tf.reshape(initial_image,(1, determined_len, 7))
        initial_images = tf.tile(initial_images, [num_images, 1, 1])
        initial_images = self.normalizer(initial_images, training=False)

        
        # important line:
        # at the first sampling step, the "noisy image" is pure noise
        # but its signal rate is assumed to be nonzero (min_signal_rate)
        next_noisy_images = initial_noise
        for step in range(diffusion_steps+1):
            noisy_images = next_noisy_images
            # separate the current noisy image to its components
            diffusion_times = ops.ones((num_images, 1, 1)) - step * step_size
            noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
            pred_noises, pred_images = self.denoise(
                noisy_images, noise_rates, signal_rates, training=False, labels=labels
            )
            # network used in eval mode
            
            
            # print(initial_images.shape, pred_images[:, determined_len:].shape, determined_len)
            pred_images = tf.concat([initial_images, pred_images[:, determined_len:]], axis=1)

            # pred_noises = tf.concat([tf.zeros((num_images,determined_len,6)), pred_noises[:, determined_len:]], axis=1)
            # print(pred_images.shape, image_size)
            # remix the predicted components using the next signal and noise rates
            next_diffusion_times = diffusion_times - step_size
            next_noise_rates, next_signal_rates = self.diffusion_schedule(
                next_diffusion_times
            )
            next_noisy_images = (
                next_signal_rates * pred_images + next_noise_rates * pred_noises
            )
            # this new noisy image will be used in the next step
        # print(initial_images)
        return pred_images

    def generate_fill(self, num_images, diffusion_steps, labels, initial_image):
        # noise -> images -> denormalized images
        initial_noise = keras.random.normal(shape=(num_images, image_size, 7))
        generated_images = self.fill_reverse_diffusion(
            initial_image, initial_noise, diffusion_steps, labels
        )
        generated_images = self.denormalize(generated_images)
        return generated_images
    def generate(self, num_images, diffusion_steps, labels):
        # noise -> images -> denormalized images
        initial_noise = keras.random.normal(shape=(num_images, image_size, 7))
        generated_images = self.reverse_diffusion(
            initial_noise, diffusion_steps, labels
        )
        generated_images = self.denormalize(generated_images)
        return generated_images

    def train_step(self, images):
        # print(images[0].shape,images[1].shape)
        # normalize images to have standard deviation of 1, like the noises
        images, labels = images
        # print(images.shape)
        images = self.normalizer(images, training=True)
        # print(images.shape)
        noises = keras.random.normal(shape=(batch_size, image_size, 7))

        # sample uniform random diffusion times
        diffusion_times = keras.random.uniform(
            shape=(batch_size, 1, 1), minval=0.0, maxval=1.0
        )
        # print(images.shape)
        noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
        # mix the images with noises accordingly
        # print(images.shape,signal_rates.shape)
        noisy_images = signal_rates * images + noise_rates * noises
        # print(noisy_images.shape)
        with tf.GradientTape() as tape:
            # train the network to separate noisy images to their components
            pred_noises, pred_images = self.denoise(
                noisy_images, noise_rates, signal_rates, training=True, labels=labels
            )

            noise_loss = self.loss(noises, pred_noises)  # used for training
            image_loss = self.loss(images, pred_images)  # only used as metric

        gradients = tape.gradient(noise_loss, self.network.trainable_weights)
        self.optimizer.apply_gradients(zip(gradients, self.network.trainable_weights))

        self.noise_loss_tracker.update_state(noise_loss)
        self.image_loss_tracker.update_state(image_loss)

        # track the exponential moving averages of weights
        for weight, ema_weight in zip(self.network.weights, self.ema_network.weights):
            ema_weight.assign(ema * ema_weight + (1 - ema) * weight)

        # KID is not measured during the training phase for computational efficiency
        return {m.name: m.result() for m in self.metrics[:-1]}

    def test_step(self, images):
        images, labels = images
        # normalize images to have standard deviation of 1, like the noises
        images = self.normalizer(images, training=False)
        noises = keras.random.normal(shape=(batch_size, image_size, 7))

        # sample uniform random diffusion times
        diffusion_times = keras.random.uniform(
            shape=(batch_size, 1, 1), minval=0.0, maxval=1.0
        )
        noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
        # mix the images with noises accordingly
        noisy_images = signal_rates * images + noise_rates * noises

        # use the network to separate noisy images to their components
        pred_noises, pred_images = self.denoise(
            noisy_images, noise_rates, signal_rates, training=False, labels=labels
        )

        noise_loss = self.loss(noises, pred_noises)
        image_loss = self.loss(images, pred_images)

        self.image_loss_tracker.update_state(image_loss)
        self.noise_loss_tracker.update_state(noise_loss)

        # measure KID between real and generated images
        # this is computationally demanding, kid_diffusion_steps has to be small
        # images = self.denormalize(images)
        # generated_images = self.generate(
        #     num_images=batch_size, diffusion_steps=kid_diffusion_steps
        # )
        # self.kid.update_state(images, generated_images)

        return {m.name: m.result() for m in self.metrics}

    def plot_images(self, epoch=None, logs=None, num_rows=1, num_cols=1):
        # plot random generated images for visual evaluation of generation quality
        generated_images = self.generate(
            num_images=num_rows * num_cols,
            diffusion_steps=20,
        )

        print(generated_images)

In [7]:
# create and compile the model
model = DiffusionModel(image_size, widths, block_depth)
# below tensorflow 2.9:
# pip install tensorflow_addons
# import tensorflow_addons as tfa
# optimizer=tfa.optimizers.AdamW
opt = keras.optimizers.Adam(learning_rate=learning_rate)
model.compile(
    # optimizer=keras.optimizers.AdamW(
    #     learning_rate=learning_rate, weight_decay=weight_decay
    # ),
    optimizer=opt,
    loss=keras.losses.mean_absolute_error,
)
# pixelwise mean absolute error is used as loss

# save the best model based on the validation KID metric
# checkpoint_path = "checkpoints/diffusion_model.weights.h5"
# checkpoint_callback = keras.callbacks.ModelCheckpoint(
#     filepath=checkpoint_path,
#     save_weights_only=True,
#     monitor="val_i_loss",
#     # mode="min",
#     save_best_only=True,
# )

# calculate mean and variance of training dataset for normalization
# images = np.array([x for x, y in train_dataset])
# print(images.shape)
model.normalizer.adapt(train_dataset.map(lambda x, y: x))
# print(np.array([y for x, y in train_dataset]).shape)
# run training and plot generated images periodically
# model.fit(
#     train_dataset,
#     epochs=5,
#     validation_data=val_dataset,
#     callbacks=[
#         # keras.callbacks.LambdaCallback(on_epoch_end=model.plot_images),
#         # checkpoint_callback,
#     ],
# )

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [8]:
model.load_weights('tdiffusion5.weights.h5')

  trackable.load_own_variables(weights_store.get(inner_path))


In [14]:

model.fit(
    train_dataset,
    epochs=500,
    validation_data=val_dataset,
    callbacks=[
        # keras.callbacks.LambdaCallback(on_epoch_end=model.plot_images),
        # checkpoint_callback,
    ],
)

Epoch 1/500





[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 78ms/step - n_loss: 0.1199 - i_loss: 0.1760 - val_i_loss: 0.1621 - val_n_loss: 0.1136
Epoch 2/500
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 78ms/step - n_loss: 0.1191 - i_loss: 0.1744 - val_i_loss: 0.1611 - val_n_loss: 0.1131
Epoch 3/500
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 79ms/step - n_loss: 0.1185 - i_loss: 0.1731 - val_i_loss: 0.1599 - val_n_loss: 0.1127
Epoch 4/500
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 79ms/step - n_loss: 0.1177 - i_loss: 0.1714 - val_i_loss: 0.1587 - val_n_loss: 0.1122
Epoch 5/500
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 79ms/step - n_loss: 0.1175 - i_loss: 0.1701 - val_i_loss: 0.1575 - val_n_loss: 0.1116
Epoch 6/500
[1m976/976[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 80ms/step - n_loss: 0.1168 - i_loss: 0.1693 - val_i_loss: 0.1562 - val_n_loss: 0.1112
Epoch 7/500
[1m976/9


KeyboardInterrupt



In [15]:
model.save_weights('tdiffusion5.weights.h5')

In [16]:
# load the best model and generate images
# model.load_weights(checkpoint_path")
label = np.array([7.0,9.0, 0.0, 0.0])
label_norm = datagen.normalize_observation(label)
# print(label_norm)
labels = np.array([label_norm for i in range(1000)])
labels = tf.convert_to_tensor(labels)
x = (model.generate(1000,100, labels))

In [17]:
x_fixed = datagen.denormalize_values(x)

# print(x_fixed[0])

In [18]:
def calculate_final_P(X):
        
    
    X1 = X[:, 0, :4]
    # print(X1.shape)
    print(f"Average Predicted 1st Observation: {np.mean(X1, axis=0)}")
    
# print(x_fixed.shape)
calculate_final_P(x_fixed)
print(f"Actual Required 1st Observation:{label}")

Average Predicted 1st Observation: [ 6.9736533   8.97504    -0.06219367 -0.01605392]
Actual Required 1st Observation:[7. 9. 0. 0.]


In [22]:
model.network.summary()

In [9]:
import gym
import d4rl
env = gym.make('maze2d-large-dense-v1')


In [10]:
dataset = env.get_dataset()
print(dataset.keys())

load datafile: 100%|██████████████████████████████| 8/8 [00:01<00:00,  5.62it/s]

dict_keys(['actions', 'infos/goal', 'infos/qpos', 'infos/qvel', 'observations', 'rewards', 'terminals', 'timeouts'])





In [11]:
print(np.min(dataset['observations'], axis=0), np.max(dataset['observations'], axis=0))
print(np.min(dataset['actions'], axis=0), np.max(dataset['actions'], axis=0))
print(np.min(dataset['rewards'], axis=0), np.max(dataset['rewards'], axis=0))

[ 0.39643136  0.44179875 -5.2262554  -5.2262554 ] [ 7.2163844 10.219488   5.2262554  5.2262554]
[-1. -1.] [1. 1.]
2.88651197829991e-05 0.9996158823918246


In [12]:
print(dataset['observations'][1387387])
# print(dataset['observations'][0], dataset['rewards'][0])

[ 7.0003724   9.000094   -0.01831077  1.1157913 ]


In [13]:
np.sum(dataset['rewards'] < 0.1)/dataset['rewards'].shape

array([0.907916])

In [48]:
s0 = env.reset()
print(s0)

[ 3.92490306  6.00160729 -0.23803947  0.00796401]


In [49]:
# load the best model and generate images
# model.load_weights(checkpoint_path")
label = np.array(s0)
label_norm = datagen.normalize_observation(label)
# print(label_norm)
labels = np.array([label_norm for i in range(5000)])
labels = tf.convert_to_tensor(labels)
x = (model.generate(5000,100, labels))

In [50]:
x_fixed = datagen.denormalize_values(x)
s0 = np.array(s0)

In [51]:
# print(np.sum(np.abs(x_fixed[:,0,:4] - s0)  ,axis=1).shape  )
i = np.argmin(np.sum(np.abs(x_fixed[:,0,:4] - s0)  , axis=1)  )
print(i)

4396


In [52]:
print(x_fixed[i])

[[ 3.93026757e+00  5.99956799e+00 -2.38622665e-01  8.88442993e-03
  -1.00000000e+00  4.48112726e-01  1.29926354e-02]
 [ 3.91741800e+00  6.01006603e+00 -5.16413689e-01  3.92122269e-02
  -9.59312677e-01 -6.40149117e-01  1.21651385e-02]
 [ 3.90835500e+00  6.01233816e+00 -7.25089550e-01 -3.79133224e-02
  -9.77167249e-01  7.52516985e-02  1.35332290e-02]
 [ 3.89533496e+00  6.01046562e+00 -9.14278984e-01 -1.42984390e-02
  -3.98664892e-01  5.22074938e-01  1.35659408e-02]
 [ 3.88817263e+00  5.99189520e+00 -1.01210260e+00  7.63216019e-02
  -9.76623476e-01 -3.47467721e-01  1.34902019e-02]
 [ 3.87375569e+00  5.99039125e+00 -1.25162268e+00  2.75135040e-02
  -9.17816103e-01  1.92434072e-01  1.31146926e-02]
 [ 3.86309981e+00  6.01179838e+00 -1.43562055e+00  6.88767433e-02
  -9.16553557e-01  8.08020592e-01  1.36141982e-02]
 [ 3.84808207e+00  6.02047873e+00 -1.63742328e+00  2.51340389e-01
  -8.18664134e-01 -4.69867587e-02  1.25710610e-02]
 [ 3.82629514e+00  5.99745226e+00 -1.78378487e+00  2.49330521e-0

In [53]:
s = s0
r = np.exp(-1 * np.sqrt(np.sum((s[:2]-np.array([7,9]))**2)))
X_true = []
for j in range(32):
    # tst = list(x_fixed[i][j][4:6])
    # print(tst)
    # ac = env.step(tst)
    ac = x_fixed[i][j][4:6].astype('float32')
    print(s,ac, r)
    X_true.append(np.concatenate((np.array(s,dtype='float32'),np.array(ac,dtype='float32'),[r])))
    s,r,_,_ = env.step(ac)
    
X_true = np.array(X_true)
    

[ 3.92490306  6.00160729 -0.23803947  0.00796401] [-1.          0.44811273] 0.013637278084500796
[ 3.92014669  6.00275398 -0.47563638  0.11466929] [-0.9593127 -0.6401491] 0.013601773000254565
[ 3.91311692  6.00237334 -0.70297718 -0.03806418] [-0.97716725  0.0752517 ] 0.013529794948087905
[ 3.90377663  6.00217283 -0.93402885 -0.02005129] [-0.3986649   0.52207494] 0.013437497961028926
[ 3.89350912  6.00321619 -1.02675189  0.10433584] [-0.9766235  -0.34746772] 0.013348328920457545
[ 3.88094009  6.00342952 -1.25690294  0.0213331 ] [-0.9178161   0.19243407] 0.013229964015628402
[ 3.86621509  6.00410065 -1.47250006  0.06711313] [-0.91655356  0.8080206 ] 0.013096139486762295
[ 3.84934226  6.00669459 -1.68728301  0.25939458] [-0.81866413 -0.04698676] 0.012960324244275729
[ 3.83055985  6.00917046 -1.8782407   0.24758625] [-1.        -0.7698533] 0.012806543701320576
[ 3.80944054  6.00980691 -2.11193125  0.06364537] [-0.96329886  0.14352   ] 0.012616523644911476
[ 3.78607729  6.01078366 -2.336324

In [54]:
def final_calc(x_true,x_pred):
    x_pred1 = x_pred[:,:4]
    x_true1 = x_true[:,:4]
    
    print(f"Observation Percent Error: {np.mean(np.abs(x_pred1 - x_true1)/np.abs(x_true1)) * 100:0.2f}%")
    x_pred1 = x_pred[:,4:6]
    x_true1 = x_true[:,4:6]
    print(f"Action Percent Error: {np.mean(np.abs(x_pred1 - x_true1)/np.abs(x_true1)) * 100}%")
    x_pred1 = x_pred[:,6]
    x_true1 = x_true[:,6]
    print(f"Reward Percent Error: {np.mean(np.abs(x_pred1 - x_true1)/np.abs(x_true1)) * 100:0.2f}%")
final_calc(X_true,x_fixed[i])

Observation Percent Error: 6.93%
Action Percent Error: 0.0%
Reward Percent Error: 4.28%


In [60]:
def print_path(x):
    for i,elem in enumerate(x):
        print(f"Time {i}:Ball at Position ({elem[0]:.2f},{elem[1]:.2f}) with velocity ({elem[2]:.2f},{elem[3]:.2f}), action Force: ({elem[4]:.2f},{elem[5]:.2f}) giving reward {elem[6]:.4f}")

print_path(x_fixed[i])

Time 0:Ball at Position (3.93,6.00) with velocity (-0.24,0.01), action Force: (-1.00,0.45) giving reward 0.0130
Time 1:Ball at Position (3.92,6.01) with velocity (-0.52,0.04), action Force: (-0.96,-0.64) giving reward 0.0122
Time 2:Ball at Position (3.91,6.01) with velocity (-0.73,-0.04), action Force: (-0.98,0.08) giving reward 0.0135
Time 3:Ball at Position (3.90,6.01) with velocity (-0.91,-0.01), action Force: (-0.40,0.52) giving reward 0.0136
Time 4:Ball at Position (3.89,5.99) with velocity (-1.01,0.08), action Force: (-0.98,-0.35) giving reward 0.0135
Time 5:Ball at Position (3.87,5.99) with velocity (-1.25,0.03), action Force: (-0.92,0.19) giving reward 0.0131
Time 6:Ball at Position (3.86,6.01) with velocity (-1.44,0.07), action Force: (-0.92,0.81) giving reward 0.0136
Time 7:Ball at Position (3.85,6.02) with velocity (-1.64,0.25), action Force: (-0.82,-0.05) giving reward 0.0126
Time 8:Ball at Position (3.83,6.00) with velocity (-1.78,0.25), action Force: (-1.00,-0.77) giving 

In [56]:
print(x_fixed[i].shape)

(32, 7)
