In [75]:
# Create GAN for independent and identically distributed data

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import secrets

import os

os.environ["KERAS_BACKEND"] = "tensorflow"

import math
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds

import keras
from keras import layers
from keras import ops
from tqdm import tqdm


class DataGenerator:
    def __init__(self, seq_len=32, latent_dim=25):
        # self.P = np.random.uniform(0, 1, (6, 6))
        # print(self.P, np.sum(self.P, axis=1, keepdims=True))
        # self.P = self.P / np.sum(self.P, axis=1, keepdims=True)
        self.states = [0, 1]
        self.actions = [0, 1, 2]
        self.latent_dim = latent_dim
        self.seq_len = seq_len
        # print(self.P)
        # self.build_generator()
        # self.build_discriminator()
        # self.build_gan()
        # print(self.P)

    def generate_real_samples(self, n_samples):
        X_total = []
        labels = []
        for i in tqdm(range(n_samples)):
            current_state = np.random.choice([0.0, 1.0])
            policy = np.array(self.select_policy())
            labels.append(policy)
            X = []
            while len(X) < self.seq_len:
                if current_state == 0:
                    action = np.random.choice([0, 1], p=policy)
                    if action == 0:
                        next_state = np.random.choice([0, 1])
                        if next_state == 0:
                            reward = 15
                        else:
                            reward = 15
                    else:
                        next_state = 1
                        reward = -1
                else:
                    # action = 2
                    # next_state = 1
                    # reward = -1
                    action = np.random.choice([1, 2], p=policy)
                    if action == 2:
                        next_state = np.random.choice([0, 1])
                        if next_state == 0:
                            reward = 15
                        else:
                            reward = 15
                    else:
                        next_state = 0
                        reward = 20
                current_state_cat = keras.utils.to_categorical(
                    current_state, num_classes=2
                )
                action_cat = keras.utils.to_categorical(action, num_classes=3)
                reward = (reward + 1) / 21
                X.append(np.concatenate([current_state_cat, action_cat, [reward]]))
                current_state = next_state

            X = np.array(X)
            # X = tf.keras.utils.to_categorical(X, num_classes=6)
            X_total.append(X)
        X = np.array(X_total)
        y = np.ones((n_samples, 1)) * 0.9
        # print(X.shape, y.shape)
        # labels = tf.keras.utils.to_categorical(labels, num_classes=4)
        labels = np.array(labels)
        return X, labels

    def select_policy(self):
        if np.random.uniform() < 0.1:
            if np.random.uniform() <= 0.5:
                policy = [1.0, 0.0]
            else:
                policy = [0.0, 1.0]
        else:
            policy = np.random.uniform(0, 1, 2)
            policy = policy / np.sum(policy)
        # policy = np.array([0.7, 0.3])
        return policy

    def show_episode(self, x):
        x1 = np.argmax(x[:, :2], axis=1)
        x2 = np.argmax(x[:, 2:5], axis=1)
        x3 = x[:, 5]
        for i in range(self.seq_len):
            print(
                "State:", x1[i], "Action:", x2[i], "Reward:", np.rint((x3[i] * 21) - 1)
            )

    def calculate_final_P(self, X):

        X1 = np.argmax(X[:, :, :2], axis=2)
        X2 = np.argmax(X[:, :, 2:5], axis=2)
        X3 = X[:, :, 5]
        # X1 = np.argmax(X1, axis=1)
        # X2 = np.argmax(X2, axis=1)
        print("Probabilitity of starting position being 0:", np.mean(X1[:, 0] == 0))
        print("Probabilitity of starting position being 1:", np.mean(X1[:, 0] == 1))

        print(
            "Probability of action 0 being taken in state 0:", np.mean(X2[X1 == 0] == 0)
        )
        print(
            "Probability of action 1 being taken in state 0:", np.mean(X2[X1 == 0] == 1)
        )
        print(
            "Probability of action 2 being taken in state 1:", np.mean(X2[X1 == 1] == 2)
        )
        ps = np.zeros((2, 3, 2))
        for i in range(n_samples):
            for j in range(self.seq_len - 1):
                ps[X1[i, j], X2[i, j], X1[i, j + 1]] += 1
        ps = ps / np.sum(ps, axis=2, keepdims=True)
        print(ps)


datagen = DataGenerator()
data = datagen.generate_real_samples(100000)
data2 = datagen.generate_real_samples(1000)

100%|██████████████████████████████████| 100000/100000 [03:29<00:00, 476.69it/s]
100%|██████████████████████████████████████| 1000/1000 [00:02<00:00, 478.83it/s]


In [76]:
data = tf.data.Dataset.from_tensor_slices(data)
data2 = tf.data.Dataset.from_tensor_slices(data2)

In [77]:
# data
# data = tf.data.Dataset.from_tensor_slices(data)
# data2 = tf.data.Dataset.from_tensor_slices(data2)
num_epochs = 1  # train for at least 50 epochs for good results
image_size = 32


# sampling
min_signal_rate = 0.02
max_signal_rate = 0.95

# architecture
embedding_dims = 32
embedding_max_frequency = 1000.0
widths = [32, 64, 96, 128]
has_attention = [False, False, True, True]
block_depth = 2

# optimization
batch_size = 64
ema = 0.999
learning_rate = 1e-3
weight_decay = 1e-4

In [78]:

def preprocess_image(data, labels):
    # center crop image
    # height = ops.shape(data["image"])[0]
    # width = ops.shape(data["image"])[1]
    # crop_size = ops.minimum(height, width)
    # image = tf.image.crop_to_bounding_box(
    #     data["image"],
    #     (height - crop_size) // 2,
    #     (width - crop_size) // 2,
    #     crop_size,
    #     crop_size,
    # )

    # resize and clip
    # for image downsampling it is important to turn on antialiasing
    # image = tf.image.grayscale_to_rgb(data["image"])
    # image = tf.image.resize(image, size=[image_size, image_size], antialias=True)
    return ops.clip(data, 0.0, 1.0), ops.clip(labels,0.0,1.0)


def prepare_dataset(dataset_name):
    # the validation dataset is shuffled as well, because data order matters
    # for the KID estimation
    return (
        dataset_name
        .map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
        .cache()
        .repeat(1)
        .shuffle(10 * batch_size)
        .batch(batch_size, drop_remainder=True)
        .prefetch(buffer_size=tf.data.AUTOTUNE)
    )


# load dataset
train_dataset = prepare_dataset(data)
val_dataset = prepare_dataset(data)

In [79]:
@keras.saving.register_keras_serializable()
def sinusoidal_embedding(x):
    embedding_min_frequency = 1.0
    frequencies = ops.exp(
        ops.linspace(
            ops.log(embedding_min_frequency),
            ops.log(embedding_max_frequency),
            embedding_dims // 2,
        )
    )
    angular_speeds = ops.cast(2.0 * math.pi * frequencies, "float32")
    embeddings = ops.concatenate(
        [ops.sin(angular_speeds * x), ops.cos(angular_speeds * x)], axis=2
    )
    return embeddings


def ResidualBlock(width):
    def apply(x, temb):
        input_width = x.shape[2]
        if input_width == width:
            residual = x
        else:
            residual = layers.Conv1D(width, kernel_size=1)(x)
        x = layers.BatchNormalization(center=False, scale=False)(x)
        x = layers.Conv1D(width, kernel_size=3, padding="same", activation="swish")(x)
        temb = layers.Dense(width, activation="swish")(temb)
        x = layers.Add()([x, temb])
        x = layers.Conv1D(width, kernel_size=3, padding="same")(x)
        x = layers.Add()([x, residual])
        return x

    return apply


def DownBlock(width, block_depth):
    def apply(x, temb):
        x, skips = x
        for _ in range(block_depth):
            x = ResidualBlock(width)(x, temb)
            skips.append(x)
        # x = layers.AveragePooling1D(pool_size=2)(x)
        x = layers.Conv1D(width, kernel_size=3, strides=2, padding="same")(x)
        return x

    return apply


def UpBlock(width, block_depth):
    def apply(x, temb):
        x, skips = x
        x = layers.UpSampling1D(size=2)(x)
        x = layers.Conv1D(width, kernel_size=3, padding="same")(x)
        for _ in range(block_depth):
            x = layers.Concatenate()([x, skips.pop()])
            x = ResidualBlock(width)(x, temb)
        return x

    return apply


def get_network(image_size, widths, block_depth):
    noisy_images = keras.Input(shape=(image_size, 6))
    noise_variances = keras.Input(shape=(1, 1))
    labels = keras.Input(shape=(2,))
    # print("DASDA")
    e = layers.Lambda(sinusoidal_embedding, output_shape=(1, 32))(noise_variances)
    # l = layers.Embedding(2, 32)(labels)
    
    # print(e.shape,l.shape)
    temb = e
    # temb = keras.layers.Dense(32)(e)
    # temb = keras.layers.Dense(32)(temb)
    # l = keras.layers.Dense(32)(labels)
    l = keras.layers.Dense(32)(labels)
    l = keras.layers.Dense(32 * 4)(l)
    l = keras.layers.Dense(32, activation="sigmoid")(l)
    l = layers.Reshape((1,-1))(l)
    temb = keras.layers.Concatenate()([temb, l])
    e = layers.UpSampling1D(size=image_size)(temb)
    
    

    x = layers.Conv1D(widths[0], kernel_size=1)(noisy_images)
    x = layers.Concatenate()([x, e])

    skips = []
    for i, width in enumerate(widths):
        x = DownBlock(width, block_depth)([x, skips], temb)
        


    for i in range(block_depth):
        x = ResidualBlock(width)(x, temb)
        # if i != block_depth - 1:
        #     x = layers.MultiHeadAttention(
        #         num_heads=8, key_dim=widths[-1] 
        #     )(x, x)

    for i, width in enumerate(reversed(widths)):
        x = UpBlock(width, block_depth)([x, skips], temb)
        

    x = layers.Conv1D(6, kernel_size=1, kernel_initializer="zeros")(x)
    # x2 = layers.Conv1D(3, kernel_size=1, kernel_initializer="zeros", activation='softmax')(x)
    # x3 = layers.Conv1D(1, kernel_size=1, kernel_initializer="zeros", activation='sigmoid')(x)
    # x = layers.Concatenate()([x1,x2,x3])

    return keras.Model([noisy_images, noise_variances, labels], x, name="residual_unet")

In [80]:
@keras.saving.register_keras_serializable()
class DiffusionModel(keras.Model):
    def __init__(self, image_size, widths, block_depth):
        super().__init__()

        self.normalizer = layers.Normalization()
        self.network = get_network(image_size, widths, block_depth)
        self.ema_network = keras.models.clone_model(self.network)

    def compile(self, **kwargs):
        super().compile(**kwargs)

        self.noise_loss_tracker = keras.metrics.Mean(name="n_loss")
        self.image_loss_tracker = keras.metrics.Mean(name="i_loss")

    @property
    def metrics(self):
        return [self.noise_loss_tracker, self.image_loss_tracker]

    def denormalize(self, images):
        # convert the pixel values back to 0-1 range
        images = self.normalizer.mean + images * self.normalizer.variance**0.5
        return ops.clip(images, 0.0, 1.0)

    def diffusion_schedule(self, diffusion_times):
        # diffusion times -> angles
        start_angle = ops.cast(ops.arccos(max_signal_rate), "float32")
        end_angle = ops.cast(ops.arccos(min_signal_rate), "float32")

        diffusion_angles = start_angle + diffusion_times * (end_angle - start_angle)

        # angles -> signal and noise rates
        signal_rates = ops.cos(diffusion_angles)
        noise_rates = ops.sin(diffusion_angles)
        # note that their squared sum is always: sin^2(x) + cos^2(x) = 1

        return noise_rates, signal_rates

    def denoise(self, noisy_images, noise_rates, signal_rates, training, labels):
        # the exponential moving average weights are used at evaluation
        if training:
            network = self.network
        else:
            network = self.ema_network

        # predict noise component and calculate the image component using it
        # print(noisy_images.shape,noise_rates.shape, labels.shape)
        pred_noises = network([noisy_images, noise_rates**2, labels], training=training)
        pred_images = (noisy_images - noise_rates * pred_noises) / signal_rates

        return pred_noises, pred_images

    def reverse_diffusion(self, initial_noise, diffusion_steps, labels):
        # reverse diffusion = sampling
        num_images = initial_noise.shape[0]
        step_size = 1.0 / diffusion_steps

        # important line:
        # at the first sampling step, the "noisy image" is pure noise
        # but its signal rate is assumed to be nonzero (min_signal_rate)
        next_noisy_images = initial_noise
        for step in range(diffusion_steps):
            noisy_images = next_noisy_images

            # separate the current noisy image to its components
            diffusion_times = ops.ones((num_images, 1, 1)) - step * step_size
            noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
            pred_noises, pred_images = self.denoise(
                noisy_images, noise_rates, signal_rates, training=False, labels=labels
            )
            # network used in eval mode

            # remix the predicted components using the next signal and noise rates
            next_diffusion_times = diffusion_times - step_size
            next_noise_rates, next_signal_rates = self.diffusion_schedule(
                next_diffusion_times
            )
            next_noisy_images = (
                next_signal_rates * pred_images + next_noise_rates * pred_noises
            )
            # this new noisy image will be used in the next step

        return pred_images

    def fill_reverse_diffusion(self, initial_image, initial_noise, diffusion_steps, labels):
        # reverse diffusion = sampling
        num_images = initial_noise.shape[0]
        step_size = 1.0 / diffusion_steps
        determined_len = initial_image.shape[0]
        fill_len = image_size - determined_len
        initial_images = tf.reshape(initial_image,(1, determined_len, 6))
        initial_images = tf.tile(initial_images, [num_images, 1, 1])
        initial_images = self.normalizer(initial_images, training=False)

        
        # important line:
        # at the first sampling step, the "noisy image" is pure noise
        # but its signal rate is assumed to be nonzero (min_signal_rate)
        next_noisy_images = initial_noise
        for step in range(diffusion_steps+1):
            noisy_images = next_noisy_images
            # separate the current noisy image to its components
            diffusion_times = ops.ones((num_images, 1, 1)) - step * step_size
            noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
            pred_noises, pred_images = self.denoise(
                noisy_images, noise_rates, signal_rates, training=False, labels=labels
            )
            # network used in eval mode
            
            
            # print(initial_images.shape, pred_images[:, determined_len:].shape, determined_len)
            pred_images = tf.concat([initial_images, pred_images[:, determined_len:]], axis=1)

            # pred_noises = tf.concat([tf.zeros((num_images,determined_len,6)), pred_noises[:, determined_len:]], axis=1)
            # print(pred_images.shape, image_size)
            # remix the predicted components using the next signal and noise rates
            next_diffusion_times = diffusion_times - step_size
            next_noise_rates, next_signal_rates = self.diffusion_schedule(
                next_diffusion_times
            )
            next_noisy_images = (
                next_signal_rates * pred_images + next_noise_rates * pred_noises
            )
            # this new noisy image will be used in the next step
        # print(initial_images)
        return pred_images

    def generate_fill(self, num_images, diffusion_steps, labels, initial_image):
        # noise -> images -> denormalized images
        initial_noise = keras.random.normal(shape=(num_images, image_size, 6))
        generated_images = self.fill_reverse_diffusion(
            initial_image, initial_noise, diffusion_steps, labels
        )
        generated_images = self.denormalize(generated_images)
        return generated_images
    def generate(self, num_images, diffusion_steps, labels):
        # noise -> images -> denormalized images
        initial_noise = keras.random.normal(shape=(num_images, image_size, 6))
        generated_images = self.reverse_diffusion(
            initial_noise, diffusion_steps, labels
        )
        generated_images = self.denormalize(generated_images)
        return generated_images

    def train_step(self, images):
        # print(images[0].shape,images[1].shape)
        # normalize images to have standard deviation of 1, like the noises
        images, labels = images
        # print(images.shape)
        images = self.normalizer(images, training=True)
        # print(images.shape)
        noises = keras.random.normal(shape=(batch_size, image_size, 6))

        # sample uniform random diffusion times
        diffusion_times = keras.random.uniform(
            shape=(batch_size, 1, 1), minval=0.0, maxval=1.0
        )
        # print(images.shape)
        noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
        # mix the images with noises accordingly
        # print(images.shape,signal_rates.shape)
        noisy_images = signal_rates * images + noise_rates * noises
        # print(noisy_images.shape)
        with tf.GradientTape() as tape:
            # train the network to separate noisy images to their components
            pred_noises, pred_images = self.denoise(
                noisy_images, noise_rates, signal_rates, training=True, labels=labels
            )

            noise_loss = self.loss(noises, pred_noises)  # used for training
            image_loss = self.loss(images, pred_images)  # only used as metric

        gradients = tape.gradient(noise_loss, self.network.trainable_weights)
        self.optimizer.apply_gradients(zip(gradients, self.network.trainable_weights))

        self.noise_loss_tracker.update_state(noise_loss)
        self.image_loss_tracker.update_state(image_loss)

        # track the exponential moving averages of weights
        for weight, ema_weight in zip(self.network.weights, self.ema_network.weights):
            ema_weight.assign(ema * ema_weight + (1 - ema) * weight)

        # KID is not measured during the training phase for computational efficiency
        return {m.name: m.result() for m in self.metrics[:-1]}

    def test_step(self, images):
        images, labels = images
        # normalize images to have standard deviation of 1, like the noises
        images = self.normalizer(images, training=False)
        noises = keras.random.normal(shape=(batch_size, image_size, 6))

        # sample uniform random diffusion times
        diffusion_times = keras.random.uniform(
            shape=(batch_size, 1, 1), minval=0.0, maxval=1.0
        )
        noise_rates, signal_rates = self.diffusion_schedule(diffusion_times)
        # mix the images with noises accordingly
        noisy_images = signal_rates * images + noise_rates * noises

        # use the network to separate noisy images to their components
        pred_noises, pred_images = self.denoise(
            noisy_images, noise_rates, signal_rates, training=False, labels=labels
        )

        noise_loss = self.loss(noises, pred_noises)
        image_loss = self.loss(images, pred_images)

        self.image_loss_tracker.update_state(image_loss)
        self.noise_loss_tracker.update_state(noise_loss)

        # measure KID between real and generated images
        # this is computationally demanding, kid_diffusion_steps has to be small
        # images = self.denormalize(images)
        # generated_images = self.generate(
        #     num_images=batch_size, diffusion_steps=kid_diffusion_steps
        # )
        # self.kid.update_state(images, generated_images)

        return {m.name: m.result() for m in self.metrics}

    def plot_images(self, epoch=None, logs=None, num_rows=1, num_cols=1):
        # plot random generated images for visual evaluation of generation quality
        generated_images = self.generate(
            num_images=num_rows * num_cols,
            diffusion_steps=20,
        )

        print(generated_images)

In [81]:
# create and compile the model
model = DiffusionModel(image_size, widths, block_depth)
# below tensorflow 2.9:
# pip install tensorflow_addons
# import tensorflow_addons as tfa
# optimizer=tfa.optimizers.AdamW
model.compile(
    optimizer=keras.optimizers.AdamW(
        learning_rate=learning_rate, weight_decay=weight_decay
    ),
    loss=keras.losses.mean_absolute_error,
)
# pixelwise mean absolute error is used as loss

# save the best model based on the validation KID metric
# checkpoint_path = "checkpoints/diffusion_model.weights.h5"
# checkpoint_callback = keras.callbacks.ModelCheckpoint(
#     filepath=checkpoint_path,
#     save_weights_only=True,
#     monitor="val_i_loss",
#     # mode="min",
#     save_best_only=True,
# )

# calculate mean and variance of training dataset for normalization
# images = np.array([x for x, y in train_dataset])
# print(images.shape)
model.normalizer.adapt(train_dataset.map(lambda x, y: x))
# print(np.array([y for x, y in train_dataset]).shape)
# run training and plot generated images periodically
# model.fit(
#     train_dataset,
#     epochs=5,
#     validation_data=val_dataset,
#     callbacks=[
#         # keras.callbacks.LambdaCallback(on_epoch_end=model.plot_images),
#         # checkpoint_callback,
#     ],
# )

In [None]:
model.fit(
    train_dataset,
    epochs=500,
    validation_data=val_dataset,
    batch_size=256,
    callbacks=[
        # keras.callbacks.LambdaCallback(on_epoch_end=model.plot_images),
        # checkpoint_callback,
    ],
)

Epoch 1/500





[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 25ms/step - n_loss: 0.2878 - i_loss: 0.4732 - val_i_loss: 1.7661 - val_n_loss: 0.6018
Epoch 2/500
[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 23ms/step - n_loss: 0.1336 - i_loss: 0.3343 - val_i_loss: 0.6560 - val_n_loss: 0.2517
Epoch 3/500
[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 22ms/step - n_loss: 0.1168 - i_loss: 0.3110 - val_i_loss: 0.3337 - val_n_loss: 0.1230
Epoch 4/500
[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 22ms/step - n_loss: 0.1093 - i_loss: 0.2983 - val_i_loss: 0.2770 - val_n_loss: 0.0941
Epoch 5/500
[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 22ms/step - n_loss: 0.1040 - i_loss: 0.2902 - val_i_loss: 0.2638 - val_n_loss: 0.0867
Epoch 6/500
[1m1562/1562[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 23ms/step - n_loss: 0.1008 - i_loss: 0.2854 - val_i_loss: 0.2585 - val_n_loss: 0.0838
Epoch 7/50

In [51]:
# load the best model and generate images
# model.load_weights(checkpoint_path)
labels = np.array([[0.7,0.3] for i in range(1000)])
labels = tf.convert_to_tensor(labels)
x = (model.generate(1000,20, labels))

In [59]:
datagen.show_episode(x[9])

State: 0 Action: 0 Reward: 5.0
State: 0 Action: 0 Reward: 5.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 Action: 2 Reward: -1.0
State: 1 A

In [58]:
for ind,seq in enumerate(x):
    if np.argmax(seq[0,:2]) == 0 and np.argmax(seq[1,:2]) == 0:
        print(ind)
        break

9


In [74]:
def calculate_final_P(X):
        
    X1 = np.argmax(X[:, :, :2], axis=2)
    X2 = np.argmax(X[:, :, 2:5], axis=2)
    X3 = (X[:, :, 5] * 21) - 1
    # X1 = np.argmax(X1, axis=1)
    # X2 = np.argmax(X2, axis=1)
    print("Probabilitity of starting position being 0:", np.mean(X1[:, 0] == 0))
    print("Probabilitity of starting position being 1:", np.mean(X1[:, 0] == 1))

    print(
        "Probability of action 0 being taken in state 0:", np.mean(X2[X1 == 0] == 0)
    )
    print(
        "Probability of action 1 being taken in state 0:", np.mean(X2[X1 == 0] == 1)
    )
    print(
        "Probability of action 2 being taken in state 0:", np.mean(X2[X1 == 0] == 2)
    )
    print(
        "Probability of action 0 being taken in state 1:", np.mean(X2[X1 == 1] == 0)
    )
    print(
        "Probability of action 1 being taken in state 1:", np.mean(X2[X1 == 1] == 1)
    )
    print(
        "Probability of action 2 being taken in state 1:", np.mean(X2[X1 == 1] == 2)
    )
    ps = np.zeros((2, 3, 2))
    for i in tqdm(range(x.shape[0])):
        for j in range(32 - 1):
            ps[X1[i, j], X2[i, j], X1[i, j + 1]] += 1
    tmp = ps
    ps = ps / np.sum(ps, axis=2, keepdims=True)
    print(ps)

    rs = np.zeros((2, 3, 2))
    for i in tqdm(range(x.shape[0])):
        for j in range(32 - 1):
            rs[X1[i, j], X2[i, j], X1[i, j + 1]] += X3[i][j]
    rs = rs / tmp
    print(rs)
calculate_final_P(x)

Probabilitity of starting position being 0: 1.0
Probabilitity of starting position being 1: 0.0
Probability of action 0 being taken in state 0: 0.4892665474060823
Probability of action 1 being taken in state 0: 0.5107334525939177
Probability of action 2 being taken in state 0: 0.0
Probability of action 0 being taken in state 1: 0.0
Probability of action 1 being taken in state 1: 0.0
Probability of action 2 being taken in state 1: 1.0


100%|████████████████████████████████████| 1000/1000 [00:00<00:00, 29467.42it/s]
  ps = ps / np.sum(ps, axis=2, keepdims=True)


[[[0.21572212 0.78427788]
  [0.         1.        ]
  [       nan        nan]]

 [[       nan        nan]
  [       nan        nan]
  [0.         1.        ]]]


 14%|█████▍                                  | 136/1000 [00:03<00:24, 34.63it/s]


KeyboardInterrupt: 

In [15]:
init_img = []
current_state = 1
for i in range(8):
    if current_state == 0:
        action = np.random.choice([0, 1], p=[0.5,0.5])
        if action == 0:
            next_state = np.random.choice([0, 1])
            if next_state == 0:
                reward = 15
            else:
                reward = 15
        else:
            next_state = 1
            reward = -1
    else:
        # action = 2
        # next_state = 1
        # reward = -1
        action = np.random.choice([1, 2], p=[0.5,0.5])
        if action == 2:
            next_state = np.random.choice([0, 1])
            if next_state == 0:
                reward = 15
            else:
                reward = 15
        else:
            next_state = 0
            reward = 20
    st_cat = keras.utils.to_categorical(current_state, num_classes=2)
    act_cat = keras.utils.to_categorical(action, num_classes=3)
    norm_reward = (reward + 1) / 21
    init_img.append(np.concatenate([st_cat, act_cat, [norm_reward]]))
    current_state = next_state
init_img = np.array(init_img)
init_img = init_img.astype('float32')
init_img = tf.convert_to_tensor(init_img)
tst = model.generate_fill(1, 20, tf.convert_to_tensor(np.array([[0, 1]])), init_img)
print(init_img)
print('--------------------------------')
datagen.show_episode(tst[0])

tf.Tensor(
[[0.        1.        0.        0.        1.        0.7619048]
 [0.        1.        0.        0.        1.        0.7619048]
 [0.        1.        0.        0.        1.        0.7619048]
 [0.        1.        0.        1.        0.        1.       ]
 [1.        0.        0.        1.        0.        0.       ]
 [0.        1.        0.        1.        0.        1.       ]
 [1.        0.        1.        0.        0.        0.7619048]
 [0.        1.        0.        1.        0.        1.       ]], shape=(8, 6), dtype=float32)
--------------------------------
State: 1 Action: 2 Reward: 15.0
State: 1 Action: 2 Reward: 15.0
State: 1 Action: 2 Reward: 15.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 1 Reward: -1.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 0 Reward: 15.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 1 Reward: -1.0
State: 1 Action: 2 Reward: 15.0
State: 0 Action: 1 Reward: -1.0
State: 1 Action: 2 Reward: 15.0
State: 1 Action: 2 Reward: 15.0
State

In [17]:
model.network.summary()

In [17]:
noise1 = keras.random.normal(shape=(1, image_size, 6))
noise2 = keras.random.normal(shape=(1, image_size, 6))
avg_noise = (noise1*0.5 + noise2*0.5)
# avg_noise = keras.random.normal(shape=(1, image_size, 6))

In [18]:
p = tf.convert_to_tensor(np.array([[0.5, 0.5]]))
pred_data1 = model.denormalize(model.reverse_diffusion(noise1, 20, p))
pred_data2 = model.denormalize(model.reverse_diffusion(noise2, 20, p))
pred_data_avg = model.denormalize(model.reverse_diffusion(avg_noise, 20, p))

datagen.show_episode(pred_data1[0])
print("------------------")
datagen.show_episode(pred_data2[0])
print("------------------")
datagen.show_episode(pred_data_avg[0])

State: 0 Action: 1 Reward: -1.0
State: 1 Action: 2 Reward: 15.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 1 Reward: -1.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 0 Reward: 15.0
State: 0 Action: 1 Reward: -1.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 1 Reward: -1.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 1 Reward: -1.0
State: 1 Action: 2 Reward: 15.0
State: 0 Action: 1 Reward: -1.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 0 Reward: 15.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 0 Reward: 15.0
State: 1 Action: 2 Reward: 15.0
State: 0 Action: 1 Reward: -1.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 1 Reward: -1.0
State: 1 Action: 2 Reward: 15.0
State: 0 Action: 1 Reward: -1.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 0 Reward: 15.0
State: 0 Action: 0 Reward: 15.0
State: 1 Action: 1 Reward: 20.0
State: 0 Action: 0 Reward: 15.0
State: 1 Action: 2 Reward: 15.0
State: 1 Action: 2 Reward: 15.0
State: 0 Action: 1 Reward: -1.0
State: 1

In [19]:
s1 = 0
s2 = 0
c = 0
for i in range(image_size):
    # if (np.rint(pred_data1[0,i,:5]) == np.rint(pred_data2[0,i, :5])).all() and np.rint(pred_data1[0,i,5] * 21 - 1) == np.rint(pred_data2[0,i,5] * 21 - 1):
    if (np.rint(pred_data1[0,i,:5]) == np.rint(pred_data2[0,i, :5])).all():    
        print("SAME")
        c += 1
    # elif (np.rint(pred_data1[0,i,:5]) == np.rint(pred_data_avg[0,i,:5])).all() and np.rint(pred_data1[0,i,5] * 21 - 1) == np.rint(pred_data_avg[0,i,5] * 21 - 1):
    elif (np.rint(pred_data1[0,i,:5]) == np.rint(pred_data_avg[0,i,:5])).all() :
        print("Seq 1")
        s1 += 1
    # elif (np.rint(pred_data2[0,i,:5]) == np.rint(pred_data_avg[0,i,:5])).all() and np.rint(pred_data2[0,i,5] * 21 - 1) == np.rint(pred_data_avg[0,i,5] * 21 - 1):
    elif (np.rint(pred_data2[0,i,:5]) == np.rint(pred_data_avg[0,i,:5])).all() :
        print("Seq 2")
        s2 += 1
    else:
        print("UNKNOWN")
print(s1,s2,c)
print(s1+s2+c,"/",32)

Seq 2
Seq 2
Seq 1
Seq 1
Seq 1
Seq 1
Seq 1
Seq 1
Seq 1
Seq 1
UNKNOWN
Seq 1
SAME
SAME
SAME
Seq 2
UNKNOWN
Seq 2
SAME
SAME
SAME
SAME
Seq 2
Seq 1
Seq 1
Seq 2
SAME
Seq 1
Seq 2
Seq 2
Seq 2
Seq 1
13 9 8
30 / 32


In [21]:
# DO Q LEARNING ONLINE

Q = np.zeros((2, 3))
gamma = 0.99
alpha = 0.1
epsilon = 0.1
n_episodes = 10000

for i in range(n_episodes):
    current_state = np.random.choice([0, 1])
    for j in range(32):
        if np.random.uniform() < epsilon:
            # action = np.random.choice([0, 1, 2])
            if current_state == 0:
                action = np.random.choice([0, 1])
            else:
                action = np.random.choice([1, 2])
        else:
            action = np.argmax(Q[current_state])
        if current_state == 0:
            if action == 0:
                next_state = np.random.choice([0, 1])
                if next_state == 0:
                    reward = 15
                else:
                    reward = 15
            else:
                next_state = 1
                reward = -1
        else:
            if action == 2:
                next_state = np.random.choice([0, 1])
                if next_state == 0:
                    reward = 15
                else:
                    reward = 15
            else:
                next_state = 0
                reward = 20
        Q[current_state, action] = Q[current_state, action] + alpha * (
            reward + gamma * np.max(Q[next_state]) - Q[current_state, action]
        )
        current_state = next_state

        
print(Q)

[[1665.27369586 1651.80002796    0.        ]
 [   0.         1668.55340097 1667.00855187]]


In [23]:
# Do Q learning offline with the real data
Q = np.zeros((2, 3))
gamma = 0.99
alpha = 0.1
epsilon = 0.1
n_episodes = 10000

real_data, _ = datagen.generate_real_samples(10000)

for i in range(n_episodes):
    current_state = np.argmax(real_data[i, 0, :2])
    for j in range(31):
        reward = np.rint(real_data[i, j, 5] * 21) - 1
        action = np.argmax(real_data[i, j, 2:5])
        next_state = np.argmax(real_data[i, j + 1, :2])
        Q[current_state, action] = Q[current_state, action] + alpha * (
            reward + gamma * np.max(Q[next_state]) - Q[current_state, action]
        )
        current_state = next_state

print(Q)

100%|████████████████████████████████████████████████████████████████████| 10000/10000 [00:22<00:00, 439.92it/s]


[[1668.57182561 1654.28223963    0.        ]
 [   0.         1672.05963787 1668.60572279]]


In [28]:
# Do Q learning offline with the generated data
Q = np.zeros((2, 3))
gamma = 0.99
alpha = 0.1
epsilon = 0.1
n_episodes = 10000
labels = np.array([[0.5, 0.5] for i in range(10000)])
labels = tf.convert_to_tensor(labels)
gen_data = model.generate(10000, 20, labels)
print("DONE GENERATING DATA")
for i in tqdm(range(n_episodes)):
    current_state = np.argmax(gen_data[i, 0, :2])
    for j in range(31):
        reward = np.rint(gen_data[i, j, 5] * 21) - 1
        action = np.argmax(gen_data[i, j, 2:5])
        if current_state == 0 and action == 2:
            action = np.argmax(gen_data[i, j, 2:4])
        if current_state == 1 and action == 0:
            action = np.argmax(gen_data[i, j, 3:5]) + 1
        next_state = np.argmax(gen_data[i, j + 1, :2])
        Q[current_state, action] = Q[current_state, action] + alpha * (
            reward + gamma * np.max(Q[next_state]) - Q[current_state, action]
        )
        current_state = next_state

print(Q)

DONE GENERATING DATA


100%|█████████████████████████████████████████████████████████████████████| 10000/10000 [07:52<00:00, 21.18it/s]

[[1680.00686376 1665.47914289    0.        ]
 [   0.         1683.25816937 1680.95232681]]





In [32]:
# Do Q learning with combined data
Q = np.zeros((2, 3))
gamma = 0.99
alpha = 0.1
epsilon = 0.1
n_episodes = 10000

combined_data = np.concatenate([real_data[:5000], gen_data[:5000]], axis=0)
print(combined_data.shape)
for i in tqdm(range(n_episodes)):
    current_state = np.argmax(combined_data[i, 0, :2])
    for j in range(31):
        reward = np.rint(combined_data[i, j, 5] * 21) - 1
        action = np.argmax(combined_data[i, j, 2:5])
        next_state = np.argmax(combined_data[i, j + 1, :2])
        Q[current_state, action] = Q[current_state, action] + alpha * (
            reward + gamma * np.max(Q[next_state]) - Q[current_state, action]
        )
        current_state = next_state

print(Q)

(10000, 32, 6)


100%|███████████████████████████████████████████████████████████████████| 10000/10000 [00:03<00:00, 2524.79it/s]

[[1678.38702832 1663.38723441    0.        ]
 [   0.         1681.32857771 1678.30172049]]





In [34]:
# Do Q learning offline with the generated data
Q = np.zeros((2, 3))
gamma = 0.99
alpha = 0.1
epsilon = 0.1
n_episodes = 10000
labels = np.array([[0.5, 0.5] for i in range(10000)])
labels = tf.convert_to_tensor(labels)
gen_data = model.generate(10000, 20, labels)
print("DONE GENERATING DATA")
for i in tqdm(range(n_episodes)):
    current_state = np.argmax(gen_data[i, 0, :2])
    for j in range(31):
        reward = np.rint(gen_data[i, j, 5] * 21) - 1
        action = np.argmax(gen_data[i, j, 2:5])
        next_state = np.argmax(gen_data[i, j + 1, :2])
        Q[current_state, action] = Q[current_state, action] + alpha * (
            reward + gamma * np.max(Q[next_state]) - Q[current_state, action]
        )
        current_state = next_state

print(Q)

DONE GENERATING DATA


100%|█████████████████████████████████████████████████████████████████████| 10000/10000 [07:53<00:00, 21.11it/s]

[[1680.84808642 1666.39104059    0.        ]
 [   0.         1684.11164637 1680.70384745]]





In [70]:
# DO Q LEARNING ONLINE

Q = np.zeros((2, 3))
gamma = 0.99
alpha = 0.1
epsilon = 0.1
n_episodes = 10000

for i in range(n_episodes):
    # current_state = np.random.choice([0, 1])
    current_state = 0
    for j in range(32):
        if np.random.uniform() < epsilon:
            # action = np.random.choice([0, 1, 2])
            if current_state == 0:
                action = np.random.choice([0, 1])
            else:
                # action = np.random.choice([2])
                action=2
        else:
            action = np.argmax(Q[current_state])
        if current_state == 0:
            if action == 0:
                next_state = np.random.choice([0, 1])
                if next_state == 0:
                    reward = 5
                else:
                    reward = 5
            else:
                next_state = 1
                reward = 10
        else:
           
            next_state = 1
            reward = -1
        
        Q[current_state, action] = Q[current_state, action] + alpha * (
            reward + gamma * np.max(Q[next_state]) - Q[current_state, action]
        )
        current_state = next_state
        
        if current_state == 1:
            break
print(Q)

[[ 9.54744624 10.          0.        ]
 [ 0.          0.          0.        ]]


In [73]:
# Do Q learning offline with the generated data
Q = np.zeros((2, 3))
gamma = 0.99
alpha = 0.1
epsilon = 0.1
n_episodes = 1000
# labels = np.array([[0.5, 0.5] for i in range(10000)])
# labels = tf.convert_to_tensor(labels)
gen_data = model.generate(10000, 20, labels)
gen_data = x
print("DONE GENERATING DATA")
for i in tqdm(range(n_episodes)):
    current_state = np.argmax(gen_data[i, 0, :2])
    for j in range(31):
        reward = np.rint(gen_data[i, j, 5] * 21) - 1
        action = np.argmax(gen_data[i, j, 2:5])
        if current_state == 0 and action == 2:
            action = np.argmax(gen_data[i, j, 2:4])
        if current_state == 1 and action == 0:
            action = np.argmax(gen_data[i, j, 3:5]) + 1
        next_state = np.argmax(gen_data[i, j + 1, :2])
        Q[current_state, action] = Q[current_state, action] + alpha * (
            reward + gamma * np.max(Q[next_state]) - Q[current_state, action]
        )
        current_state = next_state
        if current_state == 1:
            break

print(Q)

DONE GENERATING DATA


100%|██████████████████████████████████████| 1000/1000 [00:02<00:00, 458.95it/s]

[[14.22570974 19.96790272  0.        ]
 [ 0.          0.          0.        ]]



