In [4]:
import os
import sys
import h5py
import torch 
import random
import pygame
import numpy as np
import torch.nn as nn 
import numpy.random as npr
from datetime import datetime
import torch.nn.functional as F
from os.path import join, expanduser
sys.path.append("..")

from utility import DataLogger
import matplotlib.pyplot as plt 

In [8]:
# Locate every saved trajectory file, in sorted (deterministic) order.
data_dir = join('..', 'experiments', 'inverted_pendulum', 'data_files')
trajectory_samples = [join(data_dir, fname)
                      for fname in sorted(os.listdir(data_dir))]
datalogger = DataLogger()

# collect saved observations and states, one get_state() result per file
# NOTE(review): the output recorded below this cell ends in a KeyError raised
# while reading one of the files -- confirm every file in data_dir is complete.
observation_state_pair = []
for sample_path in trajectory_samples:
    observation_state_pair.append(
        datalogger.get_state(sample_path, verbose=True))

# element 0 of each result is stacked into `observations`, element 1 into `states`
observations = np.vstack([pair[0] for pair in observation_state_pair])
states = np.vstack([pair[1] for pair in observation_state_pair])

condition_00
condition_00/sample_00
condition_00/sample_01
condition_00/sample_02
condition_00/sample_03
condition_00/sample_04
condition_00/sample_05
condition_00/sample_06
condition_00/sample_07
condition_00/sample_08
condition_00/sample_09
condition_01
condition_01/sample_00
condition_01/sample_01


KeyError: "Unable to open object (object 'OBSERVATIONS_000' doesn't exist)"

In [5]:
# see: https://github.com/deepmind/sonnet/blob/v2/examples/vqvae_example.ipynb
 
class ResidualStack(nn.Module):
  """Stack of residual blocks: ReLU -> 3x3 conv -> ReLU -> 1x1 conv, added to the input.

  Args:
    num_hiddens: channel count of the input and of the output of every block.
    num_residual_layers: number of residual blocks in the stack.
    num_residual_hiddens: channel count between the 3x3 and the 1x1 conv.

  The output has the same shape as the input: the 3x3 convolution is padded by
  one pixel so the skip connection can be added element-wise.
  """

  def __init__(self, num_hiddens, num_residual_layers, num_residual_hiddens):
    super(ResidualStack, self).__init__()
    self._num_hiddens = num_hiddens
    self._num_residual_layers = num_residual_layers
    self._num_residual_hiddens = num_residual_hiddens

    # nn.ModuleList (not a plain Python list) so that the convolution weights
    # are registered as parameters and follow .parameters()/.to()/state_dict().
    self._layers = nn.ModuleList()
    for _ in range(num_residual_layers):
      conv3 = nn.Conv2d(
          in_channels=num_hiddens,
          out_channels=num_residual_hiddens,
          kernel_size=(3, 3),
          stride=(1, 1),
          padding=1)  # keep H x W unchanged so the residual add is valid
      conv1 = nn.Conv2d(
          in_channels=num_residual_hiddens,
          out_channels=num_hiddens,
          kernel_size=(1, 1),
          stride=(1, 1))
      self._layers.append(nn.ModuleList([conv3, conv1]))

  def forward(self, inputs):
    """Apply every residual block in order and return the ReLU of the result."""
    h = inputs
    for conv3, conv1 in self._layers:
      conv3_out = conv3(torch.relu(h))
      conv1_out = conv1(torch.relu(conv3_out))
      # Out-of-place add: `h += ...` would overwrite the caller's tensor.
      h = h + conv1_out
    return torch.relu(h)  # Resnet V1 style


class Encoder(nn.Module):
  """Convolutional encoder: two stride-2 convs, one 3x3 conv, then a residual stack.

  Args:
    num_hiddens: channel count of the encoder output.
    num_residual_layers: number of blocks in the residual stack.
    num_residual_hiddens: inner channel count of each residual block.
    in_channels: channel count of the input images. Defaults to 3.
      NOTE(review): 3 assumes RGB input -- confirm against the data used.

  With the paddings below, an input of (N, in_channels, H, W) with H and W
  divisible by 4 yields an output of (N, num_hiddens, H / 4, W / 4).
  """

  def __init__(self, num_hiddens, num_residual_layers, num_residual_hiddens,
               in_channels=3):
    super(Encoder, self).__init__()
    self._num_hiddens = num_hiddens
    self._num_residual_layers = num_residual_layers
    self._num_residual_hiddens = num_residual_hiddens

    # Each layer's in_channels must equal the previous layer's out_channels.
    self._enc_1 = nn.Conv2d(
        in_channels=in_channels,
        out_channels=self._num_hiddens // 2,
        kernel_size=(4, 4),
        stride=(2, 2),
        padding=1)  # kernel 4 / stride 2 / padding 1 halves H and W exactly
    self._enc_2 = nn.Conv2d(
        in_channels=self._num_hiddens // 2,
        out_channels=self._num_hiddens,
        kernel_size=(4, 4),
        stride=(2, 2),
        padding=1)
    self._enc_3 = nn.Conv2d(
        in_channels=self._num_hiddens,
        out_channels=self._num_hiddens,
        kernel_size=(3, 3),
        stride=(1, 1),
        padding=1)  # keeps H and W unchanged
    self._residual_stack = ResidualStack(
        self._num_hiddens,
        self._num_residual_layers,
        self._num_residual_hiddens)

  def forward(self, x):
    """Encode a batch of images into a (N, num_hiddens, H', W') feature map."""
    h = torch.relu(self._enc_1(x))
    h = torch.relu(self._enc_2(h))
    h = torch.relu(self._enc_3(h))
    return self._residual_stack(h)


class Decoder(nn.Module):
  """Convolutional decoder: 3x3 conv, residual stack, two stride-2 transposed convs.

  Args:
    num_hiddens: channel count used inside the decoder.
    num_residual_layers: number of blocks in the residual stack.
    num_residual_hiddens: inner channel count of each residual block.
    in_channels: channel count of the tensor fed to the decoder. Defaults to
      ``num_hiddens`` (the value the original code hard-coded); pass the
      embedding dimension when it differs from ``num_hiddens``.
    out_channels: channel count of the reconstruction. Defaults to 3, the
      value the original code hard-coded.

  With the paddings below, an input of (N, in_channels, H, W) yields an output
  of (N, out_channels, 4 * H, 4 * W).
  """

  def __init__(self, num_hiddens, num_residual_layers, num_residual_hiddens,
               in_channels=None, out_channels=3):
    super(Decoder, self).__init__()
    self._num_hiddens = num_hiddens
    self._num_residual_layers = num_residual_layers
    self._num_residual_hiddens = num_residual_hiddens

    if in_channels is None:
      in_channels = self._num_hiddens

    self._dec_1 = nn.Conv2d(
        in_channels=in_channels,
        out_channels=self._num_hiddens,
        kernel_size=(3, 3),
        stride=(1, 1),
        padding=1)  # keeps H and W unchanged
    self._residual_stack = ResidualStack(
        self._num_hiddens,
        self._num_residual_layers,
        self._num_residual_hiddens)
    # nn.ConvTranspose2d has no `output_shape` argument; the output size
    # follows from kernel/stride/padding (4 / 2 / 1 doubles H and W exactly).
    self._dec_2 = nn.ConvTranspose2d(
        in_channels=self._num_hiddens,
        out_channels=self._num_hiddens // 2,
        kernel_size=(4, 4),
        stride=(2, 2),
        padding=1)
    # Input channels must match what _dec_2 emits (num_hiddens // 2).
    self._dec_3 = nn.ConvTranspose2d(
        in_channels=self._num_hiddens // 2,
        out_channels=out_channels,
        kernel_size=(4, 4),
        stride=(2, 2),
        padding=1)

  def forward(self, x):
    """Decode a feature map into a reconstruction; no activation on the output."""
    h = self._dec_1(x)
    h = self._residual_stack(h)
    h = F.relu(self._dec_2(h))
    x_recon = self._dec_3(h)

    return x_recon


class VQVAEModel(nn.Module):
  """Chains encoder -> pre-VQ projection -> vector quantizer -> decoder.

  Args:
    encoder: callable applied to the input batch.
    decoder: callable applied to the quantizer's 'quantize' entry.
    vqvae: callable invoked as ``vqvae(z, is_training=...)``; its result must
      be indexable with the keys 'quantize' and 'loss'.
    pre_vq_conv1: callable applied to the encoder output before quantization.
    data_variance: divisor applied to the mean squared reconstruction error.
  """

  def __init__(self, encoder, decoder, vqvae, pre_vq_conv1,
               data_variance):
    super().__init__()
    self._encoder = encoder
    self._decoder = decoder
    self._vqvae = vqvae
    self._pre_vq_conv1 = pre_vq_conv1
    self._data_variance = data_variance

  def forward(self, inputs, is_training):
    """Run the full model and return a dict of intermediate tensors and losses.

    Returns:
      dict with keys 'z', 'x_recon', 'loss', 'recon_error' and 'vq_output',
      where 'loss' is 'recon_error' plus the quantizer's own 'loss'.
    """
    encoded = self._encoder(inputs)
    z = self._pre_vq_conv1(encoded)
    vq_output = self._vqvae(z, is_training=is_training)
    x_recon = self._decoder(vq_output['quantize'])

    # Mean squared error, normalised by the data variance.
    squared_error = (x_recon - inputs).pow(2)
    recon_error = squared_error.mean() / self._data_variance
    total_loss = recon_error + vq_output['loss']

    result = {}
    result['z'] = z
    result['x_recon'] = x_recon
    result['loss'] = total_loss
    result['recon_error'] = recon_error
    result['vq_output'] = vq_output
    return result


In [None]:
%%time

# Set hyper-parameters.
batch_size = 32
image_size = 32

# 100k steps should take < 30 minutes on a modern (>= 2017) GPU.
# 10k steps gives reasonable accuracy with VQVAE on Cifar10.
num_training_updates = 10000

num_hiddens = 128
num_residual_hiddens = 32
num_residual_layers = 2
# These hyper-parameters define the size of the model (number of parameters and layers).
# The hyper-parameters in the paper were (For ImageNet):
# batch_size = 128
# image_size = 128
# num_hiddens = 128
# num_residual_hiddens = 32
# num_residual_layers = 2

# This value is not that important, usually 64 works.
# This will not change the capacity in the information-bottleneck.
embedding_dim = 64

# The higher this value, the higher the capacity in the information bottleneck.
num_embeddings = 512

# commitment_cost should be set appropriately. It's often useful to try a couple
# of values. It mostly depends on the scale of the reconstruction cost
# (log p(x|z)). So if the reconstruction cost is 100x higher, the
# commitment_cost should also be multiplied with the same amount.
commitment_cost = 0.25

# Use EMA updates for the codebook (instead of the Adam optimizer).
# This typically converges faster, and makes the model less dependent on choice
# of the optimizer. In the VQ-VAE paper EMA updates were not used (but was
# developed afterwards). See Appendix of the paper for more details.
vq_use_ema = True

# This is only used for EMA updates.
decay = 0.99

learning_rate = 3e-4


# # Data Loading.
# NOTE(review): `tf`, `train_data_dict`, `valid_data_dict` and
# `cast_and_normalise_images` are not imported or defined in any cell visible
# in this notebook -- presumably left over from the TensorFlow/Sonnet example
# this cell was copied from; confirm before running.
train_dataset = (
    tf.data.Dataset.from_tensor_slices(train_data_dict)
    .map(cast_and_normalise_images)
    .shuffle(10000)
    .repeat(-1)  # repeat indefinitely
    .batch(batch_size, drop_remainder=True)
    .prefetch(-1))

valid_dataset = (
    tf.data.Dataset.from_tensor_slices(valid_data_dict)
    .map(cast_and_normalise_images)
    .repeat(1)  # 1 epoch
    .batch(batch_size)
    .prefetch(-1))

# # Build modules.
# NOTE(review): Encoder, Decoder and VQVAEModel in this notebook subclass
# torch's nn.Module, while the projection, quantizer and optimizer below come
# from `snt` (not imported in the visible cells). Mixing the two frameworks
# looks unintended -- TODO confirm and port this cell.
encoder = Encoder(num_hiddens, num_residual_layers, num_residual_hiddens)
decoder = Decoder(num_hiddens, num_residual_layers, num_residual_hiddens)
# 1x1 convolution that maps the encoder output to embedding_dim channels.
pre_vq_conv1 = snt.Conv2D(output_channels=embedding_dim,
    kernel_shape=(1, 1),
    stride=(1, 1),
    name="to_vq")

# Choose the quantizer variant; only the EMA variant takes `decay`.
if vq_use_ema:
  vq_vae = snt.nets.VectorQuantizerEMA(
      embedding_dim=embedding_dim,
      num_embeddings=num_embeddings,
      commitment_cost=commitment_cost,
      decay=decay)
else:
  vq_vae = snt.nets.VectorQuantizer(
      embedding_dim=embedding_dim,
      num_embeddings=num_embeddings,
      commitment_cost=commitment_cost)
  
# NOTE(review): `train_data_variance` is not defined in any visible cell.
model = VQVAEModel(encoder, decoder, vq_vae, pre_vq_conv1,
                   data_variance=train_data_variance)

optimizer = snt.optimizers.Adam(learning_rate=learning_rate)

@tf.function
def train_step(data):
  """Run one optimisation step on a batch and return the model's output dict.

  Args:
    data: mapping with an 'image' entry holding the input batch.

  Returns:
    Whatever `model(...)` returns for the batch; its 'loss' entry is the
    quantity that is differentiated.
  """
  # Record the forward pass so gradients of the loss can be taken afterwards.
  with tf.GradientTape() as grad_tape:
    outputs = model(data['image'], is_training=True)

  params = model.trainable_variables
  gradients = grad_tape.gradient(outputs['loss'], params)
  optimizer.apply(gradients, params)
  return outputs

# Per-step training metrics, appended once per update.
train_losses = []
train_recon_errors = []
train_perplexities = []
train_vqvae_loss = []

# zip(range(...), ...) caps the run at exactly num_training_updates updates.
# The previous `step_index == num_training_updates` check sat after the update
# and therefore performed one update too many (and always at least one).
for step_index, data in zip(range(num_training_updates), train_dataset):
  train_results = train_step(data)
  train_losses.append(train_results['loss'])
  train_recon_errors.append(train_results['recon_error'])
  train_perplexities.append(train_results['vq_output']['perplexity'])
  train_vqvae_loss.append(train_results['vq_output']['loss'])

  # Every 100 updates, report the running mean over the last 100 updates.
  if (step_index + 1) % 100 == 0:
    print('%d train loss: %f ' % (step_index + 1,
                                   np.mean(train_losses[-100:])) +
          ('recon_error: %.3f ' % np.mean(train_recon_errors[-100:])) +
          ('perplexity: %.3f ' % np.mean(train_perplexities[-100:])) +
          ('vqvae loss: %.3f' % np.mean(train_vqvae_loss[-100:])))

In [None]:


# Show the first three entries of `obs` side by side. Calling plt.imshow three
# times on the same axes draws the images on top of each other, so only the
# last one would be visible.
# NOTE(review): `obs` is not assigned by any cell visible in this notebook (the
# comprehension variable of the same name in the loading cell does not leak) --
# confirm which array is meant before running on a fresh kernel.
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
for idx, ax in enumerate(axes):
    ax.imshow(obs[idx])
    ax.set_title('obs[%d]' % idx)
plt.show()

In [None]:
# Write a 3x3 test array to test/array. The file is opened in append mode, so
# the group and dataset may already exist from an earlier run of this cell:
# require_group reuses the group and the old dataset is removed first, which
# makes the cell safe to re-run (create_group/create_dataset raise on duplicates).
with h5py.File('/tmp/test.hdf5', 'a') as fd:
    te = fd.require_group("test")
    if 'array' in te:
        del te['array']
    te.create_dataset('array', data=np.arange(1, 10).reshape(3,3))

In [None]:
# Read test/array back into memory and print it. Nothing is written here, so
# the file is opened read-only instead of read/write.
with h5py.File('/tmp/test.hdf5', 'r') as fd:
    te = fd["test/array"]
    tenp = te[()]  # load the whole dataset as a NumPy array with its own dtype
    print(tenp)