diff --git a/vignettes/examples/eager_image_captioning.R b/vignettes/examples/eager_image_captioning.R
index c829cba61..1c1b5d56f 100644
--- a/vignettes/examples/eager_image_captioning.R
+++ b/vignettes/examples/eager_image_captioning.R
@@ -5,9 +5,7 @@
 #' https://blogs.rstudio.com/tensorflow/posts/2018-09-17-eager-captioning
 
 library(keras)
-use_implementation("tensorflow")
 library(tensorflow)
-tfe_enable_eager_execution(device_policy = "silent")
 
 np <- import("numpy")
 
@@ -32,10 +30,6 @@ debugshapes <- FALSE
 restore_checkpoint <- FALSE
 saved_features_exist <- FALSE
 
-use_session_with_seed(7777,
-                      disable_gpu = FALSE,
-                      disable_parallel_cpu = FALSE)
-
 annotation_file <- "train2014/annotations/captions_train2014.json"
 image_path <- "train2014/train2014"
 
@@ -138,7 +132,7 @@ top_k <- 5000
 tokenizer <- text_tokenizer(num_words = top_k,
                             oov_token = "<unk>",
                             filters = '!"#$%&()*+.,-/:;=?@[\\]^_`{|}~ ')
-tokenizer$fit_on_texts(sample_captions)
+fit_text_tokenizer(tokenizer, sample_captions)
 train_captions_tokenized <-
   tokenizer %>% texts_to_sequences(train_captions)
 validation_captions_tokenized <-
@@ -216,7 +210,7 @@ map_func <- function(img_name, cap) {
 train_dataset <-
   tensor_slices_dataset(list(train_images, train_captions_padded)) %>%
   dataset_map(function(item1, item2)
-    tf$py_func(map_func, list(item1, item2), list(tf$float32, tf$int32))) %>%
+    tf$py_function(map_func, list(item1, item2), list(tf$float32, tf$int32))) %>%
   # dataset_shuffle(buffer_size) %>%
   dataset_batch(batch_size) 
 
@@ -360,7 +354,7 @@ rnn_decoder <-
 encoder <- cnn_encoder(embedding_dim)
 decoder <- rnn_decoder(embedding_dim, gru_units, vocab_size)
 
-optimizer = tf$train$AdamOptimizer()
+optimizer = tf$optimizers$Adam()
 
 cx_loss <- function(y_true, y_pred) {
   mask <- 1 - k_cast(y_true == 0L, dtype = "float32")
@@ -530,8 +524,7 @@ if (!restore_checkpoint) {
       variables <- c(encoder$variables, decoder$variables)
       gradients <- tape$gradient(loss, variables)
       
-      optimizer$apply_gradients(purrr::transpose(list(gradients, variables)),
-                                global_step = tf$train$get_or_create_global_step())
+      optimizer$apply_gradients(purrr::transpose(list(gradients, variables)))
     })
     cat(paste0(
       "\n\nTotal loss (epoch): ",
diff --git a/vignettes/examples/nmt_attention.R b/vignettes/examples/nmt_attention.R
index 466f3198a..ec55f09c5 100644
--- a/vignettes/examples/nmt_attention.R
+++ b/vignettes/examples/nmt_attention.R
@@ -21,6 +21,17 @@ library(tibble)
 # Assumes you've downloaded and unzipped one of the bilingual datasets offered at
 # http://www.manythings.org/anki/ and put it into a directory "data"
 # This example translates English to Dutch.
+# Ensure data/nld.txt exists: download the archive if absent, extract if nld.txt is missing.
+download_data <- function() {
+  zip_path <- file.path("data", "nld-eng.zip")
+  if (file.exists(file.path("data", "nld.txt"))) return(invisible(NULL))
+  if (!dir.exists("data")) dir.create("data")
+  if (!file.exists(zip_path)) {
+    download.file("http://www.manythings.org/anki/nld-eng.zip", destfile = zip_path)
+  }
+  unzip(zip_path, exdir = "data")
+}
+download_data()
 
 filepath <- file.path("data", "nld.txt")
 
@@ -290,7 +301,7 @@ evaluate <-
       attention_matrix[t,] <- attention_weights %>% as.double()
       
       pred_idx <-
-        tf$compat$v1$multinomial(k_exp(preds), num_samples = 1L)[1, 1] %>% as.double()
+        tf$random$categorical(k_exp(preds), num_samples = 1L)[1, 1] %>% as.double()
       pred_word <- index2word(pred_idx, target_index)
       
       if (pred_word == '<stop>') {
@@ -387,7 +398,7 @@ for (epoch in seq_len(n_epochs)) {
       ": ",
       (loss / k_cast_to_floatx(dim(y)[2])) %>% as.double() %>% round(4),
       "\n"
-    ) %>% print()
+    ) %>% cat()
     
     variables <- c(encoder$variables, decoder$variables)
     gradients <- tape$gradient(loss, variables)
@@ -402,7 +413,7 @@ for (epoch in seq_len(n_epochs)) {
     ": ",
     (total_loss / k_cast_to_floatx(buffer_size)) %>% as.double() %>% round(4),
     "\n"
-  ) %>% print()
+  ) %>% cat()
   
   walk(train_sentences[1:5], function(pair)
     translate(pair[1]))
diff --git a/vignettes/examples/tfprob_vae.R b/vignettes/examples/tfprob_vae.R
index 09394374f..141d8b123 100644
--- a/vignettes/examples/tfprob_vae.R
+++ b/vignettes/examples/tfprob_vae.R
@@ -5,13 +5,8 @@
 #' https://blogs.rstudio.com/tensorflow/posts/2019-01-08-getting-started-with-tf-probability/
 
 library(keras)
-use_implementation("tensorflow")
 library(tensorflow)
-tfe_enable_eager_execution(device_policy = "silent")
-
-tfp <- import("tensorflow_probability")
-tfd <- tfp$distributions
-
+library(tfprobability)
 library(tfdatasets)
 library(dplyr)
 library(glue)
@@ -79,6 +74,17 @@ np <- import("numpy")
 
 # assume data have been downloaded from https://github.com/rois-codh/kmnist
 # and stored in /tmp
+# Fetch the KMNIST training images into /tmp, the directory np$load() reads from below.
+download_data <- function() {
+  target <- "/tmp/kmnist-train-imgs.npz"
+  if (!dir.exists("/tmp")) dir.create("/tmp")
+  if (!file.exists(target)) {
+    download.file("http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-train-imgs.npz",
+                  destfile = target, mode = "wb")  # binary archive: avoid text mode
+  }
+}
+download_data()
+
 kuzushiji <- np$load("/tmp/kmnist-train-imgs.npz")
 kuzushiji <- kuzushiji$get("arr_0")
 
@@ -98,8 +104,8 @@ train_dataset <- tensor_slices_dataset(train_images) %>%
 
 # Params ------------------------------------------------------------------
 
-latent_dim <- 2
-mixture_components <- 16
+latent_dim <- 2L
+mixture_components <- 16L
 
 
 # Model -------------------------------------------------------------------
@@ -132,8 +138,8 @@ encoder_model <- function(name = NULL) {
         self$conv2() %>%
         self$flatten() %>%
         self$dense()
-      tfd$MultivariateNormalDiag(loc = x[, 1:latent_dim],
-                                 scale_diag = tf$nn$softplus(x[, (latent_dim + 1):(2 * latent_dim)] + 1e-5))
+      tfd_multivariate_normal_diag(loc = x[, 1:latent_dim],
+                                   scale_diag = tf$nn$softplus(x[, (latent_dim + 1):(2 * latent_dim)] + 1e-5))
     }
   })
 }
@@ -178,7 +184,7 @@ decoder_model <- function(name = NULL) {
         self$deconv2() %>%
         self$deconv3()
       
-      tfd$Independent(tfd$Bernoulli(logits = x),
+      tfd_independent(tfd_bernoulli(logits = x),
                       reinterpreted_batch_ndims = 3L)
       
     }
@@ -192,30 +198,30 @@ learnable_prior_model <-
     
     keras_model_custom(name = name, function(self) {
       self$loc <-
-        tf$get_variable(
+        tf$compat$v1$get_variable(
           name = "loc",
           shape = list(mixture_components, latent_dim),
           dtype = tf$float32
         )
-      self$raw_scale_diag <- tf$get_variable(
+      self$raw_scale_diag <- tf$compat$v1$get_variable(
         name = "raw_scale_diag",
         shape = c(mixture_components, latent_dim),
         dtype = tf$float32
       )
       self$mixture_logits <-
-        tf$get_variable(
+        tf$compat$v1$get_variable(
           name = "mixture_logits",
           shape = c(mixture_components),
           dtype = tf$float32
         )
       
       function (x, mask = NULL) {
-        tfd$MixtureSameFamily(
-          components_distribution = tfd$MultivariateNormalDiag(
+        tfd_mixture_same_family(
+          components_distribution = tfd_multivariate_normal_diag(
             loc = self$loc,
             scale_diag = tf$nn$softplus(self$raw_scale_diag)
           ),
-          mixture_distribution = tfd$Categorical(logits = self$mixture_logits)
+          mixture_distribution = tfd_categorical(logits = self$mixture_logits)
         )
       }
     })
@@ -234,8 +240,7 @@ compute_kl_loss <-
   }
 
 
-global_step <- tf$train$get_or_create_global_step()
-optimizer <- tf$train$AdamOptimizer(1e-4)
+optimizer <- tf$optimizers$Adam(1e-4)
 
 
 # Training loop -----------------------------------------------------------
@@ -253,7 +258,6 @@ checkpoint_prefix <- file.path(checkpoint_dir, "ckpt")
 checkpoint <-
   tf$train$Checkpoint(
     optimizer = optimizer,
-    global_step = global_step,
     encoder = encoder,
     decoder = decoder,
     latent_prior_model = latent_prior_model
@@ -284,7 +288,7 @@ for (epoch in seq_len(num_epochs)) {
         compute_kl_loss(latent_prior,
                         approx_posterior,
                         approx_posterior_sample)
-
+      
       loss <- kl_loss + avg_nll
     })
     
@@ -299,18 +303,15 @@ for (epoch in seq_len(num_epochs)) {
     
     optimizer$apply_gradients(purrr::transpose(list(
       encoder_gradients, encoder$variables
-    )),
-    global_step = tf$train$get_or_create_global_step())
+    )))
     optimizer$apply_gradients(purrr::transpose(list(
       decoder_gradients, decoder$variables
-    )),
-    global_step = tf$train$get_or_create_global_step())
+    )))
     optimizer$apply_gradients(purrr::transpose(list(
       prior_gradients, latent_prior_model$variables
-    )),
-    global_step = tf$train$get_or_create_global_step())
+    )))
     
-})
+  })
   
   checkpoint$save(file_prefix = checkpoint_prefix)
   
@@ -329,3 +330,4 @@ for (epoch in seq_len(num_epochs)) {
     show_grid(epoch)
   }
 }
+
diff --git a/vignettes/examples/vq_vae.R b/vignettes/examples/vq_vae.R
index c8c0b8680..c2eec02e3 100644
--- a/vignettes/examples/vq_vae.R
+++ b/vignettes/examples/vq_vae.R
@@ -5,20 +5,19 @@
 #' https://blogs.rstudio.com/tensorflow/posts/2019-01-24-vq-vae/
 
 library(keras)
-use_implementation("tensorflow")
 library(tensorflow)
-tfe_enable_eager_execution(device_policy = "silent")
-
-use_session_with_seed(7778,
-                      disable_gpu = FALSE,
-                      disable_parallel_cpu = FALSE)
-
-tfp <- import("tensorflow_probability")
-tfd <- tfp$distributions
-
+library(tfprobability)
 library(tfdatasets)
+
 library(dplyr)
 library(glue)
+
+# curry must come from GitHub: the CRAN release has no "set_defaults" function.
+if (!requireNamespace("devtools", quietly = TRUE)) {
+  install.packages("devtools")
+}
+devtools::install_github("thomasp85/curry")
+
 library(curry)
 
 moving_averages <- tf$python$training$moving_averages
@@ -63,7 +62,14 @@ write_png <- function(dataset, epoch, desc, images) {
 
 np <- import("numpy")
 
-# download from: https://github.com/rois-codh/kmnist
+# Download the KMNIST training images (https://github.com/rois-codh/kmnist) unless present.
+download_data <- function() {
+  if (!file.exists("kmnist-train-imgs.npz")) {
+    download.file("http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-train-imgs.npz",
+                  destfile = "kmnist-train-imgs.npz", mode = "wb")  # binary archive
+  }
+}
+download_data()
 kuzushiji <- np$load("kmnist-train-imgs.npz")
 kuzushiji <- kuzushiji$get("arr_0")
 
@@ -90,7 +96,7 @@ batch %>% dim()
 # Params ------------------------------------------------------------------
 
 learning_rate <- 0.001
-latent_size <- 1
+latent_size <- 1L
 num_codes <- 64L
 code_size <- 16L
 base_depth <- 32
@@ -214,7 +220,7 @@ decoder_model <- function(name = NULL,
         self$deconv6() %>%
         # output shape:  7 28 28 1
         self$conv1()
-      tfd$Independent(tfd$Bernoulli(logits = x),
+      tfd_independent(tfd_bernoulli(logits = x),
                       reinterpreted_batch_ndims = length(output_shape))
     }
   })
@@ -228,16 +234,16 @@ vector_quantizer_model <-
     keras_model_custom(name = name, function(self) {
       self$num_codes <- num_codes
       self$code_size <- code_size
-      self$codebook <- tf$get_variable("codebook",
+      self$codebook <- tf$compat$v1$get_variable("codebook",
                                        shape = c(num_codes, code_size),
                                        dtype = tf$float32)
-      self$ema_count <- tf$get_variable(
+      self$ema_count <- tf$compat$v1$get_variable(
         name = "ema_count",
         shape = c(num_codes),
         initializer = tf$constant_initializer(0),
         trainable = FALSE
       )
-      self$ema_means = tf$get_variable(
+      self$ema_means = tf$compat$v1$get_variable(
         name = "ema_means",
         initializer = self$codebook$initialized_value(),
         trainable = FALSE
@@ -308,7 +314,7 @@ update_ema <- function(vector_quantizer,
   updated_ema_means <-
     updated_ema_means / tf$expand_dims(updated_ema_count, axis = -1L)
   
-  tf$assign(vector_quantizer$codebook, updated_ema_means)
+  tf$compat$v1$assign(vector_quantizer$codebook, updated_ema_means)
 }
 
 
@@ -321,7 +327,7 @@ decoder <- decoder_model(input_size = latent_size * code_size,
 vector_quantizer <-
   vector_quantizer_model(num_codes = num_codes, code_size = code_size)
 
-optimizer <- tf$train$AdamOptimizer(learning_rate = learning_rate)
+optimizer <- tf$optimizers$Adam(learning_rate = learning_rate)
 
 checkpoint_dir <- "./vq_vae_checkpoints"
 
@@ -365,7 +371,7 @@ for (epoch in seq_len(num_epochs)) {
       
       commitment_loss <- tf$reduce_mean(tf$square(codes - tf$stop_gradient(nearest_codebook_entries)))
       
-      prior_dist <- tfd$Multinomial(total_count = 1,
+      prior_dist <- tfd_multinomial(total_count = 1,
                                     logits = tf$zeros(c(latent_size, num_codes)))
       prior_loss <- -tf$reduce_mean(tf$reduce_sum(prior_dist$log_prob(one_hot_assignments), 1L))
       
@@ -379,12 +385,10 @@ for (epoch in seq_len(num_epochs)) {
     
     optimizer$apply_gradients(purrr::transpose(list(
       encoder_gradients, encoder$variables
-    )),
-    global_step = tf$train$get_or_create_global_step())
+    )))
     optimizer$apply_gradients(purrr::transpose(list(
       decoder_gradients, decoder$variables
-    )),
-    global_step = tf$train$get_or_create_global_step())
+    )))
     
     update_ema(vector_quantizer,
                one_hot_assignments,