Merge pull request #28 from jackd:moving_mnist

PiperOrigin-RevId: 232784325
tensorflow · Feb 7, 2019 · 97b0093 · 97b0093
2 parents b2b6ff7 + df56746
commit 97b0093
Show file tree

Hide file tree

Showing 9 changed files with 509 additions and 10 deletions.
diff --git a/docs/api_docs/python/tfds/_api_cache.json b/docs/api_docs/python/tfds/_api_cache.json
@@ -1,5 +1,5 @@
 {
-  "current_doc_full_name": "tfds.features.text.TextEncoder.__hash__", 
+  "current_doc_full_name": "tfds.percent.__doc__", 
   "duplicate_of": {
     "tfds.GenerateMode": "tfds.download.GenerateMode", 
     "tfds.GenerateMode.FORCE_REDOWNLOAD": "tfds.download.GenerateMode.FORCE_REDOWNLOAD", 

diff --git a/docs/api_docs/python/tfds/features/Video.md b/docs/api_docs/python/tfds/features/Video.md
@@ -27,10 +27,11 @@ Defined in [`core/features/video_feature.py`](https://github.com/tensorflow/data
 `FeatureConnector` for videos, png-encoding frames on disk.
 
 Video: The image connector accepts as input:
-  * uint8 array representing an video.
+  * uint8 array representing a video.
 
 Output:
-  video: tf.Tensor of type tf.uint8 and shape [num_frames, height, width, 3]
+  video: tf.Tensor of type tf.uint8 and shape
+    [num_frames, height, width, channels], where channels must be 1 or 3
 
 Example:
   * In the DatasetInfo object:
@@ -54,7 +55,7 @@ Construct the connector.
 #### Args:
 
 * <b>`shape`</b>: tuple of ints, the shape of the video (num_frames, height, width,
-    channels=3).
+    channels), where channels is 1 or 3.
 
 
 #### Raises:

diff --git a/docs/datasets.md b/docs/datasets.md
@@ -31,6 +31,7 @@ np_datasets = tfds.as_numpy(datasets)
 * [`audio`](#audio)
   * [`"nsynth"`](#nsynth)
 * [`image`](#image)
+  * [`"cats_vs_dogs"`](#cats_vs_dogs)
   * [`"celeb_a"`](#celeb_a)
   * [`"cifar10"`](#cifar10)
   * [`"cifar100"`](#cifar100)
@@ -55,6 +56,7 @@ np_datasets = tfds.as_numpy(datasets)
   * [`"wmt_translate_enfr"`](#wmt_translate_enfr)
 * [`video`](#video)
   * [`"bair_robot_pushing_small"`](#bair_robot_pushing_small)
+  * [`"moving_mnist"`](#moving_mnist)
   * [`"starcraft_video"`](#starcraft_video)
 
 ---
@@ -147,6 +149,49 @@ None
 
 # [`image`](#image)
 
+## `"cats_vs_dogs"`
+
+A large set of images of cats and dogs. There are 1800 corrupted images that are dropped.
+
+* URL: [https://www.microsoft.com/en-us/download/details.aspx?id=54765](https://www.microsoft.com/en-us/download/details.aspx?id=54765)
+* `DatasetBuilder`: [`tfds.image.cats_vs_dogs.CatsVsDogs`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/image/cats_vs_dogs.py)
+* Version: `v1.0.0`
+
+### Features
+```
+FeaturesDict({
+    'image': Image(shape=(None, None, 3), dtype=tf.uint8),
+    'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
+})
+```
+
+
+### Statistics
+None computed
+
+### Urls
+ * [https://www.microsoft.com/en-us/download/details.aspx?id=54765](https://www.microsoft.com/en-us/download/details.aspx?id=54765)
+
+### Supervised keys (for `as_supervised=True`)
+None
+
+### Citation
+```
+@Inproceedings (Conference){asirra-a-captcha-that-exploits-interest-aligned-manual-image-categorization,
+author = {Elson, Jeremy and Douceur, John (JD) and Howell, Jon and Saul, Jared},
+title = {Asirra: A CAPTCHA that Exploits Interest-Aligned Manual Image Categorization},
+booktitle = {Proceedings of 14th ACM Conference on Computer and Communications Security (CCS)},
+year = {2007},
+month = {October},
+publisher = {Association for Computing Machinery, Inc.},
+url = {https://www.microsoft.com/en-us/research/publication/asirra-a-captcha-that-exploits-interest-aligned-manual-image-categorization/},
+edition = {Proceedings of 14th ACM Conference on Computer and Communications Security (CCS)},
+}
+
+```
+
+---
+
 ## `"celeb_a"`
 
 Large-scale CelebFaces Attributes, CelebA.Set of ~30k celebrities pictures. These pictures are cropped.
@@ -1543,6 +1588,61 @@ None
 
 ---
 
+## `"moving_mnist"`
+
+Moving variant of MNIST database of handwritten digits. This is the
+data used by the authors for reporting model performance. See
+`tfds.video.moving_mnist.image_as_moving_sequence`
+for generating training/validation data from the MNIST dataset.
+
+
+* URL: [http://www.cs.toronto.edu/~nitish/unsupervised_video/](http://www.cs.toronto.edu/~nitish/unsupervised_video/)
+* `DatasetBuilder`: [`tfds.video.moving_mnist.MovingMnist`](https://github.com/tensorflow/datasets/tree/master/tensorflow_datasets/video/moving_mnist.py)
+* Version: `v0.1.0`
+
+### Features
+```
+FeaturesDict({
+    'image_sequence': Video(shape=(20, 64, 64, 1), dtype=tf.uint8, feature=Image(shape=(64, 64, 1), dtype=tf.uint8)),
+})
+```
+
+
+### Statistics
+Split  | Examples
+:----- | ---:
+TEST       |     10,000
+ALL        |     10,000
+
+
+### Urls
+ * [http://www.cs.toronto.edu/~nitish/unsupervised_video/](http://www.cs.toronto.edu/~nitish/unsupervised_video/)
+
+### Supervised keys (for `as_supervised=True`)
+None
+
+### Citation
+```
+@article{DBLP:journals/corr/SrivastavaMS15,
+  author    = {Nitish Srivastava and
+               Elman Mansimov and
+               Ruslan Salakhutdinov},
+  title     = {Unsupervised Learning of Video Representations using LSTMs},
+  journal   = {CoRR},
+  volume    = {abs/1502.04681},
+  year      = {2015},
+  url       = {http://arxiv.org/abs/1502.04681},
+  archivePrefix = {arXiv},
+  eprint    = {1502.04681},
+  timestamp = {Mon, 13 Aug 2018 16:47:05 +0200},
+  biburl    = {https://dblp.org/rec/bib/journals/corr/SrivastavaMS15},
+  bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+```
+
+---
+
 ## `"starcraft_video"`
 
 This data set contains videos generated from Starcraft.

diff --git a/tensorflow_datasets/core/features/sequence_feature.py b/tensorflow_datasets/core/features/sequence_feature.py
@@ -262,9 +262,7 @@ def np_to_list(elem):
   elif isinstance(elem, tuple):
     return list(elem)
   elif isinstance(elem, np.ndarray):
-    elem = np.split(elem, elem.shape[0])
-    elem = np.squeeze(elem, axis=0)
-    return elem
+    return list(elem)
   else:
     raise ValueError(
         'Input elements of a sequence should be either a numpy array, a '

diff --git a/tensorflow_datasets/core/features/video_feature.py b/tensorflow_datasets/core/features/video_feature.py
@@ -29,10 +29,11 @@ class Video(sequence_feature.Sequence):
   """`FeatureConnector` for videos, png-encoding frames on disk.
 
   Video: The image connector accepts as input:
-    * uint8 array representing an video.
+    * uint8 array representing a video.
 
   Output:
-    video: tf.Tensor of type tf.uint8 and shape [num_frames, height, width, 3]
+    video: tf.Tensor of type tf.uint8 and shape
+      [num_frames, height, width, channels], where channels must be 1 or 3
 
   Example:
     * In the DatasetInfo object:
@@ -51,7 +52,7 @@ def __init__(self, shape):
 
     Args:
       shape: tuple of ints, the shape of the video (num_frames, height, width,
-        channels=3).
+        channels), where channels is 1 or 3.
 
     Raises:
       ValueError: If the shape is invalid

diff --git a/tensorflow_datasets/video/__init__.py b/tensorflow_datasets/video/__init__.py
@@ -16,5 +16,6 @@
 """Video datasets."""
 
 from tensorflow_datasets.video.bair_robot_pushing import BairRobotPushingSmall
+from tensorflow_datasets.video.moving_mnist import MovingMnist
 from tensorflow_datasets.video.starcraft import StarcraftVideo
 from tensorflow_datasets.video.starcraft import StarcraftVideoConfig
diff --git a/tensorflow_datasets/video/moving_mnist.py b/tensorflow_datasets/video/moving_mnist.py
@@ -0,0 +1,99 @@
+# coding=utf-8
+# Copyright 2019 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""MovingMNIST."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+import tensorflow_datasets.public_api as tfds
+from tensorflow_datasets.video.moving_sequence import image_as_moving_sequence  # pylint: disable=unused-import
+
+_OUT_RESOLUTION = (64, 64)
+_SEQUENCE_LENGTH = 20
+_URL = "http://www.cs.toronto.edu/~nitish/unsupervised_video/"
+_CITATION = """\
+@article{DBLP:journals/corr/SrivastavaMS15,
+  author    = {Nitish Srivastava and
+               Elman Mansimov and
+               Ruslan Salakhutdinov},
+  title     = {Unsupervised Learning of Video Representations using LSTMs},
+  journal   = {CoRR},
+  volume    = {abs/1502.04681},
+  year      = {2015},
+  url       = {http://arxiv.org/abs/1502.04681},
+  archivePrefix = {arXiv},
+  eprint    = {1502.04681},
+  timestamp = {Mon, 13 Aug 2018 16:47:05 +0200},
+  biburl    = {https://dblp.org/rec/bib/journals/corr/SrivastavaMS15},
+  bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+"""
+_DESCRIPTION = """\
+Moving variant of MNIST database of handwritten digits. This is the
+data used by the authors for reporting model performance. See
+`tfds.video.moving_mnist.image_as_moving_sequence`
+for generating training/validation data from the MNIST dataset.
+"""
+
+
+class MovingMnist(tfds.core.GeneratorBasedBuilder):
+  """MovingMnist."""
+
+  VERSION = tfds.core.Version("0.1.0")
+
+  def _info(self):
+    return tfds.core.DatasetInfo(
+        builder=self,
+        description=_DESCRIPTION,
+        features=tfds.features.FeaturesDict({
+            "image_sequence": tfds.features.Video(
+                shape=(_SEQUENCE_LENGTH,) + _OUT_RESOLUTION + (1,))
+        }),
+        urls=[_URL],
+        citation=_CITATION,
+    )
+
+  def _split_generators(self, dl_manager):
+    data_path = dl_manager.download(_URL + "mnist_test_seq.npy")
+
+    # The authors only provide test data.
+    # See `tfds.video.moving_mnist.image_as_moving_sequence` for a mapping
+    # function to create training/validation datasets from MNIST.
+    return [
+        tfds.core.SplitGenerator(
+            name=tfds.Split.TEST,
+            num_shards=5,
+            gen_kwargs=dict(data_path=data_path)),
+    ]
+
+  def _generate_examples(self, data_path):
+    """Generate MovingMnist sequences.
+
+    Args:
+      data_path (str): Path to the data file
+
+    Yields:
+      dict mapping `image_sequence` to a 20 x 64 x 64 x 1 uint8 numpy array
+    """
+    with tf.io.gfile.GFile(data_path, "rb") as fp:
+      images = np.load(fp)
+    images = np.transpose(images, (1, 0, 2, 3))
+    images = np.expand_dims(images, axis=-1)
+    for sequence in images:
+      yield dict(image_sequence=sequence)