# TensorFlow Dataset

In [1]:
import tensorflow as tf

In [2]:
tf.version.VERSION

'2.0.0-dev20190208'

## Datasets playground

In [3]:
import tensorflow_datasets as tfds

# See available datasets
print(tfds.list_builders())

['bair_robot_pushing_small', 'cats_vs_dogs', 'celeb_a', 'cifar10', 'cifar100', 'coco2014', 'diabetic_retinopathy_detection', 'fashion_mnist', 'image_label_folder', 'imagenet2012', 'imdb_reviews', 'lm1b', 'lsun', 'mnist', 'moving_mnist', 'nsynth', 'omniglot', 'open_images_v4', 'quickdraw_bitmap', 'squad', 'starcraft_video', 'svhn_cropped', 'tf_flowers', 'wmt_translate_ende', 'wmt_translate_enfr']


In [None]:
!pwd ~/tensorflow_datasets

In [6]:
# Construct a tf.data.Dataset
dataset = tfds.load(name="fashion_mnist", data_dir='~/tensorflow_datasets')

In [7]:
ls -la ~/tensorflow_datasets/fashion_mnist/1.0.0

total 95640
drwxr-xr-x  16 tarrade  staff      512 Feb  8 13:34 [34m.[m[m/
drwxr-xr-x   3 tarrade  staff       96 Feb  8 13:34 [34m..[m[m/
-rw-r--r--   1 tarrade  staff     3372 Feb  8 13:34 dataset_info.json
-rw-r--r--   1 tarrade  staff  5630824 Feb  8 13:34 fashion_mnist-test.tfrecord-00000-of-00001
-rw-r--r--   1 tarrade  staff  3370332 Feb  8 13:34 fashion_mnist-train.tfrecord-00000-of-00010
-rw-r--r--   1 tarrade  staff  3357425 Feb  8 13:34 fashion_mnist-train.tfrecord-00001-of-00010
-rw-r--r--   1 tarrade  staff  3371467 Feb  8 13:34 fashion_mnist-train.tfrecord-00002-of-00010
-rw-r--r--   1 tarrade  staff  3362288 Feb  8 13:34 fashion_mnist-train.tfrecord-00003-of-00010
-rw-r--r--   1 tarrade  staff  3380831 Feb  8 13:34 fashion_mnist-train.tfrecord-00004-of-00010
-rw-r--r--   1 tarrade  staff  3384792 Feb  8 13:34 fashion_mnist-train.tfrecord-00005-of-00010
-rw-r--r--   1 tarrade  staff  3355384 Feb  8 13:34 fashion_mnist-train.tfrecord-00006-of-00010
-rw-r-

**TODO**
- ensure that cache works only for the same split

In [8]:
cat ~/tensorflow_datasets/fashion_mnist/1.0.0/dataset_info.json

{
  "citation": "@article{DBLP:journals/corr/abs-1708-07747,\n  author    = {Han Xiao and\n               Kashif Rasul and\n               Roland Vollgraf},\n  title     = {Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning\n               Algorithms},\n  journal   = {CoRR},\n  volume    = {abs/1708.07747},\n  year      = {2017},\n  url       = {http://arxiv.org/abs/1708.07747},\n  archivePrefix = {arXiv},\n  eprint    = {1708.07747},\n  timestamp = {Mon, 13 Aug 2018 16:47:27 +0200},\n  biburl    = {https://dblp.org/rec/bib/journals/corr/abs-1708-07747},\n  bibsource = {dblp computer science bibliography, https://dblp.org}\n}\n",
  "description": "Fashion-MNIST is a dataset of Zalando's article images consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes.",
  "downloadChecksums": {
    "http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-imag

In [9]:
dataset['test']

<_OptionsDataset shapes: {image: (28, 28, 1), label: ()}, types: {image: tf.uint8, label: tf.int64}>

## Back to the model

In [10]:
train_dataset, test_dataset = dataset["train"], dataset["test"]
assert isinstance(train_dataset, tf.data.Dataset)

# And then the rest of your input pipeline
train_dataset = train_dataset.repeat().shuffle(1024).batch(128).prefetch(2)
features = tf.compat.v1.data.make_one_shot_iterator(train_dataset).get_next()
image, label = features['image'], features['label']

In [11]:
test_features = tf.compat.v1.data.make_one_shot_iterator(test_dataset).get_next()
test_image, test_label = features['image'], features['label']

In [12]:
train_dataset.take(1)

<TakeDataset shapes: {image: (None, 28, 28, 1), label: (None,)}, types: {image: tf.uint8, label: tf.int64}>

In [20]:
test_image=tf.cast(test_image, tf.float64)

In [13]:
features = tf.compat.v1.data.make_one_shot_iterator(train_dataset).get_next()
image, label = features['image'], features['label']


In [15]:
image=tf.cast(image, tf.float64)

In [16]:
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(512, activation=tf.nn.relu),
  tf.keras.layers.Dropout(0.4),
  tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(image, label, epochs=5, steps_per_epoch=1000)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x136e05940>

In [0]:
model.evaluate(test_image, test_label)



[15.882110357284546, 0.796875]