##### Copyright 2019 The TensorFlow Authors.

In [0]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Load NumPy data with tf.data

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/alpha/tutorials/load_data/numpy"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/r2/tutorials/load_data/numpy.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/docs/blob/master/site/en/r2/tutorials/load_data/numpy.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

This tutorial provides an example of loading data from NumPy arrays into a `tf.data.Dataset`.

This example loads the MNIST dataset from a `.npz` file. However, the source of the NumPy arrays is not important.


## Setup

In [0]:
!pip install tensorflow==2.0.0-alpha0

In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

In [0]:
# URL of the MNIST `.npz` archive that this notebook fetches and loads.
DATA_URL = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz'

### Load from `.npz` file

In [0]:
# Resolve the archive at DATA_URL to a local file path via get_file.
path = tf.keras.utils.get_file('mnist.npz', DATA_URL)

# Pull the four arrays out of the archive; the `with` block closes the file
# once they have been read.
with np.load(path) as data:
  train_examples, train_labels = data['x_train'], data['y_train']
  test_examples, test_labels = data['x_test'], data['y_test']

## Load NumPy arrays with `tf.data.Dataset`

Assuming you have an array of examples and a corresponding array of labels, pass the two arrays as a tuple into `tf.data.Dataset.from_tensor_slices` to create a `tf.data.Dataset`.

In [0]:
# Each dataset is built from an (examples, labels) tuple of arrays.
train_dataset = tf.data.Dataset.from_tensor_slices(
    (train_examples, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_examples, test_labels))

## Use the datasets

### Prepare the datasets

For this example, we're going to flatten and rescale the input data, shuffle it, and batch it.

In [0]:
BATCH_SIZE = 64
SHUFFLE_BUFFER_SIZE = 100


def prep_dataset(dataset, shuffle=True):
  """Flattens, rescales, optionally shuffles, and batches a dataset.

  Args:
    dataset: A `tf.data.Dataset` of `(image, label)` pairs in which every image
      holds exactly 784 values (the reshape below requires this).
    shuffle: Whether to shuffle elements through a buffer of
      `SHUFFLE_BUFFER_SIZE` elements before batching. Defaults to `True`,
      which matches the behavior before this parameter existed.

  Returns:
    A dataset of `(features, labels)` batches of up to `BATCH_SIZE` elements,
    where each feature row is a float32 vector of length 784 divided by 255.
  """

  def preprocess(x, y):
    # Cast explicitly so the division is floating point whatever dtype the
    # incoming pixels have, instead of relying on implicit conversion.
    return tf.reshape(tf.cast(x, tf.float32), [784]) / 255.0, y

  dataset = dataset.map(preprocess)
  if shuffle:
    dataset = dataset.shuffle(SHUFFLE_BUFFER_SIZE)
  return dataset.batch(BATCH_SIZE)


# NOTE: these assignments rebind `train_dataset` / `test_dataset`. Re-running
# this cell without first re-running the cell that creates them would feed
# already-batched data back into `prep_dataset`.
train_dataset = prep_dataset(train_dataset)
# Evaluation does not depend on element order, so the test set is not shuffled.
test_dataset = prep_dataset(test_dataset, shuffle=False)

### Build and train a model

In [0]:
def get_model():
  """Builds and compiles a small fully connected classifier.

  The network takes 784-element inputs, applies two 64-unit ReLU layers, and
  ends in a 10-unit softmax layer. It is compiled with the RMSprop optimizer,
  sparse categorical cross-entropy loss, and sparse categorical accuracy as
  the monitored metric.

  Returns:
    The compiled `tf.keras.Model`.
  """
  layers = tf.keras.layers

  digits = tf.keras.Input(shape=(784,), name='digits')
  hidden = layers.Dense(64, activation='relu', name='dense_1')(digits)
  hidden = layers.Dense(64, activation='relu', name='dense_2')(hidden)
  predictions = layers.Dense(
      10, activation='softmax', name='predictions')(hidden)

  model = tf.keras.Model(inputs=digits, outputs=predictions)
  model.compile(
      optimizer=tf.keras.optimizers.RMSprop(),
      loss=tf.keras.losses.SparseCategoricalCrossentropy(),
      metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
  )
  return model

In [0]:
EPOCHS = 10  # Number of passes over the training dataset.

model = get_model()
model.fit(train_dataset, epochs=EPOCHS)

# Kept as the final expression so the cell displays what evaluate() returns.
model.evaluate(test_dataset)