<a href="https://colab.research.google.com/github/space-owner/Tensorflow-2/blob/main/303_load_numpy_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## ***Load Numpy Data***
This post is **based on the TensorFlow tutorial** for study purposes. [Link](https://www.tensorflow.org/tutorials)   
If you get a chance, be sure to try this tutorial.

## ***Setup***

In [5]:
import numpy as np
import tensorflow as tf
# This notebook targets the TensorFlow 2.x API. Compare the whole major-version
# component (not just the first character, which would also accept e.g. "20.1")
# and report the version that was actually found when the check fails.
assert tf.__version__.split('.')[0] == '2', (
    f"TensorFlow 2.x is required, found {tf.__version__}"
)

## ***Load from .npz file***

In [6]:
# Fetch the MNIST archive through Keras' get_file helper and keep the local
# path it returns.
data_url = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz"
path = tf.keras.utils.get_file(fname='mnist.npz', origin=data_url)

# Read the four arrays while the archive is open; the context manager closes
# the file handle once the block exits.
with np.load(path) as data:
    train_examples, train_labels = data['x_train'], data['y_train']
    test_examples, test_labels = data['x_test'], data['y_test']

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [7]:
# Summarise the training arrays: shapes, container type, and the first example.
for caption, value in (
    (">>> example shape = ", train_examples.shape),
    (">>> label shape = ", train_labels.shape),
    (">>> type of train example = ", type(train_examples)),
    (">>> example data = ", train_examples[0].tolist()),
):
    print(caption, value)

>>> example shape =  (60000, 28, 28)
>>> label shape =  (60000,)
>>> type of train example =  <class 'numpy.ndarray'>
>>> example data =  [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 18, 18, 18, 126, 136, 175, 26, 166, 255, 247, 127, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253, 253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 49, 238, 253, 253, 253, 253, 253, 253, 253, 253, 251, 93, 82, 82, 56, 39, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 18, 219, 253, 253, 253, 253, 253, 198, 182, 247, 241, 0, 0, 0, 0, 0, 0, 0, 0, 0

### ***Load NumPy arrays with tf.data.Dataset***

In [8]:
# Wrap each (examples, labels) pair of NumPy arrays in a tf.data.Dataset;
# from_tensor_slices slices along the first axis, giving one
# (example, label) element per row.
train_dataset = tf.data.Dataset.from_tensor_slices((train_examples, train_labels))
test_dataset = tf.data.Dataset.from_tensor_slices((test_examples, test_labels))

In [9]:
print(">>> print(train_dataset) =", train_dataset)

# Demonstrate, rather than hard-code, that a Dataset exposes no `.shape`:
# attempt the attribute access and display whatever exception is raised.
try:
    dataset_shape = train_dataset.shape
except AttributeError as err:
    print(f">>> train_dataset.shape = {type(err).__name__}: {err}")
else:
    # Only reached if the installed TensorFlow version does provide `.shape`.
    print(">>> train_dataset.shape =", dataset_shape)

>>> print(train_dataset) = <TensorSliceDataset shapes: ((28, 28), ()), types: (tf.uint8, tf.uint8)>
>>> train_dataset.shape = AttributeError: 'TensorSliceDataset' object has no attribute 'shape'


### ***Use the datasets***

In [10]:
batch_size = 64
shuffle_buffer_size = 100

# Build both pipelines from the source arrays so this cell is idempotent.
# Chaining `.shuffle(...).batch(...)` onto the existing `train_dataset` /
# `test_dataset` variables would add another batch dimension every time the
# cell is re-executed, which breaks the downstream fit/evaluate calls.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_examples, train_labels))
    .shuffle(shuffle_buffer_size)
    .batch(batch_size)
)

# The test pipeline is only batched; its element order is left untouched.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((test_examples, test_labels))
    .batch(batch_size)
)

In [11]:
# Peek at the first batch produced by the test pipeline.
for images, labels in test_dataset.take(1):
    # Features of the batch: Python type, shape, and contents.
    print(">>> first test data type=", type(images))
    print(">>> first test data shape =", images.shape)
    print(">>> first test data =", images)

    # Matching labels for the same batch.
    print(">>> first test label shape =", labels.shape)
    print(">>> first test label =", labels)

>>> first test data type= <class 'tensorflow.python.framework.ops.EagerTensor'>
>>> first test data shape = (64, 28, 28)
>>> first test data = tf.Tensor(
[[[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 ...

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]

 [[0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  ...
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]
  [0 0 0 ... 0 0 0]]], shape=(64, 28, 28), dtype=uint8)
>>> first test label shape = (64,)
>>>

In [17]:
# Fully connected classifier: flatten each 28x28 input, pass it through one
# hidden ReLU layer, and emit 10 raw scores.
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10),
])

# The final Dense layer has no activation, so the loss is configured with
# from_logits=True.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['sparse_categorical_accuracy'],
)

In [18]:
# Train for 10 epochs on the shuffled, batched training pipeline; the returned
# History object is left as the cell's displayed value.
model.fit(x=train_dataset, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f101e818190>

In [19]:
# Score the trained model on the batched test pipeline; the result lists the
# loss followed by the compiled metric.
model.evaluate(x=test_dataset)



[0.32754406332969666, 0.9470999836921692]