In [1]:
# Import the submodule explicitly: a bare `import urllib` does not guarantee
# that `urllib.parse` is loaded, so `urllib.parse.urlencode` could fail with
# AttributeError on a fresh interpreter.
import urllib.parse
from IPython.display import Markdown as md

### change to reflect your notebook
_nb_loc = "07_training/07a_ingest.ipynb"
_nb_title = "Writing an efficient ingest Loop"

# Button icons, in the same order as the entries of _links below.
_icons = [
    "https://raw.githubusercontent.com/GoogleCloudPlatform/practical-ml-vision-book/master/logo-cloud.png",
    "https://www.tensorflow.org/images/colab_logo_32px.png",
    "https://www.tensorflow.org/images/GitHub-Mark-32px.png",
    "https://www.tensorflow.org/images/download_logo_32px.png",
]
# Targets: Vertex AI Workbench deploy, Colab, GitHub source view, raw download.
_links = [
    "https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?" + urllib.parse.urlencode({"name": _nb_title, "download_url": "https://github.com/takumiohym/practical-ml-vision-book-ja/raw/master/"+_nb_loc}),
    "https://colab.research.google.com/github/takumiohym/practical-ml-vision-book-ja/blob/master/{0}".format(_nb_loc),
    "https://github.com/takumiohym/practical-ml-vision-book-ja/blob/master/{0}".format(_nb_loc),
    "https://raw.githubusercontent.com/takumiohym/practical-ml-vision-book-ja/master/{0}".format(_nb_loc),
]
# Placeholders {0}-{3} are the links and {4}-{7} the icons; the Markdown object
# is the last expression of the cell so that it is rendered as the cell output.
md("""<table class="tfo-notebook-buttons" align="left"><td><a target="_blank" href="{0}"><img src="{4}"/>Run in Vertex AI Workbench</a></td><td><a target="_blank" href="{1}"><img src="{5}" />Run in Google Colab</a></td><td><a target="_blank" href="{2}"><img src="{6}" />View source on GitHub</a></td><td><a href="{3}"><img src="{7}" />Download notebook</a></td></table><br/><br/>""".format(*_links, *_icons))

<table class="tfo-notebook-buttons" align="left"><td><a target="_blank" href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?name=Writing+an+efficient+ingest+Loop&download_url=https%3A%2F%2Fgithub.com%2Ftakumiohym%2Fpractical-ml-vision-book-ja%2Fraw%2Fmaster%2F07_training%2F07a_ingest.ipynb"><img src="https://raw.githubusercontent.com/GoogleCloudPlatform/practical-ml-vision-book/master/logo-cloud.png"/>Run in Vertex AI Workbench</a></td><td><a target="_blank" href="https://colab.research.google.com/github/takumiohym/practical-ml-vision-book-ja/blob/master/07_training/07a_ingest.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a></td><td><a target="_blank" href="https://github.com/takumiohym/practical-ml-vision-book-ja/blob/master/07_training/07a_ingest.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a></td><td><a href="https://raw.githubusercontent.com/takumiohym/practical-ml-vision-book-ja/master/07_training/07a_ingest.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a></td></table><br/><br/>

# 効率的なデータの読み込み

このノートブックでは、データのモデルへの取り込みを高速化する方法を確認します。

## オリジナルのコード  

以下のコードは[../06_preprocessing/06e_colordistortion.ipynb](../06_preprocessing/06e_colordistortion.ipynb)をもとにして作成されています。

読み取りのコードのバリエーションは、複数の`create_preproc...`関数で実装されています。

In [2]:
import matplotlib.pylab as plt
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import os
# Load compressed models from tensorflow_hub
os.environ['TFHUB_MODEL_LOAD_FORMAT'] = 'COMPRESSED'    

from tensorflow.data import AUTOTUNE

# Image geometry used by the preprocessing code.
# note: height and width are *twice* what we used to have
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = 448, 448, 3
# Class labels, in a fixed order.
CLASS_NAMES = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']

def training_plot(metrics, history):
  """Plot the training curve (and validation curve, if any) of each metric.

  One subplot is drawn per entry of `metrics`, side by side in a single row.

  Args:
    metrics: sequence of metric names; each must be a key of `history.history`.
    history: object exposing a `history` dict that maps a metric name to its
      per-epoch values (presumably the object returned by Keras `model.fit`).

  Raises:
    KeyError: if a name in `metrics` is missing from `history.history`.
  """
  # squeeze=False keeps `ax` a 2-D array even for a single metric; otherwise
  # plt.subplots returns a bare Axes object and indexing it fails.
  f, ax = plt.subplots(
      1, len(metrics), figsize=(5*len(metrics), 5), squeeze=False)
  for idx, metric in enumerate(metrics):
    panel = ax[0, idx]
    panel.plot(history.history[metric], ls='dashed')
    panel.set_xlabel("Epochs")
    panel.set_ylabel(metric)
    legend_labels = [metric]
    # The validation series only exists when validation data was supplied;
    # skip it instead of failing with KeyError.
    val_key = 'val_' + metric
    if val_key in history.history:
      panel.plot(history.history[val_key])
      legend_labels.append(val_key)
    panel.legend(legend_labels)
    
class _Preprocessor:
    """Stateless helpers that read images and bring them to a common size."""

    def __init__(self):
        # The helpers below keep no per-instance state.
        pass

    def read_from_tfr(self, proto):
        """Parse one serialized example into an (image, integer label) pair.

        Args:
          proto: a serialized example carrying the features 'image', 'shape',
            'label' and 'label_int'.

        Returns:
          Tuple of the image values reshaped to the stored 'shape' and the
          'label_int' value. The string 'label' is parsed but not returned.
        """
        schema = {
            'image': tf.io.VarLenFeature(tf.float32),
            'shape': tf.io.VarLenFeature(tf.int64),
            'label': tf.io.FixedLenFeature([], tf.string, default_value=''),
            'label_int': tf.io.FixedLenFeature([], tf.int64, default_value=0),
        }
        parsed = tf.io.parse_single_example(proto, schema)
        # Both variable-length features are parsed as sparse tensors.
        dims = tf.sparse.to_dense(parsed['shape'])
        flat_values = tf.sparse.to_dense(parsed['image'])
        return tf.reshape(flat_values, dims), parsed['label_int']

    def read_from_jpegfile(self, filename):
        """Read a JPEG file and return it as a float32 image tensor.

        Same code as in 05_create_dataset/jpeg_to_tfrecord.py.
        """
        raw_bytes = tf.io.read_file(filename)
        decoded = tf.image.decode_jpeg(raw_bytes, channels=IMG_CHANNELS)
        return tf.image.convert_image_dtype(decoded, tf.float32)

    def preprocess(self, img):
        """Resize `img` to IMG_HEIGHT x IMG_WIDTH with tf.image.resize_with_pad."""
        resized = tf.image.resize_with_pad(img, IMG_HEIGHT, IMG_WIDTH)
        return resized

def create_preproc_dataset_plain(pattern):
    """Build the baseline input pipeline, with no parallelism options.

    Args:
      pattern: glob pattern of GZIP-compressed TFRecord files.

    Returns:
      A tf.data.Dataset of (preprocessed image, integer label) pairs.
    """
    preproc = _Preprocessor()
    matching_files = list(tf.io.gfile.glob(pattern))

    def _resize_keep_label(img, label):
        # Only the image is transformed; the label passes through untouched.
        return (preproc.preprocess(img), label)

    records = tf.data.TFRecordDataset(matching_files, compression_type='GZIP')
    parsed = records.map(preproc.read_from_tfr)
    return parsed.map(_resize_keep_label)

# note: both map() calls are given num_parallel_calls=AUTOTUNE
def create_preproc_dataset_parallelmap(pattern):
    """Build the input pipeline with parallelized map() stages.

    Args:
      pattern: glob pattern of GZIP-compressed TFRecord files.

    Returns:
      A tf.data.Dataset of (preprocessed image, integer label) pairs.
    """
    preproc = _Preprocessor()
    matching_files = list(tf.io.gfile.glob(pattern))
    records = tf.data.TFRecordDataset(matching_files, compression_type='GZIP')
    parsed = records.map(preproc.read_from_tfr, num_parallel_calls=AUTOTUNE)
    return parsed.map(
        lambda img, label: (preproc.preprocess(img), label),
        num_parallel_calls=AUTOTUNE,
    )

# note: the matched files are split into two halves that are read interleaved
def create_preproc_dataset_interleave(pattern, num_parallel=None):
    """Build the input pipeline with interleaved file reading.

    When more than one file matches, the file list is cut in two and a
    two-element range dataset is interleaved over one TFRecord reader per
    half (always with num_parallel_calls=AUTOTUNE). With zero or one file a
    single reader is used instead.

    Args:
      pattern: glob pattern of GZIP-compressed TFRecord files.
      num_parallel: passed as num_parallel_calls to the two map() stages
        only; it does not affect the interleave step.

    Returns:
      A tf.data.Dataset of (preprocessed image, integer label) pairs.
    """
    preproc = _Preprocessor()
    files = list(tf.io.gfile.glob(pattern))
    midpoint = len(files) // 2

    def _read_half(half_index):
        # Deliberately left as a plain if/else statement, like the original,
        # since this function is handed to Dataset.interleave.
        if half_index == 0:
            chosen = files[:midpoint]
        else:
            chosen = files[midpoint:]
        return tf.data.TFRecordDataset(chosen, compression_type='GZIP')

    def _resize_keep_label(img, label):
        return (preproc.preprocess(img), label)

    if len(files) <= 1:
        records = tf.data.TFRecordDataset(files, compression_type='GZIP')
    else:
        print("Interleaving the reading of {} files.".format(len(files)))
        records = tf.data.Dataset.range(2).interleave(
            _read_half, num_parallel_calls=AUTOTUNE)

    parsed = records.map(preproc.read_from_tfr, num_parallel_calls=num_parallel)
    return parsed.map(_resize_keep_label, num_parallel_calls=num_parallel)

class RandomColorDistortion(tf.keras.layers.Layer):
    """Layer that randomly changes contrast and brightness while training.

    Outside of training the input is returned unchanged.

    Args:
      contrast_range: [lower, upper] bounds of the uniformly sampled
        contrast factor.
      brightness_delta: [lower, upper] bounds of the uniformly sampled
        brightness offset.
      **kwargs: forwarded to tf.keras.layers.Layer.
    """
    def __init__(self, contrast_range=[0.5, 1.5],
                 brightness_delta=[-0.2, 0.2], **kwargs):
        super(RandomColorDistortion, self).__init__(**kwargs)
        # Store copies so an instance never aliases the shared default lists
        # (or a list the caller may mutate later).
        self.contrast_range = list(contrast_range)
        self.brightness_delta = list(brightness_delta)

    def call(self, images, training=None):
        """Apply the distortion to `images` when `training` is truthy.

        Returns:
          The input unchanged when not training; otherwise the distorted
          images clipped to the range [0, 1].
        """
        if not training:
            return images

        # Sample with TensorFlow ops rather than NumPy: when call() is traced
        # into a graph (e.g. inside model.fit), a NumPy draw is evaluated once
        # at trace time and baked in as a constant, so every batch would get
        # the very same "random" distortion.
        contrast = tf.random.uniform(
            [], self.contrast_range[0], self.contrast_range[1])
        brightness = tf.random.uniform(
            [], self.brightness_delta[0], self.brightness_delta[1])

        images = tf.image.adjust_contrast(images, contrast)
        images = tf.image.adjust_brightness(images, brightness)
        images = tf.clip_by_value(images, 0, 1)
        return images

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(RandomColorDistortion, self).get_config()
        config.update({
            'contrast_range': list(self.contrast_range),
            'brightness_delta': list(self.brightness_delta),
        })
        return config

2022-07-16 20:35:01.210272: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-07-16 20:35:01.210319: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


## データの読み取りを高速化  

データの読み取りを数回行い、速度を計測します。

In [4]:
def loop_through_dataset(ds, nepochs):
    """Iterate over `ds` `nepochs` times, doing a light computation per element.

    For every (image, label) element, values not above a random threshold are
    replaced by 0 and the mean over the whole image is accumulated. A dot is
    printed every 100 elements and the average of the accumulated means is
    printed after each pass.

    Args:
      ds: iterable dataset yielding (image, label) pairs.
      nepochs: number of passes over `ds`.

    Returns:
      The lowest per-pass average seen, starting from an initial value of 1.0.
    """
    best_so_far = tf.constant(1.)
    for _epoch in range(nepochs):
        cutoff = np.random.uniform(0.3, 0.7)  # fresh random threshold per pass
        running_total = tf.constant(0.)
        seen = 0
        for seen, (img, _label) in enumerate(ds, start=1):
            # keep values > cutoff, zero the rest, then average
            running_total += tf.reduce_mean(tf.where(img > cutoff, img, 0))
            if seen % 100 == 0:
                print('.', end='')
        epoch_mean = running_total / seen
        print(epoch_mean)
        if epoch_mean < best_so_far:
            best_so_far = epoch_mean
    return best_so_far

In [31]:
# Quick setting: 2 files, 2 epochs
PATTERN_SUFFIX = '-0000[01]-*'
NUM_EPOCHS = 2
# Full setting: 16 files, 20 epochs
#PATTERN_SUFFIX, NUM_EPOCHS = '-*', 20

In [6]:
%%time
# Baseline: plain pipeline (no parallel map calls, no interleaving).
ds = create_preproc_dataset_plain(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX
)
# Last expression of the cell, so the returned lowest mean is displayed.
loop_through_dataset(ds, NUM_EPOCHS)

...tf.Tensor(0.22762734, shape=(), dtype=float32)
...tf.Tensor(0.21017572, shape=(), dtype=float32)
CPU times: user 7.03 s, sys: 500 ms, total: 7.53 s
Wall time: 7.99 s


<tf.Tensor: shape=(), dtype=float32, numpy=0.21017572>

In [7]:
%%time
# Parallel map: both map() calls use num_parallel_calls=AUTOTUNE.
ds = create_preproc_dataset_parallelmap(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX
)
loop_through_dataset(ds, NUM_EPOCHS)

...tf.Tensor(0.14244522, shape=(), dtype=float32)
...tf.Tensor(0.18533988, shape=(), dtype=float32)
CPU times: user 7.93 s, sys: 375 ms, total: 8.3 s
Wall time: 5.94 s


<tf.Tensor: shape=(), dtype=float32, numpy=0.14244522>

In [32]:
%%time
# Interleaved file reading only; num_parallel=None leaves the map() calls
# without a parallelism setting.
ds = create_preproc_dataset_interleave(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX,
    num_parallel=None
)
loop_through_dataset(ds, NUM_EPOCHS)

Interleaving the reading of 2 files.
first half
second half
...tf.Tensor(0.12316246, shape=(), dtype=float32)
...tf.Tensor(0.15402032, shape=(), dtype=float32)
CPU times: user 7.86 s, sys: 497 ms, total: 8.35 s
Wall time: 5.29 s


<tf.Tensor: shape=(), dtype=float32, numpy=0.12316246>

In [9]:
%%time
# Interleaved file reading plus parallel maps (num_parallel=AUTOTUNE).
ds = create_preproc_dataset_interleave(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX,
    num_parallel=AUTOTUNE
)
loop_through_dataset(ds, NUM_EPOCHS)

Interleaving the reading of 2 files.
...tf.Tensor(0.18058446, shape=(), dtype=float32)
...tf.Tensor(0.14600855, shape=(), dtype=float32)
CPU times: user 7.99 s, sys: 443 ms, total: 8.44 s
Wall time: 5.23 s


<tf.Tensor: shape=(), dtype=float32, numpy=0.14600855>

### 結果: 
|方法                               |CPU時間 |実測時間 |
|:--                                |:--     |:--      |
|ベースライン                         |7.53 s  |7.99 s   |
|マップ処理の並列化                 |8.30 s  |5.94 s   |
|インターリーブ                     |8.60 s  |5.47 s   |
|インターリーブ＋マップ処理の並列化 |8.44 s  |5.23 s   |

注: 実行するCPUやTensorFlowバージョンによって異なります。 

## MLモデル  

上記の計算は、しきい値を超えるピクセル値の平均を求めるだけの単純なものでした。<br>
もう少し複雑な計算(勾配計算など)が必要な場合でも計測してみましょう。

In [14]:
def train_simple_model(ds, nepochs):
    """Fit a minimal classifier on `ds` to exercise the input pipeline.

    The model flattens an IMG_HEIGHT x IMG_WIDTH x IMG_CHANNELS image and
    feeds it to a single softmax layer with one unit per entry of
    CLASS_NAMES.

    Args:
      ds: batched dataset of (image, integer label) pairs, passed to fit().
      nepochs: number of epochs to train for.

    Returns:
      None; neither the model nor the fit() result is returned.
    """
    flatten = tf.keras.layers.Flatten(
        input_shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS))
    classifier = tf.keras.layers.Dense(len(CLASS_NAMES), activation='softmax')
    model = tf.keras.Sequential([flatten, classifier])

    optimizer = tf.keras.optimizers.Adam()
    # The last layer already applies softmax, hence from_logits=False.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

    model.fit(ds, epochs=nepochs)

In [15]:
%%time
# Baseline: plain pipeline, batch size 1.
ds = create_preproc_dataset_plain(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX
).batch(1)
train_simple_model(ds, NUM_EPOCHS)

Epoch 1/2
Epoch 2/2
CPU times: user 9.12 s, sys: 796 ms, total: 9.91 s
Wall time: 9.39 s


In [16]:
%%time
# Parallel map (num_parallel_calls=AUTOTUNE on both maps), batch size 1.
ds = create_preproc_dataset_parallelmap(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX
).batch(1)
train_simple_model(ds, NUM_EPOCHS)

Epoch 1/2
Epoch 2/2
CPU times: user 9.97 s, sys: 718 ms, total: 10.7 s
Wall time: 8.17 s


In [17]:
%%time
# Interleaved file reading only (num_parallel=None for the maps), batch size 1.
ds = create_preproc_dataset_interleave(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX,
    num_parallel=None
).batch(1)
train_simple_model(ds, NUM_EPOCHS)

Interleaving the reading of 2 files.
Epoch 1/2
Epoch 2/2
CPU times: user 9.7 s, sys: 825 ms, total: 10.5 s
Wall time: 7.54 s


In [18]:
%%time
# Interleaved file reading plus parallel maps (num_parallel=AUTOTUNE), batch size 1.
ds = create_preproc_dataset_interleave(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX,
    num_parallel=AUTOTUNE
).batch(1)
train_simple_model(ds, NUM_EPOCHS)

Interleaving the reading of 2 files.
Epoch 1/2
Epoch 2/2
CPU times: user 9.6 s, sys: 728 ms, total: 10.3 s
Wall time: 7.17 s


### 結果: 


|方法                               |CPU時間 |実測時間  |
|:--                                |:--     |:--       |
|並列化なし                         |9.91 s  |9.39 s    |
|マップ処理の並列化                 |10.7 s  |8.17 s    |
|インターリーブ                     |10.5 s  |7.54 s    |
|インターリーブ＋マップ処理の並列化 |10.3 s  |7.17 s    |

注: 実行するCPUやTensorFlowバージョンによって異なります。 

## さまざまなデータ処理の比較

In [19]:
# shorthand for the interleaved variant with parallel maps
def create_preproc_dataset(pattern):
    """Return create_preproc_dataset_interleave(pattern) with AUTOTUNE maps."""
    dataset = create_preproc_dataset_interleave(pattern, num_parallel=AUTOTUNE)
    return dataset

In [20]:
%%time
# Add prefetching (applied before batching); batch size 1.
ds = create_preproc_dataset(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX
).prefetch(AUTOTUNE).batch(1)
train_simple_model(ds, NUM_EPOCHS)

Interleaving the reading of 2 files.
Epoch 1/2
Epoch 2/2
CPU times: user 10.5 s, sys: 899 ms, total: 11.4 s
Wall time: 8.09 s


In [26]:
%%time
# Prefetching with a larger batch: batch size 8.
ds = create_preproc_dataset(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX
).prefetch(AUTOTUNE).batch(8)
train_simple_model(ds, NUM_EPOCHS)

Interleaving the reading of 2 files.
Epoch 1/2
Epoch 2/2
CPU times: user 8.97 s, sys: 590 ms, total: 9.56 s
Wall time: 6.9 s


In [21]:
%%time
# Prefetching with a larger batch: batch size 16.
ds = create_preproc_dataset(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX
).prefetch(AUTOTUNE).batch(16)
train_simple_model(ds, NUM_EPOCHS)

Interleaving the reading of 2 files.
Epoch 1/2
Epoch 2/2
CPU times: user 8.97 s, sys: 939 ms, total: 9.9 s
Wall time: 6.7 s


In [22]:
%%time
# Prefetching with a larger batch: batch size 32.
ds = create_preproc_dataset(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX
).prefetch(AUTOTUNE).batch(32)
train_simple_model(ds, NUM_EPOCHS)

Interleaving the reading of 2 files.
Epoch 1/2
Epoch 2/2
CPU times: user 8.78 s, sys: 902 ms, total: 9.68 s
Wall time: 6.37 s


In [23]:
%%time
# Add caching (author's note: always do this optimization last).
# Variant 1: cache only, no prefetch; batch size 32.
ds = create_preproc_dataset(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX
).cache().batch(32)
train_simple_model(ds, NUM_EPOCHS)

Interleaving the reading of 2 files.
Epoch 1/2
Epoch 2/2
CPU times: user 5.16 s, sys: 1 s, total: 6.16 s
Wall time: 4.36 s


In [24]:
%%time
# Add caching (author's note: always do this optimization last).
# Variant 2: prefetch first, then cache; batch size 32.
ds = create_preproc_dataset(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX
).prefetch(AUTOTUNE).cache().batch(32)
train_simple_model(ds, NUM_EPOCHS)

Interleaving the reading of 2 files.
Epoch 1/2
Epoch 2/2
CPU times: user 5.01 s, sys: 756 ms, total: 5.76 s
Wall time: 4.04 s


In [25]:
%%time
# Add caching (author's note: always do this optimization last).
# Variant 3: cache first, then prefetch; batch size 32.
ds = create_preproc_dataset(
    'gs://practical-ml-vision-book/flowers_tfr/train' + PATTERN_SUFFIX
).cache().prefetch(AUTOTUNE).batch(32)
train_simple_model(ds, NUM_EPOCHS)

Interleaving the reading of 2 files.
Epoch 1/2
Epoch 2/2
CPU times: user 4.95 s, sys: 692 ms, total: 5.65 s
Wall time: 4.19 s


### 結果: 

|方法                               |CPU時間 |実測時間  |
|:--                                |:--     |:--       |
|バッチサイズ 1 、プリフェッチ |11.4 s  |8.09 s   |
|バッチサイズ 8 、プリフェッチ  |9.56 s  |6.90 s   |
|バッチサイズ 16 、プリフェッチ |9.90 s  |6.70 s   |
|バッチサイズ 32 、プリフェッチ |9.68 s  |6.37 s   |
|バッチサイズ 32 、キャッシュ |6.16 s  |4.36 s   |
|バッチサイズ 32 、プリフェッチ -> キャッシュ |5.76 s  |4.04 s   |
|バッチサイズ 32 、キャッシュ -> プリフェッチ |5.65 s  |4.19 s   |

したがって、今回の場合は以下の方法が最速のようです。

```python  
ds = create_preproc_dataset_interleave(pattern, 
                                       num_parallel=AUTOTUNE).prefetch(AUTOTUNE).cache().batch(32)  
```

## License
Copyright 2022 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.