# Pipeline Optimization

Good resource on the how-to [here](https://www.tensorflow.org/guide/data_performance)

In [37]:
from google.cloud import storage

client = storage.Client()
files = []
for blob in client.list_blobs('tfrs-central-a'):
    files.append(blob.public_url.replace("https://storage.googleapis.com/", "gs://"))
    
print(files[:2])

['gs://tfrs-central-a/file_00-12227.tfrec', 'gs://tfrs-central-a/file_01-12228.tfrec']


In [38]:
import json
import tensorflow as tf
import tensorflow_recommenders as tfrs


def parse_tfrecord_fn(example):
    feature_description = {
         #query features
        "query": tf.io.FixedLenFeature([1], tf.string),
        "last_viewed": tf.io.FixedLenFeature([1], tf.string),

        #candidate features
        "IVM_s": tf.io.FixedLenFeature([1], tf.string),
        "description": tf.io.FixedLenFeature([1], tf.string),
        "price_td": tf.io.FixedLenFeature([1], tf.float32),
        "PriceRange_s": tf.io.FixedLenFeature([1],  tf.string),
        "productTypeCombo_ss": tf.io.FixedLenFeature([1], tf.string),
        "visual": tf.io.FixedLenFeature([2048,], tf.float32),
        "month": tf.io.FixedLenFeature([1], tf.string),
        "hour": tf.io.FixedLenFeature([1], tf.string)
    }
    example = tf.io.parse_single_example(example, feature_description)
    return example


EMBEDDING_DIM = 32
MAX_TOKENS = 1_000_000


TF_RECORDS_DIR = 'gs://tfrs-tf-records'



In [39]:
import time

# Set dev dataset CHANGE THIS LATER TO THE WHOLE DIR
raw_dataset = tf.data.TFRecordDataset(files[:5], num_parallel_reads=tf.data.AUTOTUNE).prefetch(tf.data.AUTOTUNE)

    
parsed_dataset = raw_dataset.map(parse_tfrecord_fn, num_parallel_calls=tf.data.AUTOTUNE)

In [40]:
def benchmark(dataset, num_epochs=2):
    start_time = time.perf_counter()
    for epoch_num in range(num_epochs):
        for sample in dataset:
            # Performing a training step
            time.sleep(0.0001)
    print("Execution time:", time.perf_counter() - start_time)

In [41]:
# Benchmarking first 5 files

benchmark(raw_dataset.map(parse_tfrecord_fn)
         )

Execution time: 52.31239370099502


In [42]:
# Parallel calls

benchmark(
    raw_dataset
    .map(
        parse_tfrecord_fn,
        num_parallel_calls=tf.data.AUTOTUNE
    )
)

Execution time: 51.46366432099603


In [43]:
# Caching
benchmark(
    raw_dataset
    .map(  # Apply time consuming operations before cache
        parse_tfrecord_fn
    ).cache(
    ),
    5
)

Execution time: 101.31243298499612


In [44]:
#interleaving

benchmark(
    tf.data.Dataset.range(2)
    .interleave(
        lambda _: parsed_dataset,
        num_parallel_calls=tf.data.AUTOTUNE
    )
)

Execution time: 102.04474002699135


In [45]:
# Parallel calls w prefetching

benchmark(
    raw_dataset
    .map(
        parse_tfrecord_fn,
        num_parallel_calls=tf.data.AUTOTUNE
    )
    .prefetch(  # Overlap producer and consumer works
        tf.data.AUTOTUNE
    )
)

Execution time: 52.13856463498087


In [46]:
import sys
sys.executable

'/opt/conda/bin/python'