# A highly efficient tf.data input pipeline with EfficientNets


In this notebook, I am going to create a tf.data input pipeline, which is very efficient if done correctly. It not only speeds up preprocessing but also speeds up the actual training by a bit. 
This notebook is a reference for me, so I have made it as explanatory and easy to revise as possible. 
So sit tight, relax, and enjoy the ride.


In [None]:
#Import required libraries
import tensorflow as tf
import os
import numpy as np
import pandas as pd
import tensorflow_hub as hub
#import cv2

As you may note, I have included a new dataset of TFRecords. Credits to [@DimitreOliveira](https://www.kaggle.com/dimitreoliveira). The following functions are inspired by his notebook.

The following auxiliary functions are optimized using the `@tf.function` decorator. Thus, many operations come from the TensorFlow library rather than native Python. 

In [None]:
#Declaring constants and hyperparameters

class Hyperparameters:
    """Namespace of constants shared by the input pipeline.

    Every value is a plain class attribute, so it can be read either from
    the class itself or from any subclass/instance.
    """

    # --- TFRecord schema: one scalar feature per key -----------------------
    TFRECORDS_FORMAT = {
        'image': tf.io.FixedLenFeature([], tf.string),       # encoded image bytes
        'image_name': tf.io.FixedLenFeature([], tf.string),  # file name of the image
        'target': tf.io.FixedLenFeature([], tf.int64),       # integer class label
    }

    # --- tf.data settings ---------------------------------------------------
    BATCH_SIZE = 32
    AUTOTUNE = tf.data.experimental.AUTOTUNE  # let tf.data choose buffer/parallelism sizes

    # --- Image geometry -----------------------------------------------------
    HEIGHT = 224
    WIDTH = 224

    # --- Augmentation settings ----------------------------------------------
    # NOTE(review): the three values below and TRAINING are not referenced by
    # any other cell visible in this notebook - confirm before relying on them.
    WIDTH_FACTOR = 0.2
    HEIGHT_FACTOR = 0.2
    FILL_MODE = 'reflect'

    TRAINING = True

In [None]:
#Reading the TFRecords

class DataParser(Hyperparameters):
    """Turns a directory of TFRecord files into a batched (image, label) dataset.

    Typical use::

        dp = DataParser()
        records = dp.readTFRecs(directory)   # dataset of parsed feature dicts
        batches = dp.makeDataset(records)    # dataset of (image, one-hot label) batches
    """

    NUM_CLASSES=5        #Depth of the one-hot label vector
    SHUFFLE_BUFFER=4000  #Number of records kept in the shuffle buffer

    def __init__(self): #Take required hyperparameters
        self.TFRECORDS_FORMAT=Hyperparameters.TFRECORDS_FORMAT
        self.BATCH_SIZE=Hyperparameters.BATCH_SIZE
        self.HEIGHT=Hyperparameters.HEIGHT
        self.WIDTH=Hyperparameters.WIDTH

    def readTFRecs(self,dir_name):
        """Return a dataset of parsed (but still encoded) records from `dir_name`.

        Args:
            dir_name: directory whose entries are all TFRecord files.

        Returns:
            A `tf.data.Dataset` whose elements are dicts keyed like
            `TFRECORDS_FORMAT` ('image', 'image_name', 'target').

        Raises:
            ValueError: if the directory contains no entries.
        """
        #Listing a directory is ordinary Python I/O, so this method is deliberately
        #NOT wrapped in @tf.function: a traced function would freeze the file list
        #at trace time and gains nothing, because the dataset ops are graph ops anyway.
        #Sorting makes the record order (and any take/skip split) reproducible.
        file_names=sorted(tf.io.gfile.listdir(dir_name))
        if not file_names:
            raise ValueError(f"No TFRecord files found in '{dir_name}'")
        TFRecFiles=[dir_name+'/'+name for name in file_names]
        TFRecDataset=tf.data.TFRecordDataset(TFRecFiles)
        #Each element is ONE serialized example, hence parse_single_example
        Dataset=TFRecDataset.map(
            lambda example:tf.io.parse_single_example(example,self.TFRECORDS_FORMAT),
            num_parallel_calls=self.AUTOTUNE)
        return Dataset

    @tf.function
    def decode_image(self,entry):
        """Decode one parsed record into an (image, one-hot label) pair.

        Args:
            entry: feature dict with an encoded 'image' and an integer 'target'.

        Returns:
            Tuple of the decoded image, shape [h,w,3], and the label one-hot
            encoded to `NUM_CLASSES` entries.
        """
        #expand_animations=False guarantees a rank-3 result with a static channel
        #dimension; without it the static shape is completely unknown.
        image=tf.image.decode_image(entry['image'],channels=3,expand_animations=False)
        label=tf.one_hot(entry['target'],self.NUM_CLASSES)
        return image,label

    def makeDataset(self,TFRecDataset):
        """Shuffle, decode and batch the records produced by `readTFRecs`.

        Args:
            TFRecDataset: dataset of parsed feature dicts.

        Returns:
            A `tf.data.Dataset` of (image batch, one-hot label batch) tuples.
        """
        #Shuffle while the images are still encoded strings: buffering
        #SHUFFLE_BUFFER decoded images would hold far more memory.
        Dataset=TFRecDataset.shuffle(self.SHUFFLE_BUFFER)
        Dataset=Dataset.map(lambda entry: self.decode_image(entry),
                            num_parallel_calls=self.AUTOTUNE)
        Dataset=Dataset.batch(self.BATCH_SIZE).prefetch(self.AUTOTUNE)
        return Dataset
    

In [None]:
dp=DataParser()
TFRecDataset=dp.readTFRecs('../input/cassava-tfrecords-512x512')

#Split BEFORE shuffling. makeDataset() shuffles, and a shuffled dataset is
#re-shuffled every time it is iterated, so take()/skip() applied after it would
#select different examples each epoch and let validation examples leak into
#training. Splitting the unshuffled records keeps the two sets disjoint and fixed.
VAL_BATCHES=50                          #same validation size as before: 50 batches
val_examples=VAL_BATCHES*dp.BATCH_SIZE
#NOTE(review): validation is the first `val_examples` records in read order -
#confirm the TFRecord files are not grouped by class.
valDataset=dp.makeDataset(TFRecDataset.take(val_examples)).prefetch(dp.AUTOTUNE)
trainDataset=dp.makeDataset(TFRecDataset.skip(val_examples)).prefetch(dp.AUTOTUNE)


In [None]:
#Callbacks

#Checkpoint the model whenever val_loss improves. `monitor` only has an effect
#together with save_best_only=True; without it a full .h5 file is written after
#every single epoch regardless of the monitored value.
#NOTE(review): the file name says "inceptionresnet" although the backbone loaded
#below is PNASNet-Large; left unchanged in case other code expects these paths.
model_save=tf.keras.callbacks.ModelCheckpoint(
    './inceptionresnet_{epoch:02d}.h5',
    monitor="val_loss",
    verbose=1,
    save_best_only=True,
    mode="auto",
    save_freq="epoch",
)

#Write scalars plus per-epoch weight histograms for TensorBoard
tensorboard = tf.keras.callbacks.TensorBoard(
  log_dir='./logs',
  histogram_freq=1,
)

callbacks=[model_save,tensorboard]

In [None]:
#Building model
#Frozen PNASNet-Large feature extractor from TF Hub (weights are not trained)
pretrained=hub.KerasLayer("https://tfhub.dev/google/imagenet/pnasnet_large/feature_vector/4",
                   trainable=False)

model=tf.keras.Sequential()
model.add(tf.keras.Input((512,512,3)))

#Preprocessing inside the model: resize to 331x331 and scale pixels by 1/255.
#The random flip / translation / rotation / zoom augmentation layers are
#currently disabled.
model.add(tf.keras.layers.experimental.preprocessing.Resizing(331,331))
model.add(tf.keras.layers.experimental.preprocessing.Rescaling(1./255.))

model.add(pretrained)

#Classification head: BatchNorm -> Dense(relu) blocks of shrinking width
for units in (1080,540,135,96,48,16):
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dense(units,activation='relu'))

#Output: one softmax probability per class
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(5,activation='softmax'))

#compile model


In [None]:
#Adam with default settings; categorical cross-entropy and categorical accuracy
#both expect one-hot encoded labels.
compile_args=dict(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)
model.compile(**compile_args)
model.summary()

In [None]:
#Train for 80 epochs, validating on valDataset; the per-epoch metrics are
#collected in history.history.
history=model.fit(
    trainDataset,
    validation_data=valDataset,
    epochs=80,
    verbose=1,
    callbacks=callbacks,
)

In [None]:
import matplotlib.pyplot as plt

#Per-epoch curves recorded by model.fit
train_loss = history.history['loss']
val_loss   = history.history['val_loss']
train_acc  = history.history['categorical_accuracy']
val_acc    = history.history['val_categorical_accuracy']
#Derive the x axis from what was actually recorded: a hard-coded epoch count
#raises a shape mismatch whenever training ran for a different number of epochs.
xc         = range(len(train_loss))

fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))

ax_loss.plot(xc, train_loss, label='train')
ax_loss.plot(xc, val_loss, label='validation')
ax_loss.set(title='Loss', xlabel='epoch', ylabel='categorical cross-entropy')
ax_loss.legend()

ax_acc.plot(xc, train_acc, label='train')
ax_acc.plot(xc, val_acc, label='validation')
ax_acc.set(title='Accuracy', xlabel='epoch', ylabel='categorical accuracy')
ax_acc.legend()

plt.show()

In [None]:
class Augmenter(tf.keras.layers.Layer):
    """Resize, rescale and randomly rotate/zoom a batch of images.

    Pipeline: Resizing -> Rescaling(1/255) -> RandomRotation -> RandomZoom.

    Args:
        height: target image height after resizing.
        width: target image width after resizing.
        rotation_factor: factor passed to `RandomRotation`.
        zoom_factor: factor passed to `RandomZoom` for both height and width.
        **kwargs: standard `tf.keras.layers.Layer` arguments (e.g. `name`).
    """
    def __init__(self,height=224,width=224,rotation_factor=0.2,zoom_factor=0.2,**kwargs):
        super(Augmenter,self).__init__(**kwargs)
        self.height=height
        self.width=width
        self.rotation_factor=rotation_factor
        self.zoom_factor=zoom_factor
        self.resize=tf.keras.layers.experimental.preprocessing.Resizing(height,width)
        self.rescale=tf.keras.layers.experimental.preprocessing.Rescaling(1./255.)
        self.rotate=tf.keras.layers.experimental.preprocessing.RandomRotation(rotation_factor)
        self.zoom=tf.keras.layers.experimental.preprocessing.RandomZoom(zoom_factor,zoom_factor)

    def build(self,input_shape=(32,512,512,3)):
        #This layer owns no weights of its own; defer to the base class so the
        #layer is marked as built.
        super(Augmenter,self).build(input_shape)

    def call(self,inputs,training=None):
        """Apply the pipeline to `inputs`.

        Args:
            inputs: batch of images.
            training: forwarded to the random layers so the caller controls
                whether augmentation is applied; None leaves the decision to Keras.

        Returns:
            The resized, rescaled and (when training) augmented batch.
        """
        #No @tf.function here: Keras already executes layer calls in graph mode
        #inside fit/evaluate/predict.
        x=self.rescale(self.resize(inputs))
        x=self.rotate(x,training=training)
        return self.zoom(x,training=training)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config=super(Augmenter,self).get_config()
        config.update({'height':self.height,
                       'width':self.width,
                       'rotation_factor':self.rotation_factor,
                       'zoom_factor':self.zoom_factor})
        return config
           

In [None]:
#!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
#!unzip ngrok-stable-linux-amd64.zip

import json
import subprocess
import time
import urllib.request

#Start TensorBoard and the ngrok tunnel as background child processes.
#Popen returns immediately, so no process pool or shell "&" is needed.
#NOTE(review): this publishes TensorBoard (bound to 0.0.0.0) on a public ngrok
#URL - make sure the logs contain nothing sensitive.
background_commands=[
    ["tensorboard","--logdir","./logs/","--host","0.0.0.0","--port","6006"],
    ["./ngrok","http","6006"],
]
background_processes=[]
for cmd in background_commands:
    try:
        background_processes.append(subprocess.Popen(cmd))
    except FileNotFoundError:
        #e.g. ngrok has not been downloaded yet (see the commented lines above)
        print(f"Could not start '{cmd[0]}': executable not found")

#ngrok needs a moment before its local API (port 4040) answers, so poll for the
#tunnel instead of querying once right after launch.
public_url=None
for _ in range(20):
    try:
        with urllib.request.urlopen("http://localhost:4040/api/tunnels",timeout=2) as response:
            tunnels=json.load(response)['tunnels']
        if tunnels:
            public_url=tunnels[0]['public_url']
            break
    except (OSError,ValueError,KeyError):
        pass  #API not up yet or reply incomplete - retry after a short wait
    time.sleep(0.5)

print(public_url if public_url else "ngrok tunnel is not available")