## This notebook experiments with TensorFlow input pipelines

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import os
import glob
import cv2
%config Completer.use_jedi = False

In [2]:
import tensorflow as tf
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Reshape,Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
np.random.seed(45)
tf.random.set_seed(45)

In [6]:
# Root of the Kaggle input mount (not referenced by the visible cells).
INPUT_PATH = '/kaggle/input'
# Directory holding the training JPEGs, relative to the notebook's working directory.
TRAINING_PATH = '../input/jpeg-happywhale-128x128/train_images-128-128/train_images-128-128/'
# Side length used by the full-size pipeline and models.
IMAGE_SIZE = 128
# Side length used by the resized pipeline and models.
IMAGE_SIZE_CUSTOM = 100
# Buffer size passed to Dataset.shuffle.
SHUFFLE = 2589
# Batch size for both the Keras generators and the tf.data pipelines.
BATCH = 50
# Sentinel passed to Dataset.prefetch so tf.data picks the buffer size itself.
AUTO = tf.data.AUTOTUNE

In [7]:
len(os.listdir(TRAINING_PATH))

51033

In [8]:
# os.listdir returns entries in arbitrary order, so sort before slicing to make
# the 1000-image subset (and the seeded labels paired with it) reproducible.
IMAGE_NAMES = sorted(os.listdir(TRAINING_PATH))[:1000]

<h2> To keep the notebook easy to use and fast to train, only 1000 images are used for training and validation </h2>

In [None]:
# Re-seed so the synthetic labels are identical every time this cell runs.
# (The former `np.random.set_state = 45` was dropped: it replaced NumPy's
# set_state function with an integer instead of seeding anything.)
np.random.seed(45)
df = pd.DataFrame(IMAGE_NAMES, columns = ['image'])
# Random placeholder labels in [0, 5), one per image; sized from the data
# rather than a hard-coded 1000 so the cell survives a change of subset size.
df['labels'] = np.random.randint(0, 5, size = len(df))
df['labels_str'] = df.labels.astype('string')  # string labels for ImageDataGenerator.flow_from_dataframe
df['image_path'] = TRAINING_PATH + df['image']

In [None]:
df.head(5)

# Data Analysis

In [None]:
# Read the first image's bytes from disk, decode them and preview the result.
first_image_bytes = tf.io.read_file(df.image_path[0])
image = tf.image.decode_jpeg(first_image_bytes)
plt.imshow(image)

In [None]:
# Shrink the preview image to the custom size, then divide by 255 to rescale pixels.
resized_preview = tf.image.resize(image, size = [IMAGE_SIZE_CUSTOM, IMAGE_SIZE_CUSTOM])
AT = tf.cast(resized_preview, tf.float32) / 255.0

In [None]:
print(AT.shape)
plt.imshow(AT)

In [None]:
# Preview the first 25 images on a 5x5 grid.
fig, ax = plt.subplots(nrows = 5, ncols = 5, figsize = (20,15))
for axis, path in zip(ax.flat, df.image_path[:25]):
    image_1 = tf.image.decode_jpeg(tf.io.read_file(path))
    axis.imshow(image_1)
    axis.set_axis_off()
# Lay the grid out once, after every panel is drawn, instead of on each iteration.
fig.tight_layout()
plt.show()
    

In [None]:
# Count images per label and give both columns explicit names, so the chart
# does not depend on how the installed pandas version names value_counts() output.
label_counts = (
    df.labels.value_counts()
    .rename_axis('labels')
    .reset_index(name = 'count')
)
px.bar(data_frame = label_counts, x = 'labels', y = 'count', color = 'count')

# Input pipeline 

There are several ways to create an input pipeline, for example:
1. Using TFRecord files
2. Using an image directory

In [None]:
# Settings shared by both generators: divide pixels by 255 and reserve 20% of rows for validation.
common_datagen_kwargs = dict(rescale = 1./255, validation_split = 0.2)

# Only the training generator adds random horizontal flips.
train_datagen = ImageDataGenerator(horizontal_flip = True, **common_datagen_kwargs)
validation_datagen = ImageDataGenerator(**common_datagen_kwargs)

In [None]:
df.head(1)

In [None]:
# Arguments shared by the training and validation iterators.
flow_kwargs = dict(
    x_col = 'image',
    y_col = 'labels_str',
    target_size = (IMAGE_SIZE, IMAGE_SIZE),
    batch_size = BATCH,
    class_mode = 'categorical',
)

# Both iterators read the same DataFrame; `subset` selects the side of the
# validation_split configured on each generator.
training_set = train_datagen.flow_from_dataframe(
    df, TRAINING_PATH, seed = 45, subset = 'training', **flow_kwargs)
validation_set = validation_datagen.flow_from_dataframe(
    df, TRAINING_PATH, subset = 'validation', **flow_kwargs)

## Input pipeline using tf.data

In [None]:
# Hold out 20% of the rows for validation.  random_state pins the split so it
# does not depend on how much of NumPy's global RNG earlier cells consumed.
x_train, x_val, y_train, y_val = train_test_split(df.image_path,
                                                  df.labels,
                                                  test_size = 0.2,
                                                  random_state = 45)
# x_train, x_resize, y_train, y_resize = train_test_split(x_train, y_train, test_size = 0.5)

In [None]:
x_train.shape, x_val.shape, #x_resize.shape

In [None]:
# Positional lookup: after the shuffled split, index label 790 is not guaranteed
# to be in the training fold, so the label-based x_train[790] can raise KeyError.
x_train.iloc[0], y_train.iloc[0]

In [None]:
# Build a dataset of (path, label) pairs straight from the DataFrame columns.
list_of_image_path = df.image_path.to_numpy()
labels = df.labels.to_numpy()
ds_train = tf.data.Dataset.from_tensor_slices((list_of_image_path, labels))

In [None]:
# Quick check of how a single random 0/1 draw behaves inside a condition.
coin = np.random.randint(0, 2, 1)
print('value is 1' if coin == 1 else 'values is 0')

In [None]:
def data_loader(filenames, labels):
    """Return a tf.data.Dataset whose elements are (filename, label) pairs.

    `filenames` and `labels` are sliced along their first axis, so they must
    have the same length.
    """
    return tf.data.Dataset.from_tensor_slices((filenames, labels))

def random_sizing():
    """Draw one random integer, 0 or 1, returned as a length-1 NumPy array."""
    return np.random.randint(low = 0, high = 2, size = 1)

def _load_image(path, size):
    """Read the JPEG at `path`, resize it to `size` x `size` and divide by 255.

    Shared by both parsers below, which previously duplicated these four steps.
    """
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels = 3)  # always decode to 3 channels
    image = tf.image.resize(image, size = [size, size])
    return tf.cast(image, tf.float32)/255.0

def image_parse_resize(path, label):
    """Load one image at IMAGE_SIZE_CUSTOM resolution; the label passes through unchanged.

    Returns:
        (image, label) where image is a float32 tensor of shape
        (IMAGE_SIZE_CUSTOM, IMAGE_SIZE_CUSTOM, 3).
    """
    return _load_image(path, IMAGE_SIZE_CUSTOM), label

def image_parse(path, label):
    """Load one image at IMAGE_SIZE resolution; the label passes through unchanged.

    Returns:
        (image, label) where image is a float32 tensor of shape
        (IMAGE_SIZE, IMAGE_SIZE, 3).
    """
    return _load_image(path, IMAGE_SIZE), label

def data_augmentation(image, label):
    """Apply random colour jitter and a random horizontal flip to one image.

    The hue, saturation and contrast adjustments are applied in that order,
    followed by the flip. The label is returned untouched.
    """
    jittered = tf.image.random_hue(image, max_delta = 0.01)
    jittered = tf.image.random_saturation(jittered, lower = 0.7, upper = 1.3)
    jittered = tf.image.random_contrast(jittered, lower = 0.8, upper = 1.2)
    return tf.image.random_flip_left_right(jittered), label

def get_dataset_validation(filenames, labels):
    """Build the validation pipeline: decode -> batch -> prefetch.

    Validation data is deliberately neither augmented nor shuffled: random
    colour jitter and flips would make the validation metrics vary from epoch
    to epoch for reasons unrelated to the model.

    Args:
        filenames: image paths, one per example.
        labels: labels aligned with `filenames`.

    Returns:
        A tf.data.Dataset of (image_batch, label_batch) with images at
        IMAGE_SIZE resolution and BATCH examples per batch.
    """
    dataset = data_loader(filenames, labels)
    # Decode several files concurrently; output order is preserved by default.
    dataset = dataset.map(image_parse, num_parallel_calls = AUTO)
    dataset = dataset.batch(BATCH)
    dataset = dataset.prefetch(AUTO)
    return dataset

def get_dataset_train(filenames, labels):
    """Build the full-size training pipeline: shuffle -> decode -> augment -> batch -> prefetch.

    Args:
        filenames: image paths, one per example.
        labels: labels aligned with `filenames`.

    Returns:
        A tf.data.Dataset of (image_batch, label_batch) with images at
        IMAGE_SIZE resolution and BATCH examples per batch.
    """
    dataset = data_loader(filenames, labels)
    # Shuffle the cheap (path, label) pairs rather than decoded images, so the
    # shuffle buffer holds strings instead of float image tensors.
    dataset = dataset.shuffle(SHUFFLE)
    dataset = dataset.map(image_parse, num_parallel_calls = AUTO)
    dataset = dataset.map(data_augmentation, num_parallel_calls = AUTO)
    dataset = dataset.batch(BATCH)
    # Overlap input preparation with training, as the validation pipeline does.
    dataset = dataset.prefetch(AUTO)
    return dataset

def get_resized_dataset(filename, labels):
    """Build the resized training pipeline: shuffle -> decode -> augment -> batch -> prefetch.

    Args:
        filename: image paths, one per example.
        labels: labels aligned with `filename`.

    Returns:
        A tf.data.Dataset of (image_batch, label_batch) with images at
        IMAGE_SIZE_CUSTOM resolution and BATCH examples per batch.
    """
    dataset = data_loader(filename, labels)
    # Shuffle the cheap (path, label) pairs rather than decoded images, so the
    # shuffle buffer holds strings instead of float image tensors.
    dataset = dataset.shuffle(SHUFFLE)
    dataset = dataset.map(image_parse_resize, num_parallel_calls = AUTO)
    dataset = dataset.map(data_augmentation, num_parallel_calls = AUTO)
    dataset = dataset.batch(BATCH)
    # Overlap input preparation with training.
    dataset = dataset.prefetch(AUTO)
    return dataset


# The following function was written to test whether the model
# accepts images of varying size.

# def train_dataset(filename_train, labels_train, filename_resize, labels_resize):
#     ds_train = get_dataset_train(filename_train, labels_train) 
#     ds_resize = get_resized_dataset(filename_resize, labels_resize)
#     dataset = ds_train.concatenate(ds_resize)
#     dataset = dataset.shuffle(SHUFFLE)
#     return dataset

In [None]:
def data_loader_two_input(filenames, labels):
    """Return a tf.data.Dataset of (filename, label) pairs for the two-input pipeline."""
    paired = (filenames, labels)
    return tf.data.Dataset.from_tensor_slices(paired)

def image_parse_two_input(filename, label):
    """Load one image and return it at both resolutions used by the two-input model.

    Args:
        filename: path of a JPEG file.
        label: label for that image; passed through unchanged.

    Returns:
        (image_resized, image_original, label), where image_resized is
        IMAGE_SIZE_CUSTOM x IMAGE_SIZE_CUSTOM x 3 and image_original is
        IMAGE_SIZE x IMAGE_SIZE x 3, both float32 after division by 255.
    """
    raw_bytes = tf.io.read_file(filename)
    # Decode the file *contents*: passing the path string to decode_jpeg (as
    # before) fails because a path is not JPEG data.  channels=3 matches the
    # single-input parsers.
    image = tf.image.decode_jpeg(raw_bytes, channels = 3)
    image_resized = tf.image.resize(image, [IMAGE_SIZE_CUSTOM, IMAGE_SIZE_CUSTOM])
    # Resizing to IMAGE_SIZE gives the "original" branch a fixed static shape,
    # matching what image_parse produces for the single-input pipeline.
    image_original = tf.image.resize(image, [IMAGE_SIZE, IMAGE_SIZE])
    image_resized = tf.cast(image_resized, tf.float32)/255.0
    image_original = tf.cast(image_original, tf.float32)/255.0
    return image_resized, image_original, label
    
def data_augmentation_two_input(image_resized, image_original, label):
    """Apply random colour jitter and a random flip to each of the two views.

    Each view gets its own random draws, so the two images are augmented
    independently of one another. The label is returned untouched.
    """
    def _jitter(view):
        # Hue, saturation, contrast, then a random left-right flip.
        view = tf.image.random_hue(view, 0.01)
        view = tf.image.random_saturation(view, 0.7, 1.3)
        view = tf.image.random_contrast(view, 0.8, 1.2)
        return tf.image.random_flip_left_right(view)

    # Resized view first, then the original, keeping the order of the random ops.
    image_resized = _jitter(image_resized)
    image_original = _jitter(image_original)
    return image_resized, image_original, label
    

def dataset_for_two_input_model(filenames, labels):
    """Build the training dataset for the two-input model.

    Pipeline: load -> parse both resolutions -> augment -> pack -> shuffle -> batch.

    Args:
        filenames: image paths, one per example.
        labels: labels aligned with `filenames`.

    Returns:
        A tf.data.Dataset whose elements are
        ((image_original_batch, image_resized_batch), label_batch).
    """
    def _pack_for_keras(image_resized, image_original, label):
        # Model.fit reads a 3-tuple as (inputs, targets, sample_weights), so the
        # two images must be nested into a single `inputs` tuple.  The order
        # (original, resized) follows the model's `inputs=[orig_input, input_resized]`.
        return (image_original, image_resized), label

    dataset = data_loader_two_input(filenames, labels)
    dataset = dataset.map(image_parse_two_input)
    dataset = dataset.map(data_augmentation_two_input)
    dataset = dataset.map(_pack_for_keras)
    dataset = dataset.shuffle(SHUFFLE)
    dataset = dataset.batch(BATCH)
    return dataset
    

In [None]:
# Full-size (IMAGE_SIZE) and resized (IMAGE_SIZE_CUSTOM) training pipelines;
# both are inspected in the cells below.
train_128 = get_dataset_train(x_train, y_train)
train_100 = get_resized_dataset(x_train, y_train)

# model_1 is declared with IMAGE_SIZE_CUSTOM inputs, so it trains on the resized
# pipeline.  (The earlier call to train_dataset(...) referenced a function and
# x_resize / y_resize variables that exist only as commented-out code.)
train_ds = train_100

# Validation at the same resolution as train_ds, without augmentation or
# shuffling: get_dataset_validation yields IMAGE_SIZE images, which do not fit
# model_1's input shape.
val_ds = (
    data_loader(x_val, y_val)
    .map(image_parse_resize)
    .batch(BATCH)
    .prefetch(AUTO)
)

In [None]:
two_input_ds = dataset_for_two_input_model(x_train, y_train)

In [None]:
tf.constant([4,5]).numpy()

In [None]:
# Print the shape of the first full-size batch only.
for image, _ in train_128.take(1):
    print(image.shape)

In [None]:
# Print the shape of the first resized batch only.
for image, _ in train_100.take(1):
    print(image.shape)

In [None]:
row, col = 11, 5

# Never ask for more grid rows than one batch can fill.
row = min(row, BATCH//col)
for image, label in train_100.take(2):
    # A batch can hold fewer images than the grid has cells (e.g. the last batch).
    shown = min(row*col, int(image.shape[0]))
    # One figure per batch; squeeze=False keeps `axes` 2-D even for a 1x1 grid.
    fig, axes = plt.subplots(row, col, figsize = (20,10), squeeze = False)
    for j, axis in enumerate(axes.flat):
        axis.axis('off')
        if j < shown:
            axis.imshow(image[j])
            # One title carrying both the label and the image shape; two
            # successive plt.title calls would leave only the second visible.
            axis.set_title(f'{label[j].numpy()} | {tuple(image[j].shape)}')
    # Lay out once per figure instead of once per panel.
    fig.tight_layout()
    plt.show()
    print(image.shape)
    


## Model building for the ImageDataGenerator pipeline

In [None]:
# Small CNN for IMAGE_SIZE x IMAGE_SIZE RGB inputs: three conv/pool stages
# followed by a dense head with a 5-way softmax output.
model = Sequential([
    keras.Input(shape = (IMAGE_SIZE, IMAGE_SIZE, 3)),
    Conv2D(filters = 16, kernel_size = 5, padding = 'same', activation = 'relu'),
    MaxPooling2D(),
    Conv2D(filters = 32, kernel_size = 5, padding = 'same', activation = 'relu'),
    MaxPooling2D(),
    Conv2D(filters = 64, kernel_size = 3, padding = 'same', activation = 'relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(units = 128, activation = 'relu'),
    Dense(units = 5, activation = 'softmax'),
])

In [None]:
model.summary()

In [None]:
# The generators are created with class_mode = 'categorical', so this model is
# compiled with CategoricalCrossentropy (the non-sparse loss).
model.compile(optimizer = tf.keras.optimizers.Adam(),
    loss= tf.keras.losses.CategoricalCrossentropy(), 
    metrics = ['accuracy'])

In [None]:
model.fit(training_set , epochs = 2, 
    validation_data = validation_set)

## Model for the custom tf.data pipeline

In [None]:
# Same CNN layout as the generator model, but sized for
# IMAGE_SIZE_CUSTOM x IMAGE_SIZE_CUSTOM RGB inputs.
model_1 = Sequential([
    keras.Input(shape = (IMAGE_SIZE_CUSTOM, IMAGE_SIZE_CUSTOM, 3)),
    Conv2D(filters = 16, kernel_size = 5, padding = 'same', activation = 'relu'),
    MaxPooling2D(),
    Conv2D(filters = 32, kernel_size = 5, padding = 'same', activation = 'relu'),
    MaxPooling2D(),
    Conv2D(filters = 64, kernel_size = 3, padding = 'same', activation = 'relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(units = 128, activation = 'relu'),
    Dense(units = 5, activation = 'softmax'),
])

In [None]:
model_1.summary()

In [None]:
# The tf.data pipelines pass the integer `labels` column through unchanged,
# so this model is compiled with the sparse cross-entropy loss.
model_1.compile(optimizer = tf.keras.optimizers.Adam(),
    loss= tf.keras.losses.SparseCategoricalCrossentropy(), 
    metrics = ['accuracy'])

In [None]:
model_1.fit(train_ds , epochs = 2, 
    validation_data = val_ds)

In [None]:
# Load one image the same way the resized training pipeline does.
image = tf.io.read_file(df.image_path[5])
# channels = 3 matches image_parse_resize and guarantees the 3-channel layout
# that the later reshape to (-1, IMAGE_SIZE_CUSTOM, IMAGE_SIZE_CUSTOM, 3) requires.
image = tf.image.decode_jpeg(image, channels = 3)
image = tf.image.resize(image, [IMAGE_SIZE_CUSTOM, IMAGE_SIZE_CUSTOM])
image = tf.cast(image, tf.float32)/255.0

In [None]:
plt.imshow(image)

In [None]:
image = tf.reshape(image, [-1, IMAGE_SIZE_CUSTOM, IMAGE_SIZE_CUSTOM, 3])

In [None]:
model_1.predict(image)

In [None]:
df.head()

## Model with two different sized images

In [None]:
# input = keras.Input(shape = (None,))
# if input.shape == (128,128,3):
#     input.add(keras.layers.Conv2D(3, 29, activation = 'relu'))
# else:
#     x = input
# def larger_size(input = input):
#     return input.add(tf.keras.layers.Conv2D(3,29,activation = 'relu'))
# def small_size(input= input):
#     return input
# x = tf.cond(input.shape == (128,128,3), lambda : large_size(input), lambda : small_size(input))

# Two-input functional model: a 128x128 image and a 100x100 image are merged
# channel-wise and fed through a shared CNN trunk.
# NOTE(review): the input name 'Original sized' contains a space; some
# TensorFlow versions may reject such layer names — confirm.
orig_input = tf.keras.layers.Input(shape = (128,128,3), name= 'Original sized')
# A 29x29 convolution with the default (unpadded) settings shrinks 128 -> 100
# (128 - 29 + 1), so this branch lines up spatially with the resized input.
y = tf.keras.layers.Conv2D(6,29, activation = 'relu')(orig_input)

input_resized = tf.keras.layers.Input(shape = (100,100,3), name = 'resized')

# Concatenate on the default (last) axis: 3 channels from the resized image
# plus 6 from the convolved original.
x = tf.keras.layers.Concatenate()([input_resized, y])
    
# x = tf.keras.layers.Reshape((100,100,6))(x)
x = tf.keras.layers.Conv2D(16, 5, activation='relu')(x)
x = tf.keras.layers.MaxPooling2D()(x)
x = tf.keras.layers.Conv2D(32, 5, padding='same', activation='relu')(x)
x = tf.keras.layers.MaxPooling2D()(x)
x = tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu')(x)
x = tf.keras.layers.MaxPooling2D()(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
# 5-way softmax output.
output = tf.keras.layers.Dense(5, activation = 'softmax')(x)
# Input order matters to callers: the original-size image first, then the resized one.
model_resize = keras.Model(inputs = [orig_input ,input_resized] , outputs = output)

In [None]:
model_resize.summary()

In [None]:
keras.utils.plot_model(model_resize, show_shapes=True)

In [None]:
# Sparse cross-entropy, the same loss used for model_1.
# NOTE(review): assumes the dataset passed to fit supplies integer class ids — confirm.
model_resize.compile(optimizer = tf.keras.optimizers.Adam(),
    loss= tf.keras.losses.SparseCategoricalCrossentropy(), 
    metrics = ['accuracy'])

In [None]:
model_resize.fit(two_input_ds, epochs = 2)