## Problem statement:

Build a CNN-based model that can accurately classify images of food.

[Dataset is taken from kaggle.](https://www.kaggle.com/rajaraman6195/recipes)

### About this Dataset
This dataset is a collection of food images, organised as one set of images per recipe.

The dataset contains 5 sub-directories of food images.
- biryani (the code below reads this class from a folder named `briyani`)
- burger
- dosa
- idly
- pizza

### Importing all the important libraries

In [1]:
import os
import pathlib
from glob import glob
import fnmatch
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import os
import PIL

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from keras.utils.vis_utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

In [3]:
# Copy the dataset from ../input/recipes to ./newrecipes; later cells delete
# files from this copy and generate new ones inside it.
! cp -R ../input/recipes ./newrecipes

In [4]:
# Root of the working copy of the dataset; the class sub-directories live here.
# The trailing slash matters: a later cell builds paths by string concatenation.
data_dir_path = "./newrecipes/"

In [5]:
# import shutil
# shutil.rmtree(data_dir_path)

In [6]:
# Delete every file whose buffered header does not contain the "JFIF" marker;
# such files are treated as corrupted images. `num_skipped` counts deletions.
num_skipped = 0

for folder_name in ['briyani', 'burger', 'dosa', 'idly', 'pizza']:
    folder_path = os.path.join(data_dir_path, folder_name)
    for fname in os.listdir(folder_path):
        fpath = os.path.join(folder_path, fname)

        # Only regular files can be inspected; opening a sub-directory raises.
        if not os.path.isfile(fpath):
            continue

        # `with` always closes the handle and never touches an unbound (or
        # stale, from a previous iteration) handle when `open` itself fails.
        with open(fpath, "rb") as fobj:
            # peek() returns buffered bytes without advancing the file position.
            is_jfif = tf.compat.as_bytes("JFIF") in fobj.peek(10)

        if not is_jfif:
            num_skipped += 1
            # Delete corrupted image
            os.remove(fpath)

print("Deleted %d images" % num_skipped)

In [7]:
# Path object for the dataset root that holds the train and test images
data_dir_train = pathlib.Path(data_dir_path)

### Create a dataset

Define some parameters for the loader:

In [8]:
# Side length (in pixels) of the square images produced by the loader
IMAGE_SIZE = 250
# Number of images per batch produced by the loader
BATCH_SIZE = 32
# Number of colour channels per image; used to build the model input shape
CHANNELS = 3

In [9]:
# Build a batched dataset of (images, labels) from the class sub-directories of
# data_dir_path. Every image is resized to IMAGE_SIZE x IMAGE_SIZE, and the
# fixed seed makes the shuffled file order reproducible.
dataset = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir_path,
    shuffle=True,
    seed=123,
    image_size=(IMAGE_SIZE, IMAGE_SIZE),
    batch_size=BATCH_SIZE,
)

In [10]:
def get_dataset_partition_tf(ds, train_split=0.70, val_split=0.20, test_split=0.10, shuffle=True, shuffle_size=10000):
    """Split a dataset into train, test and validation partitions.

    Sizes are counted in elements of ``ds`` (``len(ds)``), so for a batched
    dataset they are numbers of batches, not numbers of images.

    Args:
        ds: Dataset-like object supporting ``len()``, ``shuffle()``,
            ``take()`` and ``skip()``.
        train_split: Fraction of elements assigned to the training partition.
        val_split: Fraction of elements assigned to the validation partition.
        test_split: Accepted for interface compatibility but not used; the
            test partition is whatever remains after train and validation.
        shuffle: Whether to shuffle ``ds`` once before splitting.
        shuffle_size: Buffer size passed to ``ds.shuffle``.

    Returns:
        A tuple ``(train_ds, test_ds, val_ds)``. Note the order: the test
        partition comes before the validation partition.
    """
    ds_size = len(ds)

    if shuffle:
        # By default a shuffled dataset is re-shuffled on every pass, so
        # take()/skip() would cut different elements each time and the
        # partitions would leak into each other. Freeze the order instead.
        ds = ds.shuffle(shuffle_size, seed=12, reshuffle_each_iteration=False)

    train_size = int(ds_size * train_split)
    val_size = int(ds_size * val_split)

    train_ds = ds.take(train_size)
    remainder = ds.skip(train_size)
    val_ds = remainder.take(val_size)
    test_ds = remainder.skip(val_size)

    return train_ds, test_ds, val_ds

In [11]:
# Unpack in the order the helper returns: train, then test, then validation.
train_ds, test_ds, val_ds = get_dataset_partition_tf(dataset)

In [12]:
# Report each partition as (number of batches) x BATCH_SIZE.
for split_label, split_ds in (
    ("Training size:", train_ds),
    ("Validation size:", val_ds),
    ("Testing size:", test_ds),
):
    print(split_label, len(split_ds) * BATCH_SIZE)

In [13]:
# Class labels as exposed by the loader; integer labels index into this list.
class_names = dataset.class_names
# Bare expression so the notebook displays the list.
class_names

### Visualize the data
#### Visualize one instance of each class present in the dataset

In [14]:
# Collect one example image per class.
# temp_class keeps the class names in the order they were first seen;
# temp_dict maps each class name to its example image as a uint8 array.
temp_class = []
temp_dict = {}
for images, labels in train_ds:
    # Scan every image of the batch, whatever the batch length is.
    for image, label in zip(images, labels):
        class_name = class_names[int(label)]
        if class_name not in temp_dict:
            temp_class.append(class_name)
            temp_dict[class_name] = image.numpy().astype("uint8")
    # Stop reading batches as soon as every class has an example.
    if len(temp_class) == len(class_names):
        break

In [15]:
# Show the collected example of each class side by side in a single row.
plt.figure(figsize=(20, 5))
for i, class_name in enumerate(class_names):
    # One column per class, so the grid follows the number of classes.
    plt.subplot(1, len(class_names), i + 1)
    plt.imshow(temp_dict[class_name])
    plt.title(class_name)
    plt.axis("off")

**Keep the images in memory (cache) and prefetch upcoming batches**

In [16]:
# Let tf.data pick the prefetch buffer size at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Cache the training batches, shuffle them with a 1000-element buffer and prefetch.
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
# Validation batches are cached and prefetched but not shuffled.
# NOTE(review): test_ds is left untouched here (no cache/prefetch).
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

### Data normalization

In [17]:
# Preprocessing stage placed at the front of the model: resize inputs to
# IMAGE_SIZE x IMAGE_SIZE, then multiply pixel values by 1/255.
resize_and_rescale = tf.keras.Sequential([
    layers.experimental.preprocessing.Resizing(IMAGE_SIZE, IMAGE_SIZE),
    layers.experimental.preprocessing.Rescaling(1.0/255)
])

### Data augmentation

In [18]:
# Random augmentation stage: flips along both axes, rotation with factor 0.2
# and zoom with factor 0.25.
data_augmentation = keras.Sequential([
    
    layers.experimental.preprocessing.RandomFlip("horizontal_and_vertical"),    
    layers.experimental.preprocessing.RandomRotation(0.2),
    layers.experimental.preprocessing.RandomZoom(0.25),

])

Let's visualize augmented examples by applying data augmentation to the same image several times:

In [19]:
# Run the augmentation six times on one training batch and draw the first
# image of each augmented batch into a 3x3 grid (six cells are filled).
plt.figure(figsize=(10, 10))
for images, _ in train_ds.take(1):
    for slot in range(1, 7):
        augmented_images = data_augmentation(images)
        first_augmented = augmented_images[0].numpy().astype("uint8")
        ax = plt.subplot(3, 3, slot)
        plt.imshow(first_augmented)
        plt.axis("off")

In [20]:
# Function to visualize training result
def visualize_train_result(history):
    """Plot accuracy and loss curves for training and validation side by side.

    Args:
        history: Object whose ``history`` attribute is a mapping with the
            keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    curves = history.history
    # Read all four series first so a missing key fails before any figure is created.
    train_acc, val_acc = curves['accuracy'], curves['val_accuracy']
    train_loss, val_loss = curves['loss'], curves['val_loss']

    panels = (
        (train_acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
         'lower right', 'Training and Validation Accuracy'),
        (train_loss, 'Training Loss', val_loss, 'Validation Loss',
         'upper right', 'Training and Validation Loss'),
    )

    plt.figure(figsize=(16, 6))
    for position, panel in enumerate(panels, start=1):
        train_series, train_label, val_series, val_label, legend_loc, title = panel
        plt.subplot(1, 2, position)
        plt.plot(train_series, label=train_label)
        plt.plot(val_series, label=val_label)
        plt.legend(loc=legend_loc)
        plt.title(title)
    plt.show()

## Model 1

In [21]:
# Shape used to build the model; note that it includes the batch dimension
# as its first entry.
input_shape = (BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, CHANNELS)
# Size of the final softmax layer: one output per class.
n_classes = len(class_names)
print("input_shape", input_shape)

In [22]:
def define_model1():
    """Build and compile the baseline CNN.

    The network chains the shared ``resize_and_rescale`` and
    ``data_augmentation`` stages, three Conv2D + MaxPooling2D stages
    (32, 64 and 128 filters) and a dense head ending in a softmax over
    ``n_classes``. It reads the module-level ``input_shape`` and ``n_classes``.

    Returns:
        The built model, compiled with the Adam optimizer, sparse categorical
        cross-entropy loss and the accuracy metric.
    """
    conv_stages = [
        layers.Conv2D(32, (3,3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3,3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3,3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
    ]
    classifier_head = [
        layers.Flatten(),
        layers.Dense(256, activation='relu'),
        layers.Dense(n_classes, activation='softmax'),
    ]

    model = Sequential(
        [resize_and_rescale, data_augmentation, *conv_stages, *classifier_head]
    )
    model.build(input_shape)

    # Labels are integer class indices, hence the sparse variant of the loss.
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
    return model

In [23]:
# Instantiate the baseline model (already built and compiled by the helper).
model_1 = define_model1()

In [24]:
# Print the layer-by-layer summary of the baseline model
model_1.summary()

In [25]:
# Stop training once val_loss has not improved for 15 epochs.
# NOTE(review): this callback is not passed to the fit() call in the training
# cell that follows - confirm whether that is intentional.
earlystop = EarlyStopping(monitor='val_loss', patience=15)
# Halve the learning rate after 5 epochs without val_loss improvement, then
# wait 1 epoch before monitoring again.
LR = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, cooldown=1, verbose=1)

In [26]:
%%time
EPOCHS = 40

history1 = model_1.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[LR]
)

In [27]:
# Plot the accuracy and loss curves recorded while training model_1.
visualize_train_result(history1)

### Findings:
- Training accuracy is 89% and validation accuracy is 81%.
- The model is slightly overfitting.
- Let's add dropout to the next model and observe the result.

## Data Augmentation.

In [28]:
!pip install Augmentor

In [29]:
import Augmentor

In [None]:
# Generate extra images for every class folder with an Augmentor pipeline.
path_to_training_dataset = data_dir_path
for class_name in class_names:
    pipeline = Augmentor.Pipeline(path_to_training_dataset + class_name)
    pipeline.rotate(probability=0.6, max_left_rotation=10, max_right_rotation=10)
    pipeline.zoom(probability=0.6, min_factor=1.1, max_factor=1.6)
    pipeline.crop_centre(probability=0.2, percentage_area=0.25, randomise_percentage_area=False)
    # 400 samples are added per class to make sure that none of the classes are sparse.
    pipeline.sample(400)

**Use augmented data for training**

## Let's predict on test data


In [None]:
def predict(model, img):
    """Classify a single image with ``model``.

    Args:
        model: Object exposing ``predict`` for a batch of images.
        img: A single image accepted by ``img_to_array``.

    Returns:
        A tuple ``(predicted_class, confidence)``: the entry of
        ``class_names`` with the highest score, and that score as a
        percentage rounded to two decimals.
    """
    # The model works on batches, so wrap the image in a batch of one.
    single_batch = tf.expand_dims(
        tf.keras.preprocessing.image.img_to_array(img), 0
    )
    scores = model.predict(single_batch)[0]

    best_index = np.argmax(scores)
    return class_names[best_index], round(100 * (np.max(scores)), 2)

In [None]:
def predict_test(model):
    """Plot up to nine images from one test batch with actual and predicted labels.

    Takes a single batch from the module-level ``test_ds``, runs ``predict``
    on each displayed image and titles it with the actual class, the
    predicted class and the confidence.

    Args:
        model: Model forwarded to ``predict``.
    """
    plt.figure(figsize=(15, 15))
    for images, labels in test_ds.take(1):
        # A batch can hold fewer than nine images; never index past its end.
        shown = min(9, images.shape[0])
        for i in range(shown):
            plt.subplot(3, 3, i + 1)

            image = images[i].numpy()
            plt.imshow(image.astype("uint8"))
            predicted_class, confidence = predict(model, image)
            actual_class = class_names[labels[i]]

            plt.title(f"Actual: {actual_class}, \n Predicted: {predicted_class}, \n Confidence: {confidence}")
            plt.axis("off")

In [None]:
# NOTE(review): aug_model_1 is not defined in the visible part of this
# notebook - confirm the cell that trains it on the augmented data exists.
predict_test(aug_model_1)

## Save model

In [None]:
# Directory the trained model is exported to.
model_path = './models'

# exist_ok avoids the check-then-create race, and reusing model_path keeps the
# created directory in sync with the variable.
os.makedirs(model_path, exist_ok=True)

In [None]:
# Export the model twice: once to model_name_path itself and once as a .h5
# file inside a sibling "<model_name_path>_h5" directory.
model_name_path = f'{model_path}/food_classifier_model'
h5_model_path = f'{model_name_path}_h5/my_model.h5'

for export_target in (model_name_path, h5_model_path):
    aug_model_1.save(export_target)