# **Import Libraries**

In [1]:
%matplotlib inline
import tensorflow as tf
import numpy as np
from numpy.random import seed
seed(1)
from tensorflow.random import set_seed
set_seed(2)
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import time
import PIL
import glob
import random
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
import os
import skimage
from skimage import io
import cv2
import shutil
%matplotlib inline

# **Create Dataframes from Images**

In [2]:
# Root folder of the training split; every sub-folder is one class and its
# name is used as the label of the images it contains.
base_path = 'C:\\Users\\chanp\\Desktop\\image food\\kaggle\\train'
# os.listdir returns entries in arbitrary order, so sort for a reproducible
# row order, and ignore stray files sitting next to the class folders (they
# would make the inner os.listdir call fail).
classes = sorted(
    c for c in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, c))
)
filepaths = []
labels = []
for c in classes:
    class_dir = os.path.join(base_path, c)
    flist = sorted(os.listdir(class_dir))
    for f in flist:
        fpath = os.path.join(class_dir, f)
        filepaths.append(fpath)
        labels.append(c)
print ('filepaths: ', len(filepaths), '   labels: ', len(labels))

filepaths:  11520    labels:  11520


In [3]:
# Assemble the training table: one row per image with its path and class label.
Fseries = pd.Series(filepaths, name='file_paths')
Lseries = pd.Series(labels, name='labels')
train_df = pd.DataFrame({'file_paths': Fseries, 'labels': Lseries})
# Per-class image counts, to see how balanced the split is.
print(train_df['labels'].value_counts())

29    240
41    240
44    240
33    240
31    240
35    240
09    240
25    240
08    240
18    240
10    240
37    240
17    240
13    240
15    240
32    240
39    240
12    240
11    240
21    240
47    240
20    240
16    240
04    240
36    240
24    240
27    240
00    240
30    240
43    240
22    240
23    240
01    240
14    240
03    240
28    240
26    240
19    240
40    240
07    240
42    240
05    240
38    240
45    240
06    240
34    240
02    240
46    240
Name: labels, dtype: int64


In [4]:
# Root folder of the validation split; every sub-folder is one class and its
# name is used as the label of the images it contains.
base_path = 'C:\\Users\\chanp\\Desktop\\image food\\kaggle\\valid'
# os.listdir returns entries in arbitrary order, so sort for a reproducible
# row order, and ignore stray files sitting next to the class folders (they
# would make the inner os.listdir call fail).
classes = sorted(
    c for c in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, c))
)
filepaths = []
labels = []
for c in classes:
    class_dir = os.path.join(base_path, c)
    flist = sorted(os.listdir(class_dir))
    for f in flist:
        fpath = os.path.join(class_dir, f)
        filepaths.append(fpath)
        labels.append(c)
print ('filepaths: ', len(filepaths), '   labels: ', len(labels))

filepaths:  1440    labels:  1440


In [5]:
# Assemble the validation table: one row per image with its path and class label.
Fseries = pd.Series(filepaths, name='file_paths')
Lseries = pd.Series(labels, name='labels')
valid_df = pd.DataFrame({'file_paths': Fseries, 'labels': Lseries})
# Per-class image counts, to see how balanced the split is.
print(valid_df['labels'].value_counts())

29    30
02    30
20    30
22    30
14    30
32    30
39    30
27    30
47    30
25    30
06    30
09    30
08    30
16    30
33    30
18    30
23    30
01    30
31    30
40    30
35    30
10    30
26    30
45    30
34    30
38    30
46    30
19    30
11    30
07    30
24    30
37    30
42    30
41    30
03    30
28    30
36    30
44    30
17    30
00    30
21    30
30    30
13    30
15    30
05    30
43    30
04    30
12    30
Name: labels, dtype: int64


In [6]:
# Root folder of the test split; every sub-folder is one class and its
# name is used as the label of the images it contains.
base_path = 'C:\\Users\\chanp\\Desktop\\image food\\kaggle\\test'
# os.listdir returns entries in arbitrary order, so sort for a reproducible
# row order, and ignore stray files sitting next to the class folders (they
# would make the inner os.listdir call fail).
classes = sorted(
    c for c in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, c))
)
filepaths = []
labels = []
for c in classes:
    class_dir = os.path.join(base_path, c)
    flist = sorted(os.listdir(class_dir))
    for f in flist:
        fpath = os.path.join(class_dir, f)
        filepaths.append(fpath)
        labels.append(c)
print ('filepaths: ', len(filepaths), '   labels: ', len(labels))

filepaths:  1440    labels:  1440


In [7]:
# Assemble the test table: one row per image with its path and class label.
Fseries = pd.Series(filepaths, name='file_paths')
Lseries = pd.Series(labels, name='labels')
test_df = pd.DataFrame({'file_paths': Fseries, 'labels': Lseries})
# Per-class image counts, to see how balanced the split is.
print(test_df['labels'].value_counts())

29    30
02    30
20    30
22    30
14    30
32    30
39    30
27    30
47    30
25    30
06    30
09    30
08    30
16    30
33    30
18    30
23    30
01    30
31    30
40    30
35    30
10    30
26    30
45    30
34    30
38    30
46    30
19    30
11    30
07    30
24    30
37    30
42    30
41    30
03    30
28    30
36    30
44    30
17    30
00    30
21    30
30    30
13    30
15    30
05    30
43    30
04    30
12    30
Name: labels, dtype: int64


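The dataset is balanced: every class has 240 training images and 30 images in each of the validation and test sets, so there is no need to oversample or remove images to even out the classes.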

# **Visualize Images**

In [8]:
# Load a single training image and display its array shape.
sample_path = 'C:\\Users\\chanp\\Desktop\\image food\\kaggle\\train\\02\\0003.jpg'
img = plt.imread(sample_path)
img.shape


(224, 224, 3)

In [9]:
# Show a 4x5 grid of randomly chosen training images, each titled with its label.
plt.figure(figsize=(14,10))
for i in range(20):
    # The index is called `idx` (not `random`) so the imported `random` module
    # is not shadowed, and the lower bound is 0 so the first row of train_df
    # can be drawn too (randint's upper bound is exclusive).
    idx = np.random.randint(0, len(train_df))
    plt.subplot(4,5,i+1)
    img_path = train_df.loc[idx, "file_paths"]
    plt.imshow(plt.imread(img_path))
    plt.title(train_df.loc[idx, "labels"], size = 10, color = "black")
    plt.xticks([])
    plt.yticks([])

plt.show()

Looks like we can perform some zooming and horizontal flips with ImageDataGenerator.

# **ImageDataGenerator**

In [None]:
# Every image is resized to 224x224 and served in batches of 64.
target_size = (224, 224)
batch_size = 64
preprocess = tf.keras.applications.efficientnet.preprocess_input

# Only the training stream is augmented (random zoom and horizontal flips);
# validation and test images just go through the preprocessing function.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess,
    zoom_range=0.2,
    horizontal_flip=True,
)
test_datagen = ImageDataGenerator(preprocessing_function=preprocess)

# Options shared by the three dataframe-backed generators.
flow_options = dict(
    x_col='file_paths',
    y_col='labels',
    target_size=target_size,
    batch_size=batch_size,
    color_mode='rgb',
    class_mode='categorical',
)
train_gen = train_datagen.flow_from_dataframe(train_df, **flow_options)
valid_gen = test_datagen.flow_from_dataframe(valid_df, **flow_options)
test_gen = test_datagen.flow_from_dataframe(test_df, **flow_options)

# **EfficientNetB0-based model**

In [None]:
# EfficientNetB0 backbone with ImageNet weights and without its own classifier.
base_model = tf.keras.applications.EfficientNetB0(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Stack the classification head on top of the backbone, one layer at a time.
model = tf.keras.Sequential()
model.add(base_model)
model.add(tf.keras.layers.GlobalAveragePooling2D())
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dropout(0.2))
# One softmax output per food class (48 classes).
model.add(tf.keras.layers.Dense(48, activation='softmax'))

In [None]:
# Initial learning rate for Adam.
lr=0.001
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=lr), metrics=['accuracy'])

# **Callbacks**

In [None]:
factor = 0.5        # multiplier passed to ReduceLROnPlateau
patience = 2        # patience passed to ReduceLROnPlateau
stop_patience = 5   # patience passed to EarlyStopping

# Save the model to disk, keeping only the best one seen so far.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    "classify_model.h5",
    save_best_only=True,
    verbose=0,
)
# Stop once val_loss stalls and roll the model back to its best weights.
early_stop_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=stop_patience,
    restore_best_weights=True,
    verbose=1,
)
# Scale the learning rate by `factor` when val_loss plateaus.
reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=factor,
    patience=patience,
    verbose=1,
)

callbacks = [checkpoint_cb, early_stop_cb, reduce_lr_cb]
model.summary()

# **Training**

In [None]:
# Upper bound on the number of epochs; the callbacks may end training earlier.
epochs = 30
history = model.fit(
    train_gen,
    epochs=epochs,
    validation_data=valid_gen,
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# One figure per metric: training curve first, validation curve second.
# Each entry: (train key, validation key, line labels, title, y label, x label).
curves = (
    ('loss', 'val_loss',
     ('Loss (training data)', 'Loss (validation data)'),
     'Loss for Training', 'Loss', 'No. epoch'),
    ('accuracy', 'val_accuracy',
     (None, None),
     'model accuracy', 'accuracy', 'epoch'),
)
for train_key, valid_key, line_labels, title, y_label, x_label in curves:
    plt.plot(history.history[train_key], label=line_labels[0])
    plt.plot(history.history[valid_key], label=line_labels[1])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel(x_label)
    # The explicit legend entries take precedence over the per-line labels.
    plt.legend(['train', 'validation'], loc="upper left")
    plt.show()

# **Predictions on Test Set**

In [None]:
# Reload the weights written by the ModelCheckpoint callback into the model.
model.load_weights('./classify_model.h5')
# `best_model` is another name for the same model object, not a copy.
best_model = model
# Evaluate on the held-out test generator.
best_model.evaluate(test_gen)

# **Predictions on Random Google Images**

In [None]:
# Class-name -> index mapping from the test generator, split into two parallel
# lists so a predicted index can be translated back to its class name.
labels_dict = test_gen.class_indices
key_list = [*labels_dict]
val_list = [*labels_dict.values()]

In [None]:
def predict_image(img_url):
    """Classify one image with ``best_model`` and plot it with the predicted label.

    Args:
        img_url: URL or path that ``skimage.io.imread`` can read.

    The predicted index is mapped back to a class name through the
    module-level ``val_list`` / ``key_list`` and used as the plot title.
    Nothing is returned.
    """
    # skimage returns the channels in RGB order, which is what the training
    # generators used (color_mode='rgb'), so no BGR<->RGB swap is applied
    # before inference or before plotting.
    img = io.imread(img_url)
    # The network expects exactly three channels: expand grayscale images and
    # drop the alpha channel of RGBA images.
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[-1] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
    resized_img = cv2.resize(img, (224, 224))
    img_tensor = tf.convert_to_tensor(resized_img, dtype=tf.float32)
    # Add the batch dimension: (224, 224, 3) -> (1, 224, 224, 3).
    img_tensor = tf.expand_dims(img_tensor, 0)
    prediction = best_model.predict(img_tensor)
    prediction = prediction.argmax()
    position = val_list.index(prediction)
    label = key_list[position]
    plt.figure(figsize=(14,10))
    plt.imshow(img)
    plt.title(label, size = 10, color = "black")
    plt.xticks([])
    plt.yticks([])
    plt.show()

In [None]:
predict_image('https://www.thespruceeats.com/thmb/m7lyKJKk2x7ezOnDM_oHncbP1xg=/1776x1332/smart/filters:no_upscale()/thai-green-curry-recipe-p3-3217442-hero-1-a3fcdfbc551849718c7750fa63ec8c6a.jpg')

Predicted chicken green curry as desired.

In [None]:
predict_image('https://3.bp.blogspot.com/_Liz-VpvKDvo/TLusgS1s_QI/AAAAAAAAFDA/NqurYE8r5wI/s1600/khaijiao+cover.jpg')

Predicted fried fish-paste balls instead of omelet.

In [None]:
predict_image('https://www.thespruceeats.com/thmb/Agleg-0qGlXWpgnHEGQUmVJoSlg=/4160x3120/smart/filters:no_upscale()/som-tam-thai-green-papaya-salad-3217407-hero-01-9e4281d9e4a64b0e8bb4930debcef3a3.jpg')

Predicted stir-fried chinese morning glory instead of green papaya salad.

In [None]:
predict_image('https://c8.alamy.com/comp/S0X6JP/grilled-giant-river-prawn-S0X6JP.jpg')

Predicted mango sticky rice instead of grilled river prawn.

Thanks for reading this notebook! Make sure to leave an upvote if this helped you out 😀 .