**Fruits Recognition**

* Data Importing
* Defining Labels (Classes)
* Train, Validation and Test
* Augmenting the Image Dataset
* Building a Model
* Testing Model

In [None]:
import numpy as np 
import pandas as pd
import os
from pathlib import Path
from sklearn.model_selection import train_test_split 
from keras.preprocessing.image import ImageDataGenerator
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
import tensorflow as tf
import matplotlib.pyplot as plt

**Data Importing**

In [None]:
# Collect every .jpg under the training directory (recursively) and build a
# two-column frame: the full path as a string and the bare file name.
direc = Path('../input/fruits-recognition/fruits_data/train')
image_paths = list(direc.glob('**/*.jpg'))

filepaths = pd.Series([str(p) for p in image_paths], name='FilePaths').astype(str)
filename = pd.Series([p.name for p in image_paths], name='filename').astype(str)

# Both Series share the same positional index, so a column-wise concat
# lines each path up with its own file name.
train_imgdf = pd.concat([filepaths, filename], axis=1)

train_imgdf

**Defining Labels (Classes)**

In [None]:
# Load the training label table. Later cells join it on `filename` and read
# its `label` column, so both columns are expected to be present.
training_csv = '../input/fruits-recognition/fruits_data/Training_set.csv'
labels = pd.read_csv(training_csv)
labels

In [None]:
# Attach the class label to every image path (inner join on the file name),
# then discard the join key, which is no longer needed.
train_imgdf1 = (
    train_imgdf
    .merge(labels, on='filename')
    .drop(columns='filename')
)
train_imgdf1

*Checking the images with labels*

In [None]:
# Preview up to the first 40 labelled training images in a 5 x 8 grid.
# Rows are taken positionally (head), so this works regardless of the frame's
# index labels and does not raise when fewer than 40 images are available;
# any surplus axes are simply left blank.
preview = train_imgdf1.head(40)

f, a = plt.subplots(nrows=5, ncols=8, figsize=(13, 7),
                    subplot_kw={'xticks': [], 'yticks': []})

for ax, img_path, img_label in zip(a.flat, preview['FilePaths'], preview['label']):
    ax.imshow(plt.imread(img_path))
    ax.set_title(img_label)

plt.tight_layout()
plt.show()

In [None]:
# Report the dimensions of the labelled training frame.
n_rows, n_cols = train_imgdf1.shape
print(f" Count of Rows : {n_rows} \n Count of Columns : {n_cols} ")

In [None]:
# Number of images per class, smallest class first, as a two-column frame
# with columns `label` and `count`.
#
# The column names are set explicitly via rename_axis / reset_index(name=...)
# instead of renaming whatever reset_index() happens to produce: pandas < 2.0
# yields ("index", "label") while pandas >= 2.0 yields ("label", "count"), and
# the old rename mapping turned the latter into two "count" columns.
train_imgdf2 = (
    train_imgdf1['label']
    .value_counts(ascending=True)
    .rename_axis('label')
    .reset_index(name='count')
)
train_imgdf2

In [None]:
# Bar chart of images per class.
# The figure (and its size) must be created BEFORE drawing: calling
# plt.figure(figsize=...) after plt.bar() opens a second, empty figure and
# leaves the bar chart at the default size.
x = train_imgdf2['label']
y = train_imgdf2['count']

fig, ax = plt.subplots(figsize=(8, 5))
ax.bar(x, y)
ax.set_title("FRUITES")
ax.set_xlabel("label")
ax.set_ylabel("count")
# Class names are long and numerous; rotate them so they do not overlap.
ax.tick_params(axis='x', labelrotation=90)
fig.tight_layout()
plt.show()

**Train, Validation and Test**

In [None]:
# Hold out 15% of the labelled images, stratified by class so every label
# keeps the same proportion in both parts. A fixed random_state makes the
# split (and therefore every downstream metric) reproducible across runs.
x_train, x_test = train_test_split(
    train_imgdf1,
    test_size=0.15,
    stratify=train_imgdf1['label'],
    random_state=42,
)

print('Shape of Training Data : ', x_train.shape)
print('Shape of Testing Data : ', x_test.shape)

**Augmenting the Image Dataset**

In [None]:
# One generator object serves both splits; it only applies the MobileNetV2
# preprocessing function to each image (no random augmentation is configured).
img_datagen = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

img_size = (224, 224)

# Settings shared by the training and hold-out iterators.
flow_options = dict(
    x_col='FilePaths',
    y_col='label',
    target_size=img_size,
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    seed=42,
)

# NOTE: x_train / x_test are rebound from DataFrames to batch iterators here;
# later cells rely on these names referring to the iterators.
x_train = img_datagen.flow_from_dataframe(dataframe=x_train, **flow_options)
x_test = img_datagen.flow_from_dataframe(dataframe=x_test, **flow_options)

**Building a Model**

In [None]:
# Size the output layer from the classes the training iterator actually
# discovered instead of hard-coding the count, and derive the input shape
# from the same img_size the iterators resize to.
num_classes = len(x_train.class_indices)

model = keras.Sequential([

    # First convolutional block.
    # input_shape is [height, width, colour channels (RGB)].
    tf.keras.layers.Conv2D(filters=32, kernel_size=5, activation="relu", padding='same',
                           input_shape=(*img_size, 3)),
    tf.keras.layers.MaxPool2D(),

    # Second convolutional block
    tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation="relu", padding='same'),
    tf.keras.layers.MaxPool2D(),

    # Third convolutional block
    tf.keras.layers.Conv2D(filters=128, kernel_size=3, activation="relu", padding='same'),
    tf.keras.layers.MaxPool2D(),

    # Classifier head: one softmax unit per class.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation="relu"),
    tf.keras.layers.Dense(num_classes, activation="softmax"),
])
model.summary()

# The iterators emit one-hot labels (class_mode='categorical') and the head is
# a softmax, so the matching loss is categorical cross-entropy. With
# binary_crossentropy Keras also resolves "accuracy" to per-element binary
# accuracy, which reports a misleadingly high score for multi-class problems.
model.compile(optimizer="adam",
              loss="categorical_crossentropy",
              metrics=["accuracy"])

In [None]:
# Stop when the held-out loss has not improved for 3 consecutive epochs and
# roll back to the best weights seen. Monitoring the *training* accuracy
# (the previous setting) cannot detect overfitting, because that metric keeps
# rising while the validation metrics degrade.
Callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# `callbacks` is documented as a list of callback instances.
model_fit = model.fit(
    x_train,
    validation_data=x_test,
    epochs=10,
    callbacks=[Callback],
)

In [None]:
# Training vs. held-out loss per epoch, as recorded by model.fit().
history = model_fit.history

fig, ax = plt.subplots()
ax.plot(history['loss'], label='train')
ax.plot(history['val_loss'], label='test')
ax.legend()
plt.show()

In [None]:
# Heading, one blank line, then the layer-by-layer summary.
print('Model summary :', end='\n\n')
model.summary()

**Testing Model**

In [None]:
# model.evaluate returns [loss, <compiled metrics...>]; index 1 is the first
# compiled metric (accuracy), converted here to a percentage.
eval_results = model.evaluate(x_test)
test_accuracy = eval_results[1] * 100
print('Test accuracy is : ', test_accuracy, '%')

In [None]:
# Same construction as for the training images: every .jpg under the test
# directory (recursively), as full path string plus bare file name.
direc2 = Path('../input/fruits-recognition/fruits_data/test')
test_image_paths = list(direc2.glob('**/*.jpg'))

filepaths2 = pd.Series([str(p) for p in test_image_paths], name='FilePaths').astype(str)
filename2 = pd.Series([p.name for p in test_image_paths], name='filename').astype(str)

# Both Series share the same positional index, so a column-wise concat
# pairs each path with its own file name.
test_imgdf = pd.concat([filepaths2, filename2], axis=1)

test_imgdf

In [None]:
# Frame that will receive one predicted label per test image.
#
# The file names are taken from test_imgdf (the frame the prediction iterator
# is built from) rather than from a fresh os.listdir() call: os.listdir
# returns entries in arbitrary order and also lists non-image entries, so its
# rows are not guaranteed to line up with the order in which predictions are
# produced. Reusing test_imgdf keeps row i of test_df and prediction i on the
# same image.
test_filenames = test_imgdf['filename'].tolist()
test_df = pd.DataFrame({
    'filename': test_filenames
})
nb_samples = test_df.shape[0]

In [None]:
# Unlabelled iterator over the test images, using the same preprocessing and
# target size as during training.
test_gen = ImageDataGenerator(preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input)
test_generator = test_gen.flow_from_dataframe(
    dataframe=test_imgdf,
    directory="../input/fruits-recognition/fruits_data/test/",
    x_col='filename',
    y_col=None,
    target_size=img_size,
    class_mode=None,   # no labels: the iterator yields image batches only
    batch_size=32,
    # Must be an explicit False: predictions are matched to file names by
    # position, so the iteration order has to follow the dataframe order.
    # (None only disabled shuffling by virtue of being falsy; with shuffling
    # off, a seed has no effect and is therefore omitted.)
    shuffle=False,
)

In [None]:
# Predict class probabilities for every test image. The iterator knows its
# own number of batches, so no `steps` argument is needed; the previous value
# was a float derived from a separate directory listing, which could disagree
# with the number of images the iterator actually serves.
predict = model.predict(test_generator)

In [None]:
# Show the (class name, integer index) pairs assigned by the training iterator.
x_train.class_indices.items()

In [None]:
# For each image keep the index of the highest-scoring class
# (argmax over the last axis of the prediction array).
test_df['label'] = predict.argmax(axis=-1)

In [None]:
# Invert the iterator's name -> index mapping into index -> name.
label_map = {class_idx: class_name
             for class_name, class_idx in x_train.class_indices.items()}

In [None]:
# Translate predicted class indices into class names using label_map.
# Values that are not keys of label_map are left unchanged by replace().
test_df['label'] = test_df['label'].replace(label_map)

In [None]:
# Display the file names together with their predicted labels.
test_df

In [None]:
# Load the test-set listing. It is joined on `filename` in the next cell,
# so that column is expected to be present.
testing_csv = '../input/fruits-recognition/fruits_data/Testing_set.csv'
labels2 = pd.read_csv(testing_csv)
labels2

In [None]:
# Left join: keep every row (and the row order) of the test listing and
# attach the predicted label where a prediction exists for that file name.
test_imgdf1 = labels2.merge(test_df, on='filename', how='left')
test_imgdf1

In [None]:
# Submission frame: the predictions without the file-name column.
# drop() returns a new frame, so test_imgdf1 itself is left untouched.
test_imgdf2 = test_imgdf1.drop(columns='filename')
test_imgdf2

In [None]:
# Right join: one row per entry of test_imgdf1, enriched with the image path
# from test_imgdf so the predictions can be inspected visually.
test_imgdf_to_see = test_imgdf.merge(test_imgdf1, on='filename', how='right')
test_imgdf_to_see

In [None]:
# Preview up to the first 40 test images, each titled with its predicted label.
# Rows are taken positionally (head), so this works regardless of the frame's
# index labels and does not raise when fewer than 40 images are available;
# any surplus axes are simply left blank.
preview = test_imgdf_to_see.head(40)

f, a = plt.subplots(nrows=5, ncols=8, figsize=(13, 7),
                    subplot_kw={'xticks': [], 'yticks': []})

for ax, img_path, img_label in zip(a.flat, preview['FilePaths'], preview['label']):
    ax.imshow(plt.imread(img_path))
    ax.set_title(img_label)

plt.tight_layout()
plt.show()

**Outputting the results as a Submission**

In [None]:
# Write the predictions as the submission CSV, without the index column.
# NOTE(review): the absolute /kaggle/working path presumably only exists on Kaggle — confirm before running elsewhere.
test_imgdf2.to_csv('/kaggle/working/test_submission.csv', index=False)