In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found beneath the Kaggle input
# directory, then print the paths one per line.
input_paths = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Importing libraries and modules

In [None]:
import numpy as np
import tensorflow as tf

# Train and Test data

### learn more about pathlib
* https://towardsdatascience.com/10-examples-to-master-python-pathlib-1249cc77de0b

In [None]:
import pathlib
# Locations of the Fruits-360 training and test image folders,
# wrapped as pathlib.Path objects.
train_dir, test_dir = (
    pathlib.Path(location)
    for location in (
        "../input/fruits/fruits-360_dataset/fruits-360/Training",
        "../input/fruits/fruits-360_dataset/fruits-360/Test",
    )
)

### New function/Concept 'glob()'
* The glob module is a useful part of the Python standard library. glob (short for global) is used to return all file paths that match a specific pattern.

* We can use glob to search for a specific file pattern, or perhaps more usefully, search for files where the filename matches a certain pattern by using wildcard characters.
* Learn more about glob
* https://towardsdatascience.com/the-python-glob-module-47d82f4cbd2d

In [None]:
# Count every .jpg that sits one folder below the training directory
# (one sub-folder per class) without materialising the list of paths.
training_images = train_dir.glob('*/*.jpg')
image_count = sum(1 for _ in training_images)
image_count

# Showing / Visualize Image


* Here we use matplotlib to visualize our data
* Each image is opened with Pillow, which decodes it into pixel values that matplotlib can draw


In [None]:
import matplotlib.pyplot as plt
# Import the submodule explicitly: a bare `import PIL` does not load
# `PIL.Image`, so `PIL.Image.open` would only work if another package
# happened to import it first.
import PIL.Image

# All banana images in the training set.
fruits = list(train_dir.glob('Banana/*.jpg'))

plt.figure(figsize=(10, 10))

# Slice rather than index so that fewer than three files on disk does not
# raise IndexError; at most the first three images are shown.
for i, fruit_path in enumerate(fruits[:3]):
    plt.subplot(3, 3, i + 1)
    img = PIL.Image.open(str(fruit_path))
    plt.imshow(img)
    plt.axis('off')

plt.show()

# Setting Up variables

In [None]:
# Number of images per batch produced by the dataset loaders.
batch_size = 32
# Target image size (pixels) passed to the dataset loaders and the base model;
# height and width are equal.
img_height = img_width = 100


# Collecting Data
### Using the Keras 'image_dataset_from_directory' API to collect data from directories
* Learn more about 'image_dataset_from_directory' 
* https://www.tensorflow.org/api_docs/python/tf/keras/utils/image_dataset_from_directory

In [None]:
# Training subset: 80% of the images under train_dir. The seed and split
# fraction must match the validation loader so the two subsets do not overlap.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=train_dir,
    batch_size=batch_size,
    image_size=(img_height, img_width),
    validation_split=0.2,
    subset='training',
    seed=42,
)

In [None]:
# Validation subset: the remaining 20% of the images under train_dir. The seed
# and split fraction must match the training loader so the subsets do not overlap.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory=train_dir,
    batch_size=batch_size,
    image_size=(img_height, img_width),
    validation_split=0.2,
    subset='validation',
    seed=42,
)

# Visualizing fruits by class

In [None]:
# Class labels as reported by the training dataset object. They are captured
# here, before train_ds is reassigned by the cache/shuffle/prefetch pipeline.
class_names = train_ds.class_names
# Number of output units needed by the classification head.
num_classes = len(class_names)

In [None]:
# Preview one training batch: the first 25 images laid out on a 5x5 grid,
# each titled with its class name.
plt.figure(figsize=(10, 10))

for batch_images, batch_labels in train_ds.take(1):
    for slot in range(25):
        # Cast to uint8 so matplotlib treats the values as 0-255 pixels.
        pixels = batch_images[slot].numpy().astype('uint8')
        label_name = class_names[batch_labels[slot]]

        plt.subplot(5, 5, slot + 1)
        plt.imshow(pixels)
        plt.title(label_name)
        plt.axis('off')

# Preprocessing/Setting Up Base Model

### Prefetch the data so the next batch is prepared while the model is training
* Learning more about prefetch and AUTOTUNE
* prefetch : https://towardsdatascience.com/optimising-your-input-pipeline-performance-with-tf-data-part-1-32e52a30cac4#:~:text=Prefetching%20solves%20the,they%20are%20requested.
* AUTOTUNE : https://stackoverflow.com/questions/56613155/tensorflow-tf-data-autotune

In [None]:
# Let tf.data choose the prefetch buffer size at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Training pipeline: cache, shuffle with a 1000-element buffer,
# then prefetch upcoming batches.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: same caching and prefetching, but no shuffling.
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

### Data Augmentation
* Data augmentation is a set of techniques to artificially increase the amount of data by generating new data points from existing data. This includes making small changes to data or using deep learning models to generate new data points.



In [None]:
# Augmentation pipeline: a random horizontal flip followed by a random
# rotation (factor 0.2), built by appending layers to a Sequential model.
preprocessing_layers = tf.keras.layers.experimental.preprocessing

data_augmentation = tf.keras.Sequential()
data_augmentation.add(preprocessing_layers.RandomFlip('horizontal'))
data_augmentation.add(preprocessing_layers.RandomRotation(0.2))

### Using the ResNet Model for Transfer Learning
* Learn more about ResNet by going through paper
* https://paperswithcode.com/method/resnet

In [None]:
# Short alias for the ResNet input-preprocessing function; it is applied to
# the augmented inputs when the model is assembled.
preprocess_input = tf.keras.applications.resnet.preprocess_input

In [None]:
# ResNet50 backbone with ImageNet weights and without its original
# classification top, sized for the notebook's RGB input images.
base_model = tf.keras.applications.resnet.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(img_height, img_width, 3),
)

* Setting the base model's `trainable` attribute to False freezes its weights, so the model takes less time to train

In [None]:
# Freeze the pretrained backbone so its weights are not updated during training.
base_model.trainable = False

In [None]:
# Classification head, one unit per class. No activation is set, so the layer
# emits raw logits (the loss is configured with from_logits=True).
prediction_layer = tf.keras.layers.Dense(units=num_classes)
# Pools the backbone's spatial feature maps down to one vector per image.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

# Building Model

In [None]:
# Assemble the full model:
#   input -> augmentation -> ResNet preprocessing -> frozen ResNet50
#         -> global average pooling -> dropout -> dense logits.
# The input shape is taken from img_height/img_width (the same values used by
# the dataset loaders and the base model) instead of a hard-coded 100x100, so
# changing the image size in one place keeps everything consistent.
inputs = tf.keras.Input(shape=(img_height, img_width, 3))
x = data_augmentation(inputs)
x = preprocess_input(x)
# training=False keeps the backbone in inference mode when it is called here.
x = base_model(x, training=False)
x = global_average_layer(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = prediction_layer(x)

model = tf.keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Adam with a small learning rate for training the new classification head.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
# Labels are integer class ids and the model emits logits, hence the sparse
# categorical cross-entropy with from_logits=True.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(loss=loss_fn, optimizer=optimizer, metrics=['accuracy'])

In [None]:
# Print a layer-by-layer overview of the assembled model.
model.summary()

# Training the model

In [None]:
# Baseline: evaluate on the validation set before the model has been fitted.
model.evaluate(val_ds)

* The evaluation accuracy is very bad at this point, but wait until the model has been trained

In [None]:
# Number of full passes over the training data.
epochs = 15

# Train the model, evaluating on the validation set after each epoch;
# the returned History object holds the per-epoch metrics.
history = model.fit(x=train_ds, validation_data=val_ds, epochs=epochs)

# Visualization of Accuracy and loss

### Loss 

In [None]:
# Per-epoch loss values recorded by model.fit.
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# Size the x-axis from what was actually recorded rather than from `epochs`,
# so the plot still works if training stopped before the configured count.
epochs_range = range(len(train_loss))

plt.figure(figsize=(12, 10))
plt.plot(epochs_range, train_loss, label="Training Loss")
plt.plot(epochs_range, val_loss, label="Validation Loss")
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper left')
plt.title('Training and Validation Loss')

plt.show()

### Accuracy

In [None]:
# Per-epoch accuracy values recorded by model.fit.
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

# Size the x-axis from what was actually recorded rather than from `epochs`,
# so the plot still works if training stopped before the configured count.
epochs_range = range(len(train_acc))

plt.figure(figsize=(12, 10))
plt.plot(epochs_range, train_acc, label="Training Accuracy")
plt.plot(epochs_range, val_acc, label="Validation Accuracy")
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='upper left')
plt.title('Training and Validation Accuracy')

plt.show()

# I hope you like this ;)

# Checking prediction on validation data-set with visualisation

In [None]:
def predict(model, img, labels=None):
    """Classify a single image and report the winning class with its confidence.

    Args:
        model: Object exposing ``predict(batch)`` that returns one row of raw
            class scores (logits) per input image.
        img: One image as an array-like of shape (height, width, channels).
        labels: Optional sequence of class names indexed by class id.
            Defaults to the notebook-level ``class_names``.

    Returns:
        Tuple ``(predicted_class, confidence)``. ``confidence`` is the softmax
        probability of the predicted class expressed as a percentage and
        rounded to a whole number.
    """
    if labels is None:
        labels = class_names

    # Use the image that was passed in. Reading notebook globals here would
    # silently classify whatever image the last loop iteration left behind.
    img_array = np.asarray(img, dtype="float32")
    # The model expects a batch, so add a leading batch axis of size 1.
    img_array = np.expand_dims(img_array, 0)

    logits = np.asarray(model.predict(img_array)[0], dtype="float64")

    # The model emits logits, not probabilities. Apply a numerically stable
    # softmax so the reported confidence is a real percentage in [0, 100].
    exp_scores = np.exp(logits - np.max(logits))
    probabilities = exp_scores / exp_scores.sum()

    best = int(np.argmax(probabilities))
    predicted_class = labels[best]
    confidence = round(100 * float(probabilities[best]), 0)
    return predicted_class, confidence

In [None]:
# Show the first nine images of one validation batch on a 3x3 grid, each
# captioned with its actual class, the predicted class and the confidence.
plt.figure(figsize=(15, 15))
for images, labels in val_ds.take(1):
    for i in range(9):
        image = images[i].numpy()

        plt.subplot(3, 3, i + 1)
        plt.imshow(image.astype("uint8"))

        predicted_class, confidence = predict(model, image)
        actual_class = class_names[labels[i]]

        caption = f"Actual: {actual_class}, \n Predicted: {predicted_class}.\n Confidence: {confidence}%"
        plt.title(caption)
        plt.axis('off')



# Saving model

In [None]:
# Version number used as the sub-folder name for the saved model.
model_version = 1
export_dir = f"./models/{model_version}"
model.save(export_dir)

# Zipping all the files in models

In [None]:
import shutil

# Archive only the `models` folder into ./model.zip. Zipping the whole working
# directory would also pick up unrelated files, including the model.zip that
# is being written there. base_dir keeps the `models/` prefix on every entry.
shutil.make_archive("model", 'zip', './', base_dir='models')