In [None]:
# This is transfer learning with Mobilenet V2
# https://www.youtube.com/watch?v=84J1fMklQWE
# Downloads the feature extractor from TensorFlow Hub (tfhub.dev)
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_hub as hub

from IPython.display import Image, display

import os
import numpy as np

import tensorflow_datasets as tfds

import warnings
# Suppress every warning for the rest of the session. Note that this also
# hides deprecation warnings, so remove it when debugging API changes.
warnings.filterwarnings('ignore')

In [None]:
# Mount Google Drive at /content/drive so that files stored on Drive can be
# read and written from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Show the GPU (if any) attached to this runtime.
!nvidia-smi

In [None]:
# Root of the user's Drive. This must agree with the mount point passed to
# drive.mount('/content/drive'); the former "/content/gdrive" prefix pointed
# at a directory that is never mounted.
DRIVE_HOME = "/content/drive/MyDrive"

In [None]:
# Colab notebooks normally start in /content; the `!pwd` below prints the
# actual working directory so this can be confirmed.
# Optional (disabled): switch to the dataset folder on Drive before downloading.
#!pwd
#!ls
#%cd $DRIVE_HOME/Technical/Coding/Kaggle_datasets/Bean_Classification
!pwd


In [None]:
import os
# Tell the Kaggle CLI where to look for kaggle.json. The folder has to live
# under the Drive mount point (/content/drive); "/content/gdrive" is never
# mounted in this notebook.
os.environ['KAGGLE_CONFIG_DIR'] = '/content/drive/MyDrive/Technical/Coding/Kaggle_datasets'

In [None]:
import getpass
import json
import os

# Kaggle API documentation for configuring access credentials:
# https://github.com/Kaggle/kaggle-api
# An API token can be generated under Profile -> Account; Kaggle then
# downloads a kaggle.json holding "username" and "key".
#
# SECURITY: never store the token in the notebook itself. A key that was
# hard-coded in an earlier revision of this cell must be treated as leaked
# and revoked in the Kaggle account settings. Credentials are now read from
# the KAGGLE_USERNAME / KAGGLE_KEY environment variables, with an interactive
# prompt as fallback (getpass keeps the key out of the cell output).
api_token_json_str = {
    "username": os.environ.get("KAGGLE_USERNAME") or input("Kaggle username: "),
    "key": os.environ.get("KAGGLE_KEY") or getpass.getpass("Kaggle API key: "),
}

# Create the config directory first; on a fresh runtime it does not exist and
# the open() below would fail.
kaggle_dir = '/root/.kaggle'
os.makedirs(kaggle_dir, exist_ok=True)
kaggle_json_path = os.path.join(kaggle_dir, 'kaggle.json')
with open(kaggle_json_path, 'w') as f:
  json.dump(api_token_json_str, f)
# Restrict the token file to the current user; the Kaggle CLI warns when the
# file is readable by others.
os.chmod(kaggle_json_path, 0o600)
 

In [None]:
!cp ~/.kaggle/kaggle.json /content/gdrive/MyDrive/Technical/Coding/Kaggle_datasets
!kaggle datasets download -d prakharrastogi534/bean-leaf-dataset

In [None]:
# Unzip the Dataset to extract train/test/validation folders
!pwd
!ls -lart
!unzip bean-leaf-dataset.zip

In [None]:
# Count files: first everything below the working directory, then each split.
# Note that the first count includes every regular file under "." (not only
# images), so it can exceed the sum of the three splits.
!pwd
!find . -type f | wc -l # Total Images
!find ./train -type f | wc -l # Train Images
!find ./test -type f | wc -l # Test Set
!find ./validation -type f | wc -l # Validation Set

In [None]:
# Show a sample image. The path is relative, using the same 'train/train/...'
# layout that the dataset-loading cells rely on, so the cell works wherever
# the archive was extracted rather than requiring a copy under one fixed
# Drive folder.
display(Image("train/train/angular_leaf_spot/angular_leaf_spot_train.0.jpg"))

In [None]:
# Transfer-learning setup: a pre-trained MobileNet V2 is reused and only the
# final layers are trained on top of it.
# Images are fed in batches of 128 and resized to 224x224 pixels.
batch_size = 128
img_height = img_width = 224

In [None]:
# Build the training dataset with the Keras preprocessing helper: images under
# 'train/train' are resized to (img_height, img_width) and grouped into
# batches, so the result can be fed to the model directly.
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory='train/train',
    batch_size=batch_size,
    image_size=(img_height, img_width),
    seed=111,
)


In [None]:
# Test split: same resizing and batching as the training dataset.
test_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory='test/test',
    batch_size=batch_size,
    image_size=(img_height, img_width),
    seed=111,
)

In [None]:
# Validation split: same resizing and batching as the training dataset.
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory='validation/validation',
    batch_size=batch_size,
    image_size=(img_height, img_width),
    seed=111,
)

In [None]:
# BUG FIX: this cell used to rebuild val_ds from 'train/train', silently
# replacing the validation split with the training images, so val_accuracy
# became a second measurement of training accuracy. Load the held-out
# validation folder instead.
val_ds = tf.keras.preprocessing.image_dataset_from_directory('validation/validation',
  seed=111,
  image_size=(img_height, img_width),
  batch_size=batch_size)

In [None]:
# Print the image-tensor and label-tensor shapes of every batch in train_ds.
for batch_images, batch_labels in train_ds:
  for tensor in (batch_images, batch_labels):
    print(tensor.shape)


In [None]:
# List the class names exposed by the training dataset.
classes = train_ds.class_names
print(classes)

# Show the first 9 images of the first batch in a 3x3 grid, each titled with
# its class name.
plt.figure(figsize=(10, 10))
first_images, first_labels = next(iter(train_ds.take(1)))
for idx in range(9):
  ax = plt.subplot(3, 3, idx + 1)
  plt.imshow(first_images[idx].numpy().astype("uint8"))
  plt.title(classes[first_labels[idx]])
  plt.axis("off")


In [None]:
# Cache the batches and let tf.data prefetch upcoming ones, with the buffer
# size chosen automatically (AUTOTUNE), to save time during training.
AUTOTUNE = tf.data.AUTOTUNE

train_ds = train_ds.cache()
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)


In [None]:
# TF Hub is a model zoo of pre-trained models (text and image problem domains).
# A "feature vector" model is a trained network that outputs only feature
# vectors, without the final classification layers.
# Those final layers are added and trained in this notebook.
feature_extractor = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4"

In [None]:
# Wrap the TF Hub module as a Keras layer taking 3-channel images of the
# configured height and width.
feature_extractor_layer = hub.KerasLayer(
    feature_extractor,
    input_shape=(img_height, img_width, 3),
)

In [None]:
# Freeze the pre-trained feature extractor: it is reused as-is, not retrained.
# Unless the data is completely different, the extractor can be reused and only
# the final dense/classification layers added on top need training.
# If the data is completely different, the same architecture can be kept and
# all layers retrained instead.
feature_extractor_layer.trainable = False

In [None]:
# Repeats the freeze from the previous cell; the assignment is idempotent, so
# running it again has no further effect.
feature_extractor_layer.trainable = False

In [None]:
# Optimizers converge faster on normalized inputs, so scale pixel values from
# [0, 255] down to [0, 1].
# Rescaling has been available as a stable layer at tf.keras.layers since
# TF 2.6; the experimental.preprocessing alias is deprecated and missing from
# newer TensorFlow/Keras releases.
normalization_layer = tf.keras.layers.Rescaling(1./255)

In [None]:
# Seed TensorFlow's global random generator with the same value (111) that
# was passed as `seed` when the datasets were prepared above.
tf.random.set_seed(111)

In [None]:
# Assemble the classifier on top of the pre-trained feature vector:
#   1. rescaling of the input pixels,
#   2. the pre-trained feature extractor,
#   3. dropout (rate 0.3) to reduce overfitting,
#   4. a 3-unit softmax layer producing the three output predictions.
model = tf.keras.Sequential()
for layer in (
    normalization_layer,
    feature_extractor_layer,
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(3, activation='softmax'),
):
  model.add(layer)


In [None]:
# Sparse categorical cross-entropy is used because the targets are integer
# class ids (not one-hot encoded); with one-hot targets use
# CategoricalCrossentropy instead.
# from_logits must be False here: the model's last layer already applies
# softmax, so its outputs are probabilities, not logits. Declaring them as
# logits is a misconfiguration that can make the loss apply softmax a second
# time (some Keras versions detect this and only emit a warning, which this
# notebook suppresses).
model.compile(
  optimizer='adam',
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=False),
  metrics=['accuracy'])


In [None]:
# Train for 20 epochs (10 would also do); training is quick because the
# feature extractor is frozen and only the layers after it are trained.
# An EarlyStopping callback could be added if accuracy stops improving.
history = model.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=20,
)

In [None]:
# Note that rescaling (pre-processing) is built into the model itself, so
# deploying the graph object deploys the pre-processing along with it.
# Keras offers many pre-processing options that live inside the model's
# computation graph, e.g. one-hot encoding and standardization.
# Per the original author, this is a feature introduced after TF 2.4.
model.summary()

In [None]:
# Plot training and validation accuracy per epoch.
for metric_name in ('accuracy', 'val_accuracy'):
  plt.plot(history.history[metric_name])
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train_acc', 'val_acc'], loc='best')
plt.show()


In [None]:
# Evaluate the trained model on the test dataset.
# Original author's observation: the result was close to the train/validation
# accuracy, so it was judged decent enough.
result = model.evaluate(test_ds)

In [None]:
# Visualize model output with actual and predicted labels:
# one batch, up to 9 images in a 3x3 grid of subplots.
plt.figure(figsize=(10, 10))
for images, labels in test_ds.take(1):
  # Never index past the end of a batch that holds fewer than 9 images.
  n_shown = min(9, int(images.shape[0]))

  # One predict() call for all displayed images instead of one call per
  # image. The slice already carries the leading batch dimension the model
  # expects, so no per-image array conversion / expand_dims is needed.
  preds = model.predict(images[:n_shown])
  pred_ids = np.argmax(preds, axis=1)

  for i in range(n_shown):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(images[i].numpy().astype("uint8"))
    plt.title("Actual Label: %s" % classes[int(labels[i])])
    plt.text(1, 240, "Predicted Label: %s" % classes[int(pred_ids[i])], fontsize=12)
    plt.axis("off")

# Lay the grid out once, after every subplot exists.
plt.tight_layout()


In [None]:
# Save in the TensorFlow ("tf") format for deployment through a CI/CD pipeline.
# "tf" is the latest format; the older h5 format could be used as well.
model.save('./models', save_format='tf')


In [None]:
# Inspect the contents of the saved-model directory.
# Per the original author: saved_model.pb is the graph object, and
# keras_metadata.pb holds the overall Keras metadata, including the
# pre-processing function.
# TODO - Load the folder structure
!pwd
!ls -lart models

In [None]:
# Load the saved model back from disk and print its summary to verify it.
model_loaded = tf.keras.models.load_model('./models/')
model_loaded.summary()

In [None]:
# PIL and scikit-image are used to pre-process a single image file before
# passing it to inference.
# PIL's Image module is imported under an alias so that it does not shadow
# IPython.display.Image, which the sample-image cell earlier in the notebook
# needs when cells are re-run.
from PIL import Image as PILImage
import numpy as np
from skimage import transform


def process(filename):
   """Load an image file as a model-ready batch of one.

   Args:
     filename: Path of an image file that PIL can open.

   Returns:
     A float array of shape (1, 224, 224, 3) with RGB pixel values on the
     0-255 scale; the 1/255 rescaling is done by the model's own first layer.
   """
   # The context manager closes the file handle once the pixels are copied.
   with PILImage.open(filename) as pil_image:
      # Force exactly 3 channels: for grayscale or RGBA input, resize() to
      # (224, 224, 3) would otherwise interpolate across the channel axis.
      np_image = np.array(pil_image.convert('RGB')).astype('float32')
   # preserve_range keeps the 0-255 scale explicit instead of relying on
   # dtype-dependent defaults.
   np_image = transform.resize(np_image, (224, 224, 3), preserve_range=True)
   # Add the leading batch dimension expected by model.predict().
   return np.expand_dims(np_image, axis=0)


In [None]:
# Classify one known training image with the reloaded model and print the
# name of the highest-scoring class.
pred_label = model_loaded.predict(
    process('train/train/healthy/healthy_train.0.jpg')
)
best_class_index = np.argmax(pred_label)
print(classes[best_class_index])


In [None]:
# Display the raw output of predict() for the sample image.
pred_label

In [None]:
# Zip the saved-model directory; this archive is what gets deployed through CI/CD.
!zip -r models.zip models/

In [None]:
# The TF version used to build this model must match the version used in the
# serving container. TF APIs change aggressively, so record it explicitly.
# This model was trained on Colab https://colab.research.google.com/drive/1mCqYXlEQu075Q3ByMICtEcS_bhjI4sI2#scrollTo=zKqC-DSAuF23
# At that time, the TF version used was 2.8.0
tf.__version__

In [None]:
# List the working directory (including models.zip, if the zip step ran).
!ls -lart