# DEVICE-BASED MODELS WITH TENSORFLOW LITE from [DeepLearning.AI](https://www.coursera.org/learn/device-based-models-tensorflow)

# Transfer Learning with TensorFlow Hub for TFLite

In [106]:
import numpy as np
import matplotlib.pylab as plt

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
tfds.disable_progress_bar()

from tqdm import tqdm

# Report the library versions in use and whether TensorFlow can see a GPU.
print("\u2022 Using TensorFlow Version:", tf.__version__)
print("\u2022 Using TensorFlow Hub Version: ", hub.__version__)
# The conditional relies on list_physical_devices('GPU') being falsy (empty) when no GPU is visible.
print('\u2022 GPU Device Found.' if tf.config.list_physical_devices('GPU') else '\u2022 GPU Device Not Found. Running on CPU')

• Using TensorFlow Version: 2.11.0
• Using TensorFlow Hub Version:  0.12.0
• GPU Device Not Found. Running on CPU


### Data Preprocessing

Use [TensorFlow Datasets](http://tensorflow.org/datasets) to load the cats and dogs dataset.

The `tfds` package is the easiest way to load pre-defined data. If you have your own data and are interested in using it with TensorFlow, see the loading image data tutorial (TensorFlowLearn/TensorFlowTutorials/02_Load and Preprocess Data/images.ipynb).

The `tfds.load` method downloads and caches the data, and returns a `tf.data.Dataset` object. These objects provide powerful, efficient methods for manipulating data and piping it into your model.

Since `"cats_vs_dogs"` only has one defined split, `train`, we are going to divide that into (train, validation, test) with 80%, 10%, 10% of the data respectively.

In [14]:
# Load the dataset (stored under ./data) and slice its single 'train' split
# into three consecutive parts: first 80%, next 10%, last 10%.
(train_examples, validation_examples, test_examples), info = tfds.load(
    name="cats_vs_dogs",
    split=['train[:80%]', 'train[80%:90%]', 'train[90%:]'],
    data_dir='./data',
    as_supervised=True,
    with_info=True,
)

# Metadata read from the returned info object.
num_classes = info.features['label'].num_classes
num_examples = info.splits['train'].num_examples

In [10]:
# Instantiate the dataset builder class directly; as the last expression of the
# cell, its repr is what gets displayed.
tfds.image_classification.CatsVsDogs()

<tensorflow_datasets.image_classification.cats_vs_dogs.CatsVsDogs at 0x225fc405cd0>

## Experiment

In [56]:
# import urllib.request, urllib.error
# import zipfile
# import os

# # Downloading the zip file
# url = 'https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_5340.zip'
# fname = 'cats_vs_dogs'
# local_zip = urllib.request.urlretrieve(url, fname + '.zip')

In [76]:
# local_zip = fname + '.zip'
# local_zip

'cats_vs_dogs.zip'

In [77]:
# # unzipping it to a current directory
# with zipfile.ZipFile(local_zip, 'r') as zip_ref:
#     zip_ref.extract('PetImages', './data')

In [69]:
# # renaming the folder from the archive
# # and keeping the directory we need
# os.renames('./data/PetImages',
#            f'./data/{fname}')

In [78]:
# # deleting the zip file from system
# os.remove(fname + '.zip')

In [8]:
# from pathlib import Path

# # Base directory
# base_dir = Path('./data/cats_vs_dogs')
# # base_dir = f'/content/drive/MyDrive/Sber DS/Diploma/data/cats_vs_dogs/'  # for colab
# classes = list(base_dir.iterdir())

# # and its contents
# print("Содержимое базовой директории:")
# print(classes)

In [26]:
# base_dir = Path('./data').glob('**/*')

## list the directories
# [x for x in base_dir]  # or
# list(base_dir.iterdir())

# # list the files in the directory
# [f for f in base_dir.iterdir() if f.is_file()]

In [130]:
import os

# Base directory of the extracted dataset
base_dir = './data/cats_vs_dogs'
# base_dir = '/content/drive/MyDrive/Sber DS/Diploma/data/cats_vs_dogs/'  # for colab

# Keep only sub-directories (one per class) and sort them by name.
# os.listdir() returns entries in arbitrary order and would also count stray
# files, which would inflate num_classes.
with os.scandir(base_dir) as entries:
    classes = sorted(entry.name for entry in entries if entry.is_dir())

num_classes = len(classes)

# ... and its contents
print("Содержимое базовой директории:")
print(classes)
num_classes

Содержимое базовой директории:
['Cat', 'Dog']


2

### Delete corrupted files

In [135]:
from pathlib import Path
from tensorflow.io import read_file
from tensorflow.image import decode_image

base_dir = Path('./data/cats_vs_dogs')


def remove_corrupt_images(class_dir):
    """Delete every file in ``class_dir`` that cannot be decoded as a 3-D image.

    Each regular file is read and decoded with ``decode_image``. A file is
    deleted (and a message printed) when decoding raises, or when the decoded
    tensor does not have exactly three dimensions.

    Args:
        class_dir: directory (``str`` or ``Path``) holding the images of one class.
    """
    for image in sorted(Path(class_dir).glob('*')):
        # Only regular files can be decoded or unlinked; skip anything else.
        if not image.is_file():
            continue

        try:
            img = decode_image(read_file(str(image)))
        except Exception as e:
            # image.name is the bare file name on every OS; splitting the path
            # on '/' leaves the full path untouched on Windows.
            print(f"[ERR] {image.name}: {e} DELETED")
            image.unlink()
            continue

        if img.ndim != 3:
            print(f"[FILE_CORRUPT] {image.name} DELETED")
            image.unlink()


# base_dir is of type Path and points to the parent dir,
# which contains the directories 'Dog' and 'Cat'.
# The same clean-up has to be run for the 'Cat' directory as well.
remove_corrupt_images(base_dir / 'Dog')

[FILE_CORRUPT] data\cats_vs_dogs\Dog\10158.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Dog\10401.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Dog\10747.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Dog\10797.jpg DELETED
[ERR] data\cats_vs_dogs\Dog\11233.jpg: {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Number of channels inherent in the image must be 1, 3 or 4, was 2 [Op:DecodeImage] DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Dog\11410.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Dog\11675.jpg DELETED
[ERR] data\cats_vs_dogs\Dog\11702.jpg: {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Input is empty. [Op:DecodeImage] DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Dog\11849.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Dog\11853.jpg DELETED
[ERR] data\cats_vs_dogs\Dog\11912.jpg: {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Number of channels inherent in t

In [136]:
# Delete every file in the 'Cat' directory that cannot be decoded as a 3-D image.
for image in sorted((base_dir / 'Cat').glob('*')):
    # Only regular files can be decoded or unlinked; skip anything else.
    if not image.is_file():
        continue

    try:
        img = decode_image(read_file(str(image)))
    except Exception as e:
        # image.name is the bare file name on every OS; splitting the path
        # on '/' leaves the full path untouched on Windows.
        print(f"[ERR] {image.name}: {e} DELETED")
        image.unlink()
        continue

    if img.ndim != 3:
        print(f"[FILE_CORRUPT] {image.name} DELETED")
        image.unlink()

[FILE_CORRUPT] data\cats_vs_dogs\Cat\10125.jpg DELETED
[ERR] data\cats_vs_dogs\Cat\10404.jpg: {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Unknown image file format. One of JPEG, PNG, GIF, BMP required. [Op:DecodeImage] DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Cat\10501.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Cat\10820.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Cat\11210.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Cat\11565.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Cat\11874.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Cat\11935.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Cat\140.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Cat\2663.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Cat\3300.jpg DELETED
[FILE_CORRUPT] data\cats_vs_dogs\Cat\3491.jpg DELETED
[ERR] data\cats_vs_dogs\Cat\4351.jpg: {{function_node __wrapped__DecodeImage_device_/job:localhost/replica:0/task:0/device:CPU:0}} Input size should match (header_size + row_size 

Source: https://github.com/tensorflow/datasets/issues/2188

### Datasets creation

In [137]:
def datasets_prep(base_directory=base_dir,
                  seed=123, validation_split=0.2,
                  batch_size=32,
                  image_size=IMAGE_SIZE,
                  label_mode='binary'):
    """Build train / validation / test datasets from a directory of class folders.

    The directory is split into a training and a validation subset with
    ``tf.keras.utils.image_dataset_from_directory``; the validation subset is
    then cut in half by batches: the first half becomes the test set and the
    remainder stays the validation set.

    Note: the defaults ``base_dir`` and ``IMAGE_SIZE`` are evaluated when this
    ``def`` is executed, so both globals must already exist at that point.

    Args:
        base_directory: root folder containing one sub-folder per class.
        seed: seed passed to both dataset calls so they use the same split.
        validation_split: fraction of the files reserved for validation + test.
        batch_size: number of images per batch.
        image_size: ``(height, width)`` the images are resized to.
        label_mode: label encoding passed to ``image_dataset_from_directory``.

    Returns:
        Tuple ``(train_ds, val_ds, test_ds)``.
    """
    # Arguments shared by the two calls; only `subset` differs between them.
    # The function's own parameters are used here, not the module-level globals.
    shared_kwargs = dict(label_mode=label_mode,
                         validation_split=validation_split,
                         seed=seed,
                         image_size=image_size,
                         batch_size=batch_size)

    train_ds = tf.keras.utils.image_dataset_from_directory(
                            base_directory,
                            subset='training',
                            **shared_kwargs)

    val_ds = tf.keras.utils.image_dataset_from_directory(
                            base_directory,
                            subset='validation',
                            **shared_kwargs)

    # First half of the validation batches -> test set, the rest -> validation set.
    # NOTE(review): the source dataset is presumably shuffled by default; confirm
    # that take/skip stay disjoint across repeated iterations.
    take = len(val_ds) // 2
    test_ds = val_ds.take(take)
    val_ds = val_ds.skip(take)

    print('Batches for testing:', test_ds.cardinality())
    print('Batches for validating:', val_ds.cardinality())

    return train_ds, val_ds, test_ds

In [138]:
# Sanity check of the take/skip logic used to build the test dataset:
# on a toy range the first half must go to `test`, the remainder to `valid`.
dataset = tf.data.Dataset.range(10)
take = len(dataset) // 2
print(take)

test = dataset.take(take)
dataset = dataset.skip(take)

print('test:', list(test.as_numpy_iterator()))
print('valid:', list(dataset.as_numpy_iterator()))

5
test: [0, 1, 2, 3, 4]
valid: [5, 6, 7, 8, 9]


In [139]:
# Build the three batched datasets using the function's default arguments.
train_batches, validation_batches, test_batches = datasets_prep()

Found 24929 files belonging to 2 classes.
Using 19944 files for training.
Found 24929 files belonging to 2 classes.
Using 4985 files for validation.
Batches for testing: tf.Tensor(78, shape=(), dtype=int64)
Batches for validating: tf.Tensor(78, shape=(), dtype=int64)


In [140]:
# Pull a single (images, labels) batch out of the training set to inspect its shape.
image_batch, label_batch = next(iter(train_batches.take(1)))

image_batch.shape

TensorShape([32, 224, 224, 3])

### AUTOTUNE

In [None]:
# Let tf.data choose the prefetch buffer size.
AUTOTUNE = tf.data.AUTOTUNE

# Cache each dataset and prefetch upcoming batches.
# Each name is rebound to its cached/prefetched version, so re-running this
# cell wraps the pipelines again.
train_batches = train_batches.cache().prefetch(buffer_size=AUTOTUNE)
validation_batches = validation_batches.cache().prefetch(buffer_size=AUTOTUNE)
test_batches = test_batches.cache().prefetch(buffer_size=AUTOTUNE)

### Image rescaling layer

In [141]:
# One-layer model that multiplies pixel values by 1/255.
# The input shape is IMAGE_SIZE plus 3 channels, so IMAGE_SIZE must already be
# defined when this cell runs.
rescale = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1./255,
                              input_shape=IMAGE_SIZE + (3,),
                              name='Rescaling')
])

### Select the Hub/TF2 Module to Use

Hub modules for TF 1.x won't work here, please use one of the selections provided.

In [142]:
# Colab form annotation listing the selectable (model name, input pixels, feature-vector size) tuples.
#@param ["(\"mobilenet_v2\", 224, 1280)", "(\"inception_v3\", 299, 2048)"] {type:"raw", allow-input: true}
module_selection = ("mobilenet_v2", 224, 1280) 
# Unpack into: hub model name, square input side in pixels, size of the feature vector.
handle_base, pixels, FV_SIZE = module_selection
# URL of the TF Hub feature-vector module for the chosen model.
MODULE_HANDLE ="https://tfhub.dev/google/tf2-preview/{}/feature_vector/4".format(handle_base)
# (height, width) used for the image inputs.
IMAGE_SIZE = (pixels, pixels)
print("Using {} with input size {} and output dimension {}".format(MODULE_HANDLE, IMAGE_SIZE, FV_SIZE))

Using https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4 with input size (224, 224) and output dimension 1280


### Defining the Model

All it takes is to put a linear classifier on top of the `feature_extractor` layer built from the Hub module.

For speed, we start out with a non-trainable `feature_extractor`, but you can also enable fine-tuning for greater accuracy.

In [143]:
# When True the hub layer is created trainable and the SGD compile branch is used;
# when False the hub layer stays frozen and Adam is used.
do_fine_tuning = False #@param {type:"boolean"}

Load TFHub Module

In [144]:
# Wrap the selected TF Hub module as a Keras layer producing FV_SIZE features.
# Its weights are trainable only when do_fine_tuning is True.
feature_extractor = hub.KerasLayer(
                            MODULE_HANDLE,
                            # input_shape=IMAGE_SIZE + (3,),
                            output_shape=[FV_SIZE],
                            trainable=do_fine_tuning
)

In [145]:
print('Building model with', MODULE_HANDLE)

# Assemble the network layer by layer:
# pixel rescaling -> hub feature extractor -> softmax classification head.
model = tf.keras.Sequential()
model.add(rescale)
model.add(feature_extractor)
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

model.summary()

Building model with https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/4
Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_9 (Sequential)   (None, 224, 224, 3)       0         
                                                                 
 keras_layer_4 (KerasLayer)  (None, 1280)              2257984   
                                                                 
 dense_5 (Dense)             (None, 2)                 2562      
                                                                 
Total params: 2,260,546
Trainable params: 2,562
Non-trainable params: 2,257,984
_________________________________________________________________


In [146]:
# Pick the optimizer according to the training mode: SGD with momentum when the
# hub layer is being fine-tuned, Adam when only the classifier head is trained.
# Both branches use the sparse categorical cross-entropy loss and report accuracy.
if do_fine_tuning:
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.002, momentum=0.9),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])
else:
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

In [147]:
# Number of passes over the training data.
EPOCHS = 1

# Train on the training batches and evaluate on the validation batches each epoch;
# the object returned by fit() is kept in `hist`.
hist = model.fit(train_batches,
                 epochs=EPOCHS,
                 validation_data=validation_batches)



### Export the Model

In [150]:
# Directory the SavedModel is written to and later loaded / converted from.
CATS_VS_DOGS_SAVED_MODEL = "./saved/exp_saved_model"

In [151]:
# Export the model in SavedModel format to the directory defined above.
tf.saved_model.save(model, CATS_VS_DOGS_SAVED_MODEL)



INFO:tensorflow:Assets written to: ./saved/exp_saved_model\assets


INFO:tensorflow:Assets written to: ./saved/exp_saved_model\assets


In [153]:
# Reload the exported SavedModel from disk; the trailing expression displays
# the loaded object.
loaded = tf.saved_model.load(CATS_VS_DOGS_SAVED_MODEL)
loaded

<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject at 0x2d704838700>

In [154]:
# List the names of the signatures stored in the loaded SavedModel.
print(list(loaded.signatures.keys()))

['serving_default']


In [155]:
# Pick the 'serving_default' signature; the trailing expression displays it.
infer = loaded.signatures['serving_default']
infer

<ConcreteFunction signature_wrapper(*, sequential_9_input) at 0x2D704389430>

In [156]:
# Show the inputs the signature expects (name, shape and dtype).
print(infer.structured_input_signature)

((), {'sequential_9_input': TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='sequential_9_input')})


In [157]:
# Show the outputs the signature returns (name, shape and dtype).
print(infer.structured_outputs)

{'dense_5': TensorSpec(shape=(None, 2), dtype=tf.float32, name='dense_5')}


### Convert Using TFLite's Converter

In [159]:
# Create a TFLite converter from the exported SavedModel directory.
# Only the converter object is built and displayed here; this cell does not
# call converter.convert().
converter = tf.lite.TFLiteConverter.from_saved_model(CATS_VS_DOGS_SAVED_MODEL)
converter

<tensorflow.lite.python.lite.TFLiteSavedModelConverterV2 at 0x2d740a297f0>

In [None]:
# # import shutil
# shutil.rmtree('./data/' + fname)