In [1]:
# !pip install -q tensorflow-model-optimization

In [2]:
## docker run -it --gpus all --rm -v $PWD:/tf/host -p 8585:8888 tensorflow/tensorflow:latest-gpu-jupyter

In [3]:
!pip install -q jupyter-autotime

You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [4]:
%load_ext autotime

!nvidia-smi -L

import os

os.environ['CUDA_VISIBLE_DEVICES']='0'

GPU 0: NVIDIA GeForce RTX 3090 (UUID: GPU-3b49e2b8-87f0-c515-798b-3492ec05a183)
GPU 1: NVIDIA GeForce GTX 1080 Ti (UUID: GPU-07628ed7-6ef8-fd67-7d03-cb6a89f72de4)


In [5]:
import numpy as np, tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input

from pathlib import Path
# from tqdm.notebook import tqdm

In [6]:
tf.config.get_visible_devices("GPU")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [7]:
def get_dir_size(directory):
    root_dir=Path(".")
    size = sum(f.stat().st_size for f in root_dir.glob(directory+'/**/*') if f.is_file())
    return f"Size in MB: {size // (1024*1024)}"

In [8]:
train_path = "./dataset/data_splitting/Train/"
valid_path = "./dataset/data_splitting/Test/"
test_path = "./dataset/data_splitting/Pred/"

In [9]:
# You can add more augmentations, if you want

train_gen = ImageDataGenerator(
    rotation_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=preprocess_input,
)

gen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

In [10]:
targetMap='''aegypti landing
aegypti smashed
albopictus landing
albopictus smashed
Culex landing
Culex smashed'''.split('\n')
targetMap

['aegypti landing',
 'aegypti smashed',
 'albopictus landing',
 'albopictus smashed',
 'Culex landing',
 'Culex smashed']

In [11]:
# Hyper-Parameters
IMG_SIZE = (224, 224)
BATCH_SIZE = 32
EPOCHS = 3
NUM_CLASSES = len(targetMap)

In [12]:
train = train_gen.flow_from_directory(train_path, target_size=IMG_SIZE,
                                      classes=targetMap, class_mode='categorical', batch_size=BATCH_SIZE)
valid = gen.flow_from_directory(valid_path, target_size=IMG_SIZE,
                                      classes=targetMap, class_mode='categorical', batch_size=BATCH_SIZE)
test = gen.flow_from_directory(test_path, target_size=IMG_SIZE,
                                      classes=targetMap, class_mode='categorical', batch_size=BATCH_SIZE)

Found 4200 images belonging to 6 classes.
Found 1799 images belonging to 6 classes.
Found 3600 images belonging to 6 classes.


In [19]:
model = keras.models.load_model("./optimized/pruned_model/", compile=False)
# For evaluation, you must run compile again (but don't need it for production as you only use predict)

In [14]:
for i in range(10):
    imgs, _ = test.next()
    for img in imgs:
        img = np.expand_dims(img, axis=0)
        model.predict(img, verbose=0)

# WITHOUT prunning

In [17]:
fmd = "./mlflow/artifacts/1/38162c8d183043f1bfddf866e1ee9175/artifacts/model/data/model" #final model directory

In [20]:
noprun = keras.models.load_model("./optimized/pruned_model/", compile=False)

In [46]:
for i in range(1000):
    imgs, _ = test.next()
    for img in imgs:
        img = np.expand_dims(img, axis=0)
        model.predict(img, verbose=0)

## TensorRT

In [22]:
from tensorflow.python.compiler.tensorrt import trt_convert as trt

In [25]:
params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(
    precision_mode='FP16')

In [29]:
get_dir_size("./optimized/pruned_model/")

'Size in MB: 38'

In [26]:
converter = trt.TrtGraphConverterV2(input_saved_model_dir="./optimized/pruned_model/")
converter.convert()
converter.save("./optimized/optimized_for_latency/")

INFO:tensorflow:Linked TensorRT version: (7, 2, 2)
INFO:tensorflow:Loaded TensorRT version: (7, 2, 2)
INFO:tensorflow:Could not find TRTEngineOp_000_000 in TF-TRT cache. This can happen if build() is not called, which means TensorRT engines will be built and cached at runtime.




INFO:tensorflow:Assets written to: ./optimized/optimized_for_latency/assets


INFO:tensorflow:Assets written to: ./optimized/optimized_for_latency/assets


In [27]:
get_dir_size("./optimized/optimized_for_latency/")

'Size in MB: 76'

In [37]:
latency = tf.saved_model.load("./optimized/optimized_for_latency/")

In [42]:
infer = latency.signatures['serving_default']

In [45]:
for i in range(1000):
    imgs, _ = test.next()
    for img in imgs:
        img = tf.expand_dims(img, axis=0)
        infer(img)

# TensorRT Pruned

In [47]:
def prune_custom_layer(layer):
    # prunning_params is optional (don't use it if you want)
#     end_step = np.ceil(train.n/BATCH_SIZE).astype(np.int32) * EPOCHS
#     prunning_params = {
#         tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.5, 
#                                              final_sparsity=0.9,
#                                              begin_step=0,
#                                             end_step=end_step)
#     }
    try:
        return tfmot.sparsity.keras.prune_low_magnitude(layer, **prunning_params)
    except:
        return layer

In [48]:
model = tf.keras.models.clone_model(
    latency,
    clone_function=prune_custom_layer
)
model.summary()

ValueError: Expected `model` argument to be a `Model` instance. Received: model=<tensorflow.python.saved_model.load.Loader._recreate_base_user_object.<locals>._UserObject object at 0x7fe93476d9a0>