# Code required for Google Colab

In [1]:
try:
  import google.colab
  IN_COLAB = True
except:
  IN_COLAB = False

In [2]:
if not IN_COLAB:
    print("No Colab")

No Colab


# Getting the trained model

In [3]:
import os, sys, math, datetime, configparser
import pathlib
from pathlib import Path
import numpy as np
import pandas as pd
import random
from matplotlib import pyplot as plt
import plotly.graph_objects as go
import PIL
import PIL.Image
import seaborn as sns
import socket
import sklearn
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, f1_score, accuracy_score



import tensorflow as tf
keras = tf.keras
from keras.layers import ReLU
from keras.layers import Input, Dense, Flatten, Conv2D,DepthwiseConv2D, MaxPooling2D, AvgPool2D, GlobalAveragePooling2D, BatchNormalization, Concatenate, Reshape, Multiply, Add, Activation
from keras.models import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping, LearningRateScheduler

from workbench.config.config import initialize
from workbench.utils.utils import create_filepaths
from workbench.tensorflow import set_batchnorm_momentum, set_dropout
from workbench.wandb import wandb_model_DB, get_model_DB_run_id_from_architecture, get_vww_training_run_id_from_architecture

import wandb
from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint

%load_ext autoreload

In [4]:
%reload_ext autoreload
%autoreload

In [5]:
HOST_NAME = socket.gethostname()
HOST_NAME

if HOST_NAME in ["default"]:
    config = configparser.ConfigParser()
    config.read("config.ini")

    HOST_NAME = config['MACHINE']['HOST_NAME']

# Model architecture

In [7]:
global model_name
model_name = "mobilenetv1_0.1_96_c3_o2_l5.MV1"


In [8]:
# DANGER ZONE: Disable warning messages

import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

# Visual Wake Words dataset


In [9]:
if IN_COLAB:
    #path to the base directory of the visual_wake_words dataset
    vww_path = Path("/content/visual_wake_words")

else:
    vww_path = Path.cwd().joinpath("datasets","visual_wake_words")
vww_path.exists()

True

# Get the model

In [10]:
models_dir = initialize()

In [11]:
models_path, models_summary_path, models_image_path, models_layer_df_path, models_tf_path, models_tflite_path, models_tflite_opt_path = create_filepaths(model_name)

c:\tiny_mlc\tiny_cnn\models


In [12]:
! explorer $models_path

In [14]:
global base_model_name
global alpha
global resolution
global channels
global classes
global variation
global early_stopping_patience

In [15]:
base_model_name, alpha, resolution, channels, classes, variation = model_name.split("_")

# Set training parameters

In [16]:
dataset = "vvw_minval_datagen_fix"

BATCH_SIZE = 50

PROJECT = "model_DB_visual_wake_words"
ENTITY = "susbrock"


In [17]:
alpha = float(alpha)
resolution = int(resolution)
classes = int(classes.strip("o"))
channels = int(channels.strip("c"))

# Visual Wake Words minval


In [21]:
def get_vvw_minval_datagen_fix(dataset_path, img_width, img_height, batch_size, channels, normalize=True):
    TRAIN_DIR = dataset_path.joinpath("train")
    VAL_DIR = dataset_path.joinpath("val")
    TEST_DIR = dataset_path.joinpath("test")
    #Path.exists(BASE_DIR)
    #validation_split = 0
    color_mode = "rgb"

    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rotation_range=10,
        width_shift_range=0.05,
        height_shift_range=0.05,
        zoom_range=.1,
        horizontal_flip=True,
        #validation_split=validation_split,
        rescale=1. / 255)
    
    train_generator = train_datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=(img_height, img_width),
        batch_size=BATCH_SIZE,
        #subset='training',
        color_mode=color_mode,
        class_mode="sparse",
        shuffle=True
        )
    

    val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)

    val_generator = val_datagen.flow_from_directory(
        VAL_DIR,
        target_size=(img_height, img_width),
        batch_size=BATCH_SIZE,
        #subset='validation',
        color_mode=color_mode,
        class_mode="sparse",
        shuffle=True)
    
    test_gen =  tf.keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)
    
    test_generator = test_gen.flow_from_directory(
        TEST_DIR,
        target_size=(img_height, img_width),
        batch_size=1,# BATCH_SIZE, # was 1
        color_mode=color_mode,
        #subset='validation',
        class_mode="sparse",
        shuffle=False)
    
    #print (f"Class names: {class_names}")
    #print(f"Train: {train_generator.element_spec}")
    #print(f"Normalize: {normalize}")

    class_names  = ["non_person", "person"]
    return (train_generator, val_generator, test_generator, class_names)

# Choose dataset

In [22]:
def get_dataset(name, classes):
    if name == "lemon_quality":
        train_ds, val_ds, test_ds, class_names = get_lemon_quality_dataset(lemon_dataset_path, resolution, resolution, BATCH_SIZE, channels)
        dataset_name = "lemon_quality"

    elif name == "lemon_binary_datagen":
        train_ds, val_ds, test_ds, class_names = get_lemon_binary_datagen(None, resolution, resolution, BATCH_SIZE, channels, normalize=True)
        dataset_name = "lemon_binary_datagen"
    elif name == "vvw_minval":
        train_ds, val_ds, test_ds, class_names = get_vvw_minval_dataset(None, resolution, resolution, BATCH_SIZE, channels, normalize=True)
        dataset_name = "vvw_minval"
    elif name == "vvw_minval_fix":
        train_ds, val_ds, test_ds, class_names = get_vvw_minval_dataset_fix(None, resolution, resolution, BATCH_SIZE, channels, normalize=True)
        dataset_name = "vvw_minval_fix"


    elif name == "vvw_minval_datagen":
        train_ds, val_ds, test_ds, class_names = get_vvw_minval_datagen(None, resolution, resolution, BATCH_SIZE, channels, normalize=True)
        dataset_name = "vvw_minval_datagen"  
    elif name == "vvw_minval_datagen_fix":
        train_ds, val_ds, test_ds, class_names = get_vvw_minval_datagen_fix(vww_path, resolution, resolution, BATCH_SIZE, channels, normalize=True)
        dataset_name = "vvw_minval_datagen_fix"  
    else:
        print(f"Dataset {name} is not a valid dataset")
        train_ds, val_ds, test_ds, class_names, dataset_name = 0


    if len(class_names) != classes:
        print(f"Incompatible dataset and model. \n, \
            Model uses {classes} classes - dataset has {len(class_names)} classes!")
    else:
        pass
    
    return train_ds, val_ds, test_ds, class_names, dataset_name



In [23]:
train_ds, val_ds, test_ds, class_names, dataset_name = get_dataset(dataset, classes)

Found 87695 images belonging to 2 classes.
Found 10961 images belonging to 2 classes.
Found 10963 images belonging to 2 classes.


In [24]:
if dataset_name not in  ["vvw_minval_datagen", "vvw_minval_datagen_fix", "lemon_binary_datagen"]:

    tf.keras.backend.clear_session()

    # optimize the data flow
    AUTOTUNE = tf.data.AUTOTUNE
    train_ds = train_ds.prefetch(AUTOTUNE)
    val_ds = val_ds.cache().prefetch(AUTOTUNE)

In [25]:
api = wandb.Api(timeout=19)

# Restore the model from wandb

In [27]:
#run_id = "fqubvbej"
# import wandb
# run = wandb.init()
# artifact = run.use_artifact('susbrock/mobilenetv1/run_3z2btl1i_model:v23', type='model')
# artifact_dir = artifact.download()

In [28]:
#run_id = get_vww_training_run_id_from_architecture(model_name) #get_model_DB_run_id_from_architecture(model_name)
run_id = "3z2btl1i"

In [31]:
run = wandb.init(
                # Set the project where this run will be logged
                project=PROJECT,
                name = model_name,
                id = run_id, 
                resume=True,
                #resume="must",
                sync_tensorboard=True
)
artifact = run.use_artifact('susbrock/mobilenetv1/run_3z2btl1i_model:v23', type='model')
#artifact = run.use_artifact(f"{ENTITY}/{PROJECT}/run_{run_id}_model:latest", type='model')
artifact_dir = artifact.download(root="temp")
# extract model filename
#model_filename = os.listdir(artifact_dir)[0]
model = tf.keras.models.load_model("temp")
model.name
run.finish()

[34m[1mwandb[0m:   4 of 4 files downloaded.  


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch/accuracy,0.79976
epoch/epoch,49.0
epoch/learning_rate,0.00025
epoch/loss,0.42799
epoch/lr,0.00025
epoch/val_accuracy,0.79573
epoch/val_loss,0.43048
test_accuracy,0.79987
test_loss,0.42842


run_1pxsd6er_model:latest

In [None]:
# print("evaluate on test dataset")
# train_ds, val_ds, test_ds, class_names, dataset_name = get_dataset(dataset, classes)
# results = model.evaluate(test_ds, batch_size=BATCH_SIZE)
# print("test loss, test acc:", results)



# Evaluate the downloaded model

# Conversion to TFLite

In [None]:
models_tflite_trained_path = models_dir.joinpath(model_name, f"{model_name}_trained.tflite")
models_tflite_opt_trained_path = models_dir.joinpath(model_name, f"{model_name}_INT8_trained.tflite")

In [None]:
# Convert the model to the TensorFlow Lite format without quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# converter = tf.lite.TFLiteConverter.from_saved_model(models_path)
tflite_model = converter.convert()


INFO:tensorflow:Assets written to: C:\Users\Susanne\AppData\Local\Temp\tmpvjs6b1tl\assets


INFO:tensorflow:Assets written to: C:\Users\Susanne\AppData\Local\Temp\tmpvjs6b1tl\assets


In [None]:

# Save the model.
with open(models_tflite_trained_path, "wb") as f:
    f.write(tflite_model)

# Conversion to TFLite with INT8 quantization

In [None]:
train_ds, val_ds, test_ds, class_names, dataset_name = get_dataset(dataset, classes)

Found 87695 images belonging to 2 classes.
Found 10961 images belonging to 2 classes.
Found 10963 images belonging to 2 classes.


In [None]:
# def convert_tflite_quant_INT8(model, data_generator):
#     converter_opt = tf.lite.TFLiteConverter.from_keras_model(model)

#     # set the optimization flag
#     converter_opt.optimizations = [tf.lite.Optimize.DEFAULT]
#     # enforce integer only quantization
#     converter_opt.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
#     converter_opt.inference_input_type = tf.int8
#     converter_opt.inference_output_type = tf.int8

#     # provide a representative dataset for quantization
#     converter_opt.representative_dataset = data_generator

#     tflite_model_opt = converter_opt.convert()

#     return tflite_model_opt

In [None]:
def representative_data_gen():
    for i in range(20):
    #   for sample, _ in dataset.validation_dataset():
        test_image, y_true = next(test_ds)
        yield [np.expand_dims(test_image[0], axis=0)]
        #yield [test_image[0]]

converter_opt = tf.lite.TFLiteConverter.from_keras_model(model)

# set the optimization flag
converter_opt.optimizations = [tf.lite.Optimize.DEFAULT]
# enforce integer only quantization
converter_opt.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]


# https://github.com/tensorflow/tensorflow/issues/53293: uint is no longer supported!
#converter_opt.inference_input_type = tf.uint8
#converter_opt.inference_output_type = tf.uint8
converter_opt.inference_input_type = tf.int8
converter_opt.inference_output_type = tf.int8

# provide a representative dataset for quantization
converter_opt.representative_dataset = representative_data_gen

tflite_model_opt = converter_opt.convert()

# Save the model.
with open(models_tflite_opt_trained_path , 'wb') as f:
  f.write(tflite_model_opt)


INFO:tensorflow:Assets written to: C:\Users\Susanne\AppData\Local\Temp\tmpeitfkicx\assets


INFO:tensorflow:Assets written to: C:\Users\Susanne\AppData\Local\Temp\tmpeitfkicx\assets


# Run the TensorFlow Lite models


In [None]:
#@tf.function
def tflite_predict(model_path, test_image):
    # Initialize the interpreter
    interpreter = tf.lite.Interpreter(model_path=str(model_path))
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    #print(input_details['dtype'])

    # Check if the input type is quantized, then rescale input data to uint8
    if input_details['dtype'] == np.int8:  # was np.uint8
        input_scale, input_zero_point = input_details["quantization"]
        #print(f"input_scale {input_scale}")
        #print(f"input_zero_point {input_zero_point}")
        test_image = test_image / input_scale + input_zero_point
        #print(test_image)


    test_image = test_image.astype(input_details["dtype"])
    #
    interpreter.set_tensor(input_details["index"], test_image)
    #interpreter.set_tensor(input_details["index"], np.expand_dims(test_image[0], axis=0)) # only needed when input shape (96, 96, 3)
    interpreter.invoke()
    output = interpreter.get_tensor(output_details["index"])[0]
    #dequantized_output = (output - input_zero_point) * input_scale
    prediction =output.argmax()
    #print(f"Prediction: Class {prediction} derived from {output}")

    return prediction

In [None]:
# test_image, test_label = next(test_ds)
# test_label

In [None]:
# tflite_result = tflite_predict(models_tflite_trained_path , test_image)
# tflite_result

In [None]:
# tflite_result = tflite_predict(models_tflite_opt_trained_path , test_image)
# tflite_result

In [None]:

# def tflite_predict_on_dataset(model_path, dataset):
#     # find length of dataset
#     test_gen = dataset.as_numpy_iterator()
#     num_images = len(list(test_gen))

#     predictions = []
#     y_trues = []

#     test_gen = dataset.as_numpy_iterator()
#     accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
    
#     # iterate over the complete test_set
#     for i in range(num_images):
#         test_image, y_true = next(test_gen)
#         prediction = tflite_predict(model_path, test_image)
#         predictions.append(prediction)
#         y_trues.append(y_true[0])
#         #accuracy.update_state(y_true, prediction) # TODO: correct accuracy
#         print(f"{i}, {test_image.shape} - true label: {y_true[0]} vs {tflite_result}")

#     #accuracy = (np.sum(predictions == y_trues) * 100) / num_images
#     print(f"Accuracy: {accuracy.result()} - (Number of test samples: {num_images})")
#     return predictions, y_trues    

In [None]:
#@tf.function
def tflite_predict_on_datagen(model_path, dataset):
    # find length of dataset
    test_gen = dataset#.as_numpy_iterator()
    num_images = test_gen.samples

    predictions = []
    y_trues = []

    # iterate over the complete test_set
    for i in range(num_images):
        test_image, y_true = next(test_gen)
        prediction = tflite_predict(model_path, test_image)
        #print(prediction)
        predictions.append(prediction)
        y_trues.append(y_true[0])
        #print(f"{i}, {test_image.shape} - true label: {y_true[0]} vs {prediction}")

    accuracy = (np.sum(np.array(predictions) == np.array(y_trues)) * 100) / num_images
    
    print(f"Accuracy: {accuracy} - (Number of test samples: {num_images})")
    return accuracy, predictions, y_trues    

In [None]:
train_ds, val_ds, test_ds, class_names, dataset_name = get_dataset(dataset, classes)

Found 87695 images belonging to 2 classes.
Found 10961 images belonging to 2 classes.
Found 10963 images belonging to 2 classes.


In [None]:
tflite_accuracy, predictions_tflite, y_trues_tflite = tflite_predict_on_datagen(models_tflite_trained_path, test_ds)

Accuracy: 70.59199124327283 - (Number of test samples: 10963)


In [None]:
def evaluate_model(model):
    train_ds, val_ds, test_ds, class_names, dataset_name = get_dataset(dataset, classes)
    accuracy, predictions, y_trues = tflite_predict_on_datagen(models_tflite_trained_path, test_ds)
    return accuracy, predictions, y_trues

In [None]:
tflite_accuracy, predictions_tflite, y_trues_tflite = evaluate_model(models_tflite_trained_path)

Found 87695 images belonging to 2 classes.
Found 10961 images belonging to 2 classes.
Found 10963 images belonging to 2 classes.
Accuracy: 70.59199124327283 - (Number of test samples: 10963)


# Model evaluation -TFLite

In [None]:
accuracy = accuracy_score(y_true=y_trues_tflite, y_pred=predictions_tflite)
print(f"accuracy: {accuracy}")
AUC = roc_auc_score(y_true=y_trues_tflite, y_score=predictions_tflite)
print(f"AUC score:{AUC}")
F1 = f1_score(y_true=y_trues_tflite, y_pred=predictions_tflite)
print(f"F1 score:{F1}")

accuracy: 0.7059199124327282
AUC score:0.7106222749980629
F1 score:0.7402513696422818


In [None]:
confusion_mtx_tflite = tf.math.confusion_matrix(y_trues_tflite, predictions_tflite, num_classes=classes)
confusion_mtx_tflite

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[3145, 2504],
       [ 720, 4594]])>

In [None]:
# def show_confusion_matrix(cm, labels):
#   plt.figure(figsize=(6, 6))
#   sns.heatmap(cm, xticklabels=labels, yticklabels=labels, 
#               annot=True, fmt='g',
#               cmap="Blues"
#               )
#   plt.xlabel('Prediction')
#   plt.ylabel('Label')
#   plt.show()
#   return plt

#   my_plot = show_confusion_matrix(confusion_mtx_tflite, class_names)

In [None]:
def plotly_confusion_matrix(confusion_matrix, title=""):
    global model_name
    fig = go.Figure(data=go.Heatmap(
                    z=confusion_matrix,
                    x=class_names,
                    y=class_names,
                    colorscale="Blues",
                    hoverongaps = False),
                    )
    fig.update_traces(text=confusion_mtx_tflite, texttemplate="%{text}", hovertemplate=None)
    #fig.update_layout(title=go.layout.Title(text=f'TFLite Confusion Matrix {title}'),
    fig.update_layout(title=go.layout.Title(text=f'Confusion Matrix - {title}<br>{model_name}'),
                                xaxis_title='Prediction',
                                yaxis_title='Label',
                                height=550,
                                width=600,
                                margin=dict(l=120, r=20, t=90, b=20),
                                )
    fig.update_yaxes(autorange="reversed")
    fig.show()
    return fig

confusion_matrix_tflite = plotly_confusion_matrix(confusion_mtx_tflite, title="TFLite")

# Model evaluation - TFLite INT8

In [None]:
tflite_INT8_accuracy, predictions_tflite_INT8, y_trues_tflite_INT8 = evaluate_model(models_tflite_opt_trained_path)

Found 87695 images belonging to 2 classes.
Found 10961 images belonging to 2 classes.
Found 10963 images belonging to 2 classes.
Accuracy: 70.59199124327283 - (Number of test samples: 10963)


In [None]:
accuracy_INT8 = accuracy_score(y_true=y_trues_tflite_INT8, y_pred=predictions_tflite_INT8)
print(f"accuracy: {accuracy_INT8}")
AUC_INT8 = roc_auc_score(y_true=y_trues_tflite_INT8, y_score=predictions_tflite_INT8)
print(f"AUC score:{AUC_INT8}")
F1_INT8 = f1_score(y_true=y_trues_tflite_INT8, y_pred=predictions_tflite_INT8)
print(f"F1 score:{F1_INT8}")

accuracy: 0.7059199124327282
AUC score:0.7106222749980629
F1 score:0.7402513696422818


In [None]:
confusion_mtx_tflite_INT8 = tf.math.confusion_matrix(y_trues_tflite_INT8, predictions_tflite_INT8, num_classes=classes)
confusion_mtx_tflite_INT8

confusion_matrix_tflite_INT8 = plotly_confusion_matrix(confusion_mtx_tflite_INT8, title="TFLite INT8")

# Check for Quantization Errors

In [None]:
quantization_diff = (np.sum(np.array(predictions_tflite) != np.array(predictions_tflite_INT8)))
quantization_diff

0

# Logging to wandb

In [None]:
eval_metrics ={}
eval_metrics["accuracy_INT8"] = accuracy_INT8
eval_metrics["AUC_INT8"] = AUC_INT8
eval_metrics["F1_INT8"] = F1_INT8
eval_metrics["accuracy_tflite"] = accuracy_INT8
eval_metrics["AUC_tflite"] = AUC_INT8
eval_metrics["F1_tflite"] = F1_INT8
eval_metrics["quantization_diff"] = quantization_diff

In [None]:
# Generate run ids
#id = wandb.wandb.sdk.lib.runid.generate_id()

id = get_model_DB_run_id_from_architecture(model_name)
PROJECT = "model_DB"

run = wandb.init(
        # Set the project where this run will be logged
        project=PROJECT, 
        id = id, 
        resume="allow",
        )

run.log({"confusion_matrix_tflite_INT8" : confusion_matrix_tflite_INT8,
        "confusion_matrix_tflite" : confusion_matrix_tflite})

run.log(eval_metrics)

wandb.finish()

VBox(children=(Label(value='0.020 MB of 0.075 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.265514…

0,1
AUC_INT8,▁
AUC_tflite,▁
F1_INT8,▁
F1_tflite,▁
accuracy_INT8,▁
accuracy_tflite,▁
quantization_diff,▁

0,1
AUC_INT8,0.71062
AUC_tflite,0.71062
F1_INT8,0.74025
F1_tflite,0.74025
accuracy_INT8,0.70592
accuracy_tflite,0.70592
quantization_diff,0.0
test_accuracy,
