In [1]:

import os
import argparse
import logging
import base64
import sys 

from datetime import datetime

from PIL import Image
from io import BytesIO
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers, models



2022-08-20 11:45:35.389718: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
print(tf.__version__)
print(sys.version)

2.9.1
3.8.10 (default, Jun 22 2022, 20:18:18) 
[GCC 9.4.0]


In [3]:
"""Model to classify draft beers

This file contains all the model information: the training steps, the batch
size and the model itself.
"""



def get_batch_size():
    """Returns the batch size that will be used by your solution.
    It is recommended to change this value.
    """
    return 25

def get_epochs():
    """Returns number of epochs that will be used by your solution.
    It is recommended to change this value.
    """
    return 201

def solution(input_layer=None):
    """Returns a compiled model.

    This function is expected to return a model to identity the different beers.
    The model's outputs are expected to be probabilities for the classes and
    and it should be ready for training.
    The input layer specifies the shape of the images. The preprocessing
    applied to the images is specified in data.py.

    Add your solution below.

    Parameters:
        input_layer: A tf.keras.layers.InputLayer() specifying the shape of the input.
            RGB colored images, shape: (width, height, 3)
    Returns:
        model: A compiled model
    """


    model = models.Sequential()
    # data augmentation
    model.add(layers.RandomFlip('horizontal'))
    model.add(layers.RandomRotation(0.25))
    model.add(layers.RandomZoom(0.25))
    # conv layers
    # use dropout for regularization
    model.add(layers.Conv2D(16, (5, 5), strides=2,
                            activation='relu', input_shape=(160, 160, 6)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(.2))
    model.add(layers.Conv2D(32, (5, 5), strides=2, activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(.2))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(.2))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    # fcnn
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation='relu'))
    # use softmax as model is predicting classes
    model.add(layers.Dense(5, activation='softmax'))

    # model.compile(optimizer='adam',
    #           loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    #           metrics=['accuracy'])

    return model

class CropImageLayer(tf.keras.layers.Layer):
    """
        keras Layer which crops and resizes input images to the highest 
        ranking bottle detection
    """
    def __init__(self, **kwargs):
        """
        init of super class
        """
        super().__init__(**kwargs)


    def call(self, input, boxes, scores, classes):
        """
        Crops and resizes input images such that they concat the bottle detection and the glass detection
            with the highest detection score
        Parameters:
            input: input tensor of shape (batch_size, width, height, 3)
            boxes: np.array of shape (batch_size, classification_per_image, 4) containing
                coordinates of detection boxes
            classes: np.array of shape (batch_size, classification_per_image) containig
                class ids of the classifications
        Returns:
            tensor of shape (batch_size, width, height, 6)
            images respectively cropped and resized to the best bottle and glass detection box
        """
        boxes_filtered = []
        box_ind = []
        BOTTLE_ID = 44 # bottle id for object detection
        GLASS_ID = 46 # bottle id for object detection
        bottle_boxes_filtered, glass_boxes_filtered = [], []
        bottle_box_ind, glass_box_ind = [], []
        tf.print("="*80)
        tf.print("boxes: " + str(boxes))
        tf.print("boxes.shape: " + str(boxes.shape))
        # During model creation boxes.shape[0] is None as model gets created
        # Return therefore just concatenated input
        if not boxes.shape[0]:
            return tf.concat([input, input], axis = 3)

        for i in range(boxes.shape[0]): #batch
            for j in range(boxes.shape[1]): # detections
                # if classes[i][j] == BOTTLE_ID:
                    # print("(i,j):(" + str(i) + "," + str(j) + ")" + " score: " + str(scores[i][j].numpy()) + " class: " + str(classes[i][j].numpy()))
                if classes[i][j] == BOTTLE_ID and scores[i][j] > 0.5:
                    bottle_box = boxes[i][j].numpy() / 159
                    break
            # do not crop if no bottle was detected
            if j == boxes.shape[1] - 1:
                bottle_box = np.array([0, 0, 1, 1])
            bottle_box_ind.append(i)
            bottle_boxes_filtered.append(bottle_box)

        for i in range(boxes.shape[0]): #batch
            for j in range(boxes.shape[1]): # detections
                # if classes[i][j] == GLASS_ID:
                    # print("(i,j):(" + str(i) + "," + str(j) + ")" + " score: " + str(scores[i][j].numpy()) + " class: " + str(classes[i][j].numpy()))
                if classes[i][j] == GLASS_ID and scores[i][j] > 0.5:
                    glass_box = boxes[i][j].numpy() / 159
                    break
            # do not crop if no bottle was detected
            if j == boxes.shape[1] - 1:
                glass_box = np.array([0, 0, 1, 1])
            glass_box_ind.append(i)
            glass_boxes_filtered.append(glass_box)

        batch_glass_cropped = tf.image.crop_and_resize(input, glass_boxes_filtered, glass_box_ind, (160, 160))
        batch_bottle_cropped = tf.image.crop_and_resize(input, bottle_boxes_filtered, bottle_box_ind, (160, 160))
        batch_data_cropped = tf.concat([batch_glass_cropped, batch_bottle_cropped], axis = 3)
        return batch_data_cropped


class ComposedModel(tf.keras.Model):
    """
    Model composed of a pretrained object detector and a prediction head trained on the image dataset
    prediction head loaded from saved model, was trained in a previous step
    """
    def __init__(self):
        super(ComposedModel, self).__init__()
        # path to efficientdet model on tf hub
        model_handle = "https://tfhub.dev/tensorflow/efficientdet/lite3/detection/1"
        # path of prediction head model weights
        prediction_head_path = "prediction_head_model"
        self.efficientdet = hub.KerasLayer(model_handle, trainable=False)
        self.crop_image_layer = CropImageLayer()
        self.prediction_head = tf.saved_model.load(prediction_head_path)


    def call(self, inputs):
        """
            Compute classification scores for the input batch
            Parameters:
                inputs: input tensor to be classified of shape (batch_size, height, width, 3)
            Return: softmax predictions of shape (batch_size, number_of_classes)
        """
        # convert to uint8 from float32 as efficientdet expects image pixels in [0,256]
        inputs_uint8 = tf.image.convert_image_dtype(inputs, dtype=tf.uint8, saturate=False)
        # perform object detection on input to obtain boxes and classes of classification candidates
        boxes, scores, classes, _ = self.efficientdet(inputs_uint8)
        # crop inputs respectively to bottles with highest classification score
        inputs_cropped = self.crop_image_layer(inputs, boxes, scores, classes)
        # compute predictions for respective image classes
        result = self.prediction_head(inputs_cropped)
        return result


In [4]:
def test_exported_model():
    """
    Compare the model directly after training to the exported model.

    Even though the models receive the same image (just in different formats) the predictions differ.
    The error therefore probably comes from the export of the model

    # """
    # create composed model, the solution to the coding challenge
    ml_model = ComposedModel()
    ml_model.compile(optimizer='adam',
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
            metrics=['accuracy'])
    time_str = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

    basename = "composed_model_" + time_str
    # export to "output/composed_model"
    # ml_model_reloaded = tf.saved_model.load(os.path.join("output", basename))
    # open test image
    with open("0chimayblue_000.jpg", "rb") as img_file:
        # open test image encoded in base64
        img_base64 = base64.b64encode(img_file.read(), altchars=str.encode("-_"))
    # create image from base64 encoded string
    img = Image.open(BytesIO(base64.b64decode(img_base64, altchars=str.encode("-_"))))
    # from [0, 255] -> [0, 1]
    img = np.array(img) / 255.
    img = tf.expand_dims(img, axis=0)
    # convert base64 encoded image to string tensor
    predictions = ml_model(img, training=False)  # predictions for trained model
    ml_model.save(os.path.join("output", basename), save_format = "tf")
    # reload model which has just been exported
    ml_model_reloaded = tf.keras.models.load_model(os.path.join("output", basename))
    ml_model_reloaded.compile(optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=['accuracy'])
    tf.print(ml_model_reloaded)
    predictions_reloaded = ml_model_reloaded(img, training=False)  # predictions for trained model which was exported and then reimported
    print("trained model: " + str(predictions.numpy())) #output: [[9.7456181e-01 2.0527570e-04 1.9158632e-02 1.0962408e-03 4.9780323e-03]]
    print("served model: " + str(predictions_reloaded.numpy())) #output:  [[0.00636891 0.9207034  0.00868604 0.00923812 0.05500359]]



In [5]:
tf_logger = logging.getLogger("tensorflow")
tf_logger.setLevel(logging.INFO)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(tf_logger.level // 10)
# run test
test_exported_model()

2022-08-20 11:45:37.756213: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-20 11:45:37.810439: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/skerres/.local/lib/python3.8/site-packages/cv2/../../lib64:/usr/include:/usr/local/lib:/usr/local/cuda-10.1/targets/x86_64-linux/lib:/usr/local/cuda-11.0/targets/x86_64-linux/lib
2022-08-20 11:45:37.810456: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platfor

boxes: tf.Tensor(
[[[3.94008160e-01 4.05479813e+01 6.24505920e+01 6.73040085e+01]
  [6.03947639e-01 7.00556107e+01 6.71987610e+01 9.62328339e+01]
  [3.17409039e-01 1.47847090e+01 5.69000549e+01 4.08496933e+01]
  [1.14425850e+01 2.96565056e+00 1.59142944e+02 1.59820374e+02]
  [3.56609001e+01 1.21625359e+02 6.99773331e+01 1.58188080e+02]
  [1.61480665e+00 9.51249619e+01 4.69383240e+01 1.59178757e+02]
  [8.04954767e-02 3.00335884e-01 1.57853222e+01 1.64636307e+01]
  [0.00000000e+00 7.17967758e+01 1.76246948e+01 1.16832848e+02]
  [6.02782898e+01 6.11905813e+00 8.56941223e+01 5.66452026e+01]
  [0.00000000e+00 5.93407364e+01 1.36608496e+01 8.56794281e+01]
  [1.16609512e+02 1.01068100e+02 1.59503967e+02 1.58759552e+02]
  [7.72538300e+01 1.35561127e+02 9.49364395e+01 1.57361465e+02]
  [0.00000000e+00 1.22470840e+02 1.52644730e+01 1.39754181e+02]
  [7.59778290e+01 7.99659958e+01 1.05999672e+02 1.04677963e+02]
  [0.00000000e+00 3.32862968e+01 1.86418324e+01 5.09915161e+01]
  [7.65657043e+01 1.02



INFO:tensorflow:Assets written to: output/composed_model_2022-08-20-11-45-49/assets


INFO:tensorflow:Assets written to: output/composed_model_2022-08-20-11-45-49/assets


<keras.saving.saved_model.load.ComposedModel object at 0x7f6d7180a460>
boxes: Tensor("keras_layer/StatefulPartitionedCall:0", shape=(None, 100, 4), dtype=float32)
boxes.shape: (None, 100, 4)
trained model: [[9.7456181e-01 2.0527570e-04 1.9158632e-02 1.0962408e-03 4.9780323e-03]]
served model: [[0.00636891 0.9207034  0.00868604 0.00923812 0.05500359]]
