# 05 - Model Evaluation

This notebook creates two components: one runs model evaluation on the test data, and the other checks whether the resulting metrics pass preset thresholds before deployment.

In [None]:
import os
import time
import logging
import kfp
from google.cloud import bigquery, storage
from google.cloud import aiplatform as vertex_ai
from google_cloud_pipeline_components.experimental.custom_job import utils
from kfp.v2 import compiler, dsl
from kfp.v2.dsl import component
from typing import NamedTuple
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output, Metrics,
                        OutputPath, component)

from google_cloud_pipeline_components.experimental.custom_job import utils

In [None]:
# Configure the root logger at INFO level for this notebook session.
logging.basicConfig(level=logging.INFO)

## Load Params and Resource Config

In [None]:
from config.gcp_resource import *

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    
if SERVICE_ACCOUNT == "" or SERVICE_ACCOUNT is None or SERVICE_ACCOUNT == "[your-service-account]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.account)' 2>/dev/null
    SERVICE_ACCOUNT = shell_output[0]
    
if GCS_BUCKET == "" or GCS_BUCKET is None or GCS_BUCKET == "[your-bucket-name]":
    # Get your bucket name to GCP projet id
    GCS_BUCKET = PROJECT_ID
    # Try to create the bucket if it doesn'exists
    ! gsutil mb -l $REGION gs://$BUCKET
    print("")
    
!gcloud config set project {PROJECT_ID}

Updated property [core/project].


In [None]:
# Show the machine types and deployment image currently configured.
for label, value in (
    ("Train machine type", TRAIN_COMPUTE),
    ("Deploy machine type", DEPLOY_COMPUTE),
    ("Deployment:", DEPLOY_IMAGE),
):
    print(label, value)

# Show the storage locations currently configured.
for template, value in (
    ('PIPELINE_ROOT: {}', PIPELINE_ROOT),
    ('MODULE_ROOT: {}', MODULE_ROOT),
    ('DATA_ROOT: {}', DATA_ROOT),
    ('SERVING_MODEL_DIR: {}', SERVING_MODEL_DIR),
):
    print(template.format(value))

Train machine type n1-standard-4
Deploy machine type n1-standard-4
Deployment: us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-8:latest
PIPELINE_ROOT: gs://mle_airbus_dataset/airbusmlepipeline/pipeline_root
MODULE_ROOT: gs://mle_airbus_dataset/airbusmlepipeline/pipeline_module
DATA_ROOT: gs://mle_airbus_dataset/airbusmlepipeline/data
SERVING_MODEL_DIR: gs://mle_airbus_dataset/airbusmlepipeline/serving_model


## Model Evaluation Component

In [19]:
from kfp.v2.dsl import ClassificationMetrics, Metrics, Output, component

@component(
    packages_to_install=["tensorflow", "numpy", "pandas", "google-cloud-storage", "fsspec", "pyarrow"],
    base_image="python:3.7",
    output_component_file="./build/model_eval_component.yaml")
def model_eval_component(
    test_filepath: str,
    model_filepath: str,
    metrics: Output[Metrics]
):
    """Evaluate the saved segmentation model on the test set and log metrics.

    Downloads ``test.parquet`` from the GCS bucket, builds a batched
    ``tf.data`` pipeline of (image, decoded mask) pairs, loads the saved model
    from GCS, evaluates it, and logs ``dice_coef``, ``binary_accuracy`` and
    ``true_positive_rate`` on the ``metrics`` output artifact.

    Args:
        test_filepath: NOTE(review): currently unused; the test set is always
            read from the ``test.parquet`` blob of the hard-coded bucket.
            Confirm what callers pass before wiring it in.
        model_filepath: NOTE(review): currently unused; the model is always
            loaded from a hard-coded ``trained_model/...`` path. Confirm what
            callers pass before wiring it in.
        metrics: Output artifact that receives the evaluation metrics.
    """
    import tensorflow.keras.backend as K
    import tensorflow as tf
    import pandas as pd
    from tensorflow.keras import layers
    from tensorflow.keras.losses import binary_crossentropy
    from google.cloud import storage

    class Augment(tf.keras.layers.Layer):
        """Preprocess an (images, masks) batch with two parallel pipelines.

        With ``train=True`` both pipelines start with the same sequence of
        random geometric layers built from ``seed``; with ``train=False`` only
        rescaling (images) and resizing (images and masks) are applied.
        """

        def __init__(self, resize_shape=(768, 768), train=True, seed=42):
            super().__init__()
            # Honour both dimensions of resize_shape (height, width).
            height, width = resize_shape
            preprocessing = layers.experimental.preprocessing

            def geometric_layers():
                # Fresh layer instances on every call. Both pipelines are built
                # from the caller-supplied seed, with the intent that images
                # and masks receive the same random changes.
                if not train:
                    return []
                return [
                    preprocessing.RandomFlip(seed=seed),
                    preprocessing.RandomRotation(0.1, seed=seed),
                    preprocessing.RandomHeight(0.1, seed=seed),
                    preprocessing.RandomWidth(0.1, seed=seed),
                    preprocessing.RandomZoom(0.9, seed=seed),
                ]

            # Images are additionally rescaled from [0, 255] to [0, 1].
            self.augment_inputs = tf.keras.Sequential(
                geometric_layers()
                + [
                    preprocessing.Rescaling(1.0 / 255),
                    preprocessing.Resizing(height, width),
                ]
            )
            self.augment_labels = tf.keras.Sequential(
                geometric_layers() + [preprocessing.Resizing(height, width)]
            )

        def call(self, inputs, labels):
            """Return the preprocessed ``(inputs, labels)`` pair."""
            inputs = self.augment_inputs(inputs)
            labels = self.augment_labels(labels)
            return inputs, labels

    def dice_coef(y_true, y_pred, smooth=1):
        """Smoothed Dice coefficient, averaged over the batch axis."""
        intersection = K.sum(y_true * y_pred, axis=[1, 2, 3])
        union = K.sum(y_true, axis=[1, 2, 3]) + K.sum(y_pred, axis=[1, 2, 3])
        return K.mean((2. * intersection + smooth) / (union + smooth), axis=0)

    def dice_p_bce(in_gt, in_pred):
        """Loss: lightly weighted binary cross-entropy minus the Dice coefficient."""
        return 1e-3 * binary_crossentropy(in_gt, in_pred) - dice_coef(in_gt, in_pred)

    def true_positive_rate(y_true, y_pred):
        """Fraction of positive ground-truth pixels predicted positive.

        Returns 0 instead of NaN when the batch contains no positive pixels,
        so a single empty batch cannot turn the aggregated metric into NaN.
        """
        true_positives = K.sum(K.flatten(y_true) * K.flatten(K.round(y_pred)))
        return tf.math.divide_no_nan(true_positives, K.sum(y_true))

    # TODO: How to improve these functions ?
    def rle_decode_tf(mask_rle, shape=(768, 768)):
        """Decode a run-length-encoded string into a uint8 mask tensor.

        ``mask_rle`` is a whitespace-separated sequence of
        ``start length start length ...`` with 1-based starts. The result has a
        trailing channel axis of size 1.
        """
        shape = tf.convert_to_tensor(shape, tf.int64)
        size = tf.math.reduce_prod(shape)
        # Split the string into integer tokens.
        tokens = tf.strings.to_number(tf.strings.split(mask_rle), tf.int64)
        # Even positions are 1-based run starts, odd positions are run lengths.
        starts = tokens[::2] - 1
        lens = tokens[1::2]
        # One "1" per masked pixel, to be scattered into the flat mask.
        total_ones = tf.reduce_sum(lens)
        ones = tf.ones([total_ones], tf.uint8)
        # For every masked pixel, find the run it belongs to, then convert its
        # position inside that run into a flat index of the mask.
        r = tf.range(total_ones)
        lens_cum = tf.math.cumsum(lens)
        run_index = tf.searchsorted(lens_cum, r, 'right')
        idx = r + tf.gather(starts - tf.pad(lens_cum[:-1], [(1, 0)]), run_index)
        # Scatter ones into the flattened mask.
        mask_flat = tf.scatter_nd(tf.expand_dims(idx, 1), ones, [size])
        # NOTE(review): the transpose presumably accounts for column-major RLE
        # ordering -- confirm against the dataset's encoding.
        return tf.expand_dims(tf.transpose(tf.reshape(mask_flat, shape)), axis=2)

    def parse_db_to_img(filename, label):
        """Read and decode the JPEG at ``filename`` and decode its RLE ``label``."""
        img = tf.io.read_file(filename)
        image = tf.image.decode_jpeg(img, channels=3)
        label_img = rle_decode_tf(label)
        return image, label_img

    IMG_SHAPE = (128, 128)
    GCS_BUCKET = "mle_airbus_dataset"
    BATCH_SIZE = 16
    # Images are read from the same bucket that holds the test index.
    image_root = f"gs://{GCS_BUCKET}/train_v2/"

    # Fetch the test index (image ids + RLE masks) to the local working directory.
    bucket = storage.Client().bucket(GCS_BUCKET)
    bucket.blob("test.parquet").download_to_filename("test.parquet")
    valid_df = pd.read_parquet("test.parquet")

    # No shuffling: the custom metrics are computed per batch and then averaged,
    # so a fixed batch composition keeps the evaluation reproducible.
    validation = tf.data.Dataset.from_tensor_slices(
        (valid_df['ImageId'].values, valid_df['EncodedPixels'].values)
    )
    validation = validation.map(lambda x, y: parse_db_to_img(image_root + x, y))
    validation = validation.batch(BATCH_SIZE)
    validation = validation.map(Augment(resize_shape=IMG_SHAPE, train=False))
    validation = validation.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

    model_eval = tf.keras.models.load_model(
        f'gs://{GCS_BUCKET}/trained_model/segm_full_200_20220626-143859/', compile=False
    )
    # evaluate() performs no weight updates, so no optimizer is configured;
    # this also avoids optimizer constructor arguments that differ between
    # TensorFlow releases.
    model_eval.compile(
        loss=dice_p_bce,
        metrics=[dice_coef, 'binary_accuracy', true_positive_rate],
    )

    # evaluate() returns [loss, *metrics] in the order passed to compile().
    result = model_eval.evaluate(validation)
    metric_names = ("dice_coef", "binary_accuracy", "true_positive_rate")
    for name, value in zip(metric_names, result[1:]):
        metrics.log_metric(name, float(value))

## Model Evaluation Unit Test

In [8]:
@component(
    base_image="python:3.7",
    output_component_file="./build/model_eval_test_component.yaml")
def model_eval_test_component(
    metrics: Input[Metrics]
):
    """
    Unit test component that checks that every expected evaluation metric
    is present and meets its threshold.

    Args:
        metrics: Metrics artifact whose ``metadata`` maps metric name to value.

    Raises:
        AssertionError: if any expected metric is missing from the artifact
            or does not reach its threshold.
    """
    import logging

    metrics_thresholds = {
        'dice_coef': 0.1,
        'binary_accuracy': 0.8,
        'true_positive_rate': 0.3
    }

    # Drive the check from the thresholds rather than from whatever metadata
    # happens to be present: a metric that was never logged must fail the gate
    # instead of being skipped, and unrelated metadata keys are ignored.
    failures = []
    for name, threshold in metrics_thresholds.items():
        value = metrics.metadata.get(name)
        if value is None:
            failures.append(f"{name}: missing from metrics, threshold: {threshold}")
        elif not value >= threshold:
            # Written as `not >=` so that a NaN value also fails.
            failures.append(f"{name}:{value}, threshold: {threshold}")
        else:
            logging.info(f"{name}:{value}, threshold: {threshold}. Passed.")

    if failures:
        # Raised explicitly (not via `assert`) so the gate still works when
        # Python runs with assertions disabled.
        raise AssertionError(
            "Model evaluation thresholds not met: " + "; ".join(failures)
        )
