In [None]:
!pip install -q opendatasets tfx

In [2]:
# @title Import Library

import cv2,os,re, string,shutil
import opendatasets as od
import gdown
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tfx.components import ImportExampleGen, CsvExampleGen, StatisticsGen, SchemaGen, ExampleValidator, Transform, Trainer, Evaluator, Tuner, InfraValidator, Pusher
from tfx.proto import example_gen_pb2, trainer_pb2, pusher_pb2
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.dsl.components.common.resolver import Resolver
from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import LatestBlessedModelStrategy
from tfx.types import Channel
from tfx.types.standard_artifacts import Model, ModelBlessing
import tensorflow_model_analysis as tfma

from tfx.v1.proto import ServingSpec, TensorFlowServing, LocalDockerConfig

In [3]:
# @title Download dataset

# Create each working directory independently. The expression
# `"data" and "modules"` evaluates to "modules", so the previous existence
# check never looked at "data"; exist_ok=True makes every directory get
# created when missing and keeps the cell safe to re-run.
for directory in ('data', 'modules'):
  os.makedirs(directory, exist_ok=True)

# Download the Kaggle credentials file (saved as /content/kaggle.json), then
# the dataset itself.
# NOTE(review): the credentials come from a shared Drive link, so anyone who
# has this notebook can fetch them - consider supplying them via environment
# variables or an interactive prompt instead.
gdown.download('https://drive.google.com/uc?id=1nrWT28FKWgKaIWQsTIWj1IkDuN1F-pF2', '/content/', quiet=False)
od.download("https://www.kaggle.com/datasets/nandhanasuresh/allergen-status-of-food-products")

Downloading...
From: https://drive.google.com/uc?id=1nrWT28FKWgKaIWQsTIWj1IkDuN1F-pF2
To: /content/kaggle.json
100%|██████████| 65.0/65.0 [00:00<00:00, 131kB/s]


Dataset URL: https://www.kaggle.com/datasets/nandhanasuresh/allergen-status-of-food-products
Downloading allergen-status-of-food-products.zip to ./allergen-status-of-food-products


100%|██████████| 7.66k/7.66k [00:00<00:00, 8.64MB/s]







In [4]:
# Load the downloaded CSV, drop incomplete rows, give the columns
# identifier-friendly names and turn the textual label into 0/1
# (1 when the original value is 'Contains', 0 otherwise).
df = (
    pd.read_csv("/content/allergen-status-of-food-products/Allergen_Status_of_Food_Products.csv")
    .dropna()
    .rename(
        columns={
            'Main Ingredient': 'Main_Ingredient',
            'Fat/Oil': 'Fat_Oil',
            'Price ($)': 'Price',
            'Customer rating (Out of 5)': 'Rating',
            'Prediction': 'is_allergen',
        }
    )
    .assign(
        is_allergen=lambda frame: frame['is_allergen'].apply(
            lambda value: 1 if value == 'Contains' else 0
        )
    )
)

# Persist the cleaned table where the pipeline expects its input data.
df.to_csv('/content/data/data.csv', index=False)

In [5]:
# Paths of the module files that the %%writefile cells below create.
# The tuner and trainer modules import `allergen_food_transform` by module
# name, so the transform file name must stay in sync with those imports.
TRANSFORM_MODULE_FILE = "modules/allergen_food_transform.py"
TUNER_MODULE_FILE = "modules/allergen_food_tuner.py"
TRAINER_MODULE_FILE = "modules/allergen_food_trainer.py"

In [6]:
# @title transform module
%%writefile {TRANSFORM_MODULE_FILE}

import tensorflow as tf
import tensorflow_transform as tft

# Categorical feature name -> integer `dim`. preprocessing_fn uses `dim + 1`
# both as the vocabulary `top_k` and as the one-hot width, and the model
# builders size their inputs with the same `dim + 1`.
# NOTE(review): the numbers are presumably the distinct-value counts of each
# column in the dataset - confirm against the data.
# NOTE(review): "Allergens" is fed in as a feature while the label states
# whether the product contains allergens - check for target leakage.
CATEGORICAL_FEATURES = {
    "Main_Ingredient": 101,
    "Sweetener":10,
    "Fat_Oil":36,
    "Seasoning":186,
    "Allergens":40
}
# Numerical features; preprocessing_fn rescales each of them to [0, 1].
NUMERICAL_FEATURES = [
    "Price",
    "Rating"
]
# Name of the label column in the raw data.
LABEL_KEY = "is_allergen"


def transformed_name(key):
    """Return the name a feature carries after transformation.

    Args:
        key: raw feature name.
    Returns:
        The raw name with the ``_xf`` suffix appended.
    """
    suffix = "_xf"
    return key + suffix


def convert_num_to_one_hot(label_tensor, num_labels=2):
    """
    One-hot encode integer indices and flatten the result to two dimensions.
    Args:
        label_tensor: integer tensor holding the indices to encode
        num_labels: depth of the one-hot encoding (defaults to 2)
    Returns
        one-hot tensor reshaped to [-1, num_labels]
    """
    encoded = tf.one_hot(label_tensor, depth=num_labels)
    flat_shape = [-1, num_labels]
    return tf.reshape(encoded, flat_shape)


def preprocessing_fn(inputs):
    """
    Turn raw features into the transformed features consumed by the model.

    Args:
        inputs: map from feature keys to raw features.

    Return:
        outputs: map from transformed feature keys (``<key>_xf``) to
        transformed features.
    """

    outputs = {}

    # Categorical columns: vocabulary index, then one-hot of width dim + 1.
    for key, dim in CATEGORICAL_FEATURES.items():
        depth = dim + 1
        vocab_index = tft.compute_and_apply_vocabulary(inputs[key], top_k=depth)
        outputs[transformed_name(key)] = convert_num_to_one_hot(
            vocab_index, num_labels=depth
        )

    # Numerical columns: rescale into the [0, 1] range.
    for feature in NUMERICAL_FEATURES:
        scaled = tft.scale_to_0_1(inputs[feature])
        outputs[transformed_name(feature)] = scaled

    # The label is only cast to int64.
    label = tf.cast(inputs[LABEL_KEY], tf.int64)
    outputs[transformed_name(LABEL_KEY)] = label

    return outputs


Writing modules/allergen_food_transform.py


In [7]:
# @title Tuner module
%%writefile {TUNER_MODULE_FILE}

from typing import NamedTuple, Dict, Any
from allergen_food_transform import (
    CATEGORICAL_FEATURES,
    LABEL_KEY,
    NUMERICAL_FEATURES,
    transformed_name,
)
from kerastuner import RandomSearch
from kerastuner.engine import base_tuner
import tensorflow as tf
from tfx.components.trainer.fn_args_utils import FnArgs
import tensorflow_transform as tft

class TunerFnResult(NamedTuple):
    """Value returned by ``tuner_fn``: the tuner plus the kwargs for its search."""

    # Tuner instance that performs the hyperparameter search.
    tuner: base_tuner.BaseTuner
    # Keyword arguments forwarded when the search fits candidate models.
    fit_kwargs: Dict[str, Any]


def get_model(hp):
    """
    Build and compile the Keras binary classifier for one tuner trial.

    The search space covers the number of hidden layers
    ('num_hidden_layers', 1 to 5), the width shared by those layers ('units')
    and the Adam learning rate ('learning_rate').
    Args:
        hp: hyperparameter container supplied by the tuner
    Returns
        compiled tensorflow model
    """

    # One input per transformed feature: one-hot categoricals first, then the
    # scalar numerical features.
    categorical_inputs = [
        tf.keras.Input(shape=(dim + 1,), name=transformed_name(key))
        for key, dim in CATEGORICAL_FEATURES.items()
    ]
    numerical_inputs = [
        tf.keras.Input(shape=(1,), name=transformed_name(feature))
        for feature in NUMERICAL_FEATURES
    ]
    input_features = categorical_inputs + numerical_inputs

    hidden = tf.keras.layers.concatenate(input_features)
    hidden = tf.keras.layers.Dense(256, activation="relu")(hidden)

    # Tunable stack of hidden layers.
    num_hidden_layers = hp.Int('num_hidden_layers', 1, 5)
    for _ in range(num_hidden_layers):
        units = hp.Choice('units', values=[32, 64, 128, 256])
        hidden = tf.keras.layers.Dense(units=units, activation='relu')(hidden)

    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(hidden)
    model = tf.keras.models.Model(inputs=input_features, outputs=outputs)

    learning_rate = hp.Choice('learning_rate', values=[0.001, 0.01, 0.1])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate),
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.BinaryAccuracy()],
    )

    return model


def gzip_reader_fn(filenames):
    """Open GZIP-compressed TFRecord files.
    Args:
        filenames: file name(s) of the TFRecord files to read
    Returns
        TFRecordDataset
    """
    dataset = tf.data.TFRecordDataset(filenames, compression_type='GZIP')
    return dataset


def input_fn(file_pattern, tf_transform_output, batch_size=64):
    """Build the batched dataset of transformed examples used for tuning.
    Args:
        file_pattern: input tfrecord file pattern.
        tf_transform_output: A TFTransformOutput.
        batch_size: number of consecutive elements of the returned dataset
        to combine in a single batch
    Returns:
        A dataset of (features, label) tuples where features is a dictionary
        of Tensors and label is the transformed label Tensor.
    """
    # Work on a copy so the spec held by tf_transform_output stays untouched.
    feature_spec = tf_transform_output.transformed_feature_spec().copy()
    label_name = transformed_name(LABEL_KEY)

    # NOTE(review): num_epochs is not passed, so the library default applies -
    # presumably the dataset repeats and callers bound it with step counts.
    return tf.data.experimental.make_batched_features_dataset(
        file_pattern=file_pattern,
        batch_size=batch_size,
        features=feature_spec,
        reader=gzip_reader_fn,
        label_key=label_name,
    )


def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
    """Entry point for tuning: build the tuner and its fit arguments.
    Args:
        fn_args: FnArgs providing the working directory, the transform graph
        path, the train/eval file patterns and the step counts
    Returns
        TunerFnResult with a RandomSearch tuner and the kwargs for its search
    """
    # Random search over get_model's space, maximising validation accuracy.
    search = RandomSearch(
        get_model,
        objective='val_binary_accuracy',
        max_trials=10,
        executions_per_trial=1,
        directory=fn_args.working_dir,
        project_name='random_search',
    )

    transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)
    train_set = input_fn(fn_args.train_files, transform_output, 64)
    val_set = input_fn(fn_args.eval_files, transform_output, 64)

    fit_kwargs = {
        'x': train_set,
        'validation_data': val_set,
        'epochs': 3,
        'steps_per_epoch': fn_args.train_steps,
        'validation_steps': fn_args.eval_steps,
    }
    return TunerFnResult(tuner=search, fit_kwargs=fit_kwargs)


Writing modules/allergen_food_tuner.py


In [8]:
# @title Training module
%%writefile {TRAINER_MODULE_FILE}

import os

import tensorflow as tf
import tensorflow_transform as tft

from allergen_food_transform import (
    CATEGORICAL_FEATURES,
    LABEL_KEY,
    NUMERICAL_FEATURES,
    transformed_name,
)


def _hp_value(hp, names, default):
    """Return the first non-None hyperparameter found under `names`, else `default`."""
    if not hp:
        return default
    for name in names:
        if name in hp:
            value = hp.get(name)
            if value is not None:
                return value
    return default


def get_model(hp, show_summary=True):
    """
    Build and compile the Keras binary classifier used for training.

    Hyperparameters are read under the names the tuner module registers
    ('num_hidden_layers', 'units', 'learning_rate'). The older
    'hidden_layers' key is still honoured. Any missing value falls back to
    its default (1 hidden layer, 32 units, learning rate 0.001), so an empty
    or partial `hp` no longer passes None to the optimizer.

    Args:
        hp: mapping of hyperparameter names to values; may be empty or None.
        show_summary: when True, print the model summary after compiling.
    Returns:
        compiled tensorflow model
    """
    num_hidden_layers = int(
        _hp_value(hp, ('num_hidden_layers', 'hidden_layers'), 1)
    )
    units = int(_hp_value(hp, ('units',), 32))
    learning_rate = _hp_value(hp, ('learning_rate',), 0.001)

    # one-hot categorical features
    input_features = []
    for key, dim in CATEGORICAL_FEATURES.items():
        input_features.append(
            tf.keras.Input(shape=(dim + 1,), name=transformed_name(key))
        )

    for feature in NUMERICAL_FEATURES:
        input_features.append(
            tf.keras.Input(shape=(1,), name=transformed_name(feature))
        )

    concatenate = tf.keras.layers.concatenate(input_features)
    deep = tf.keras.layers.Dense(256, activation="relu")(concatenate)
    # Hidden stack sized by the (tuned) hyperparameters; every hidden layer
    # shares the same width, mirroring the tuner's single 'units' choice.
    for _ in range(num_hidden_layers):
        deep = tf.keras.layers.Dense(units=units, activation='relu')(deep)
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(deep)

    model = tf.keras.models.Model(inputs=input_features, outputs=outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate),
        loss="binary_crossentropy",
        metrics=[tf.keras.metrics.BinaryAccuracy()]
    )

    if show_summary:
        model.summary()

    return model


def gzip_reader_fn(filenames):
    """Return a TFRecordDataset that reads the given GZIP-compressed files."""
    dataset = tf.data.TFRecordDataset(filenames, compression_type='GZIP')
    return dataset


def get_serve_tf_examples_fn(model, tf_transform_output):
    """Build the serving function that maps serialized tf.Examples to predictions."""

    # Keep the transform layer on the model so the serving function can reach
    # it through `model.tft_layer`.
    model.tft_layer = tf_transform_output.transform_features_layer()

    @tf.function
    def serve_tf_examples_fn(serialized_tf_examples):
        """Parse, transform and score a batch of serialized examples."""
        # Serving requests carry no label, so remove it from the parsing spec.
        raw_spec = tf_transform_output.raw_feature_spec()
        raw_spec.pop(LABEL_KEY)

        parsed = tf.io.parse_example(serialized_tf_examples, raw_spec)
        predictions = model(model.tft_layer(parsed))
        return {"outputs": predictions}

    return serve_tf_examples_fn


def input_fn(file_pattern, tf_transform_output, batch_size=64):
    """Build the batched dataset of transformed examples used for training.
    Args:
        file_pattern: input tfrecord file pattern.
        tf_transform_output: A TFTransformOutput.
        batch_size: number of consecutive elements of the returned dataset
        to combine in a single batch
    Returns:
        A dataset of (features, label) tuples where features is a dictionary
        of Tensors and label is the transformed label Tensor.
    """
    # Copy the spec so the one held by tf_transform_output is not modified.
    feature_spec = tf_transform_output.transformed_feature_spec().copy()
    label_name = transformed_name(LABEL_KEY)

    # NOTE(review): num_epochs is not passed, so the library default applies -
    # presumably the dataset repeats and fit() is bounded by the step counts.
    return tf.data.experimental.make_batched_features_dataset(
        file_pattern=file_pattern,
        batch_size=batch_size,
        features=feature_spec,
        reader=gzip_reader_fn,
        label_key=label_name,
    )

# TFX Trainer will call this function.


def run_fn(fn_args):
    """Train the model and export it with a serving signature.
    Args:
    fn_args: Holds args used to train the model as name/value pairs.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    train_dataset = input_fn(fn_args.train_files, tf_transform_output, 64)
    eval_dataset = input_fn(fn_args.eval_files, tf_transform_output, 64)

    # Use the 'values' entry of the supplied hyperparameters, if any.
    hyperparameters = fn_args.hyperparameters
    hp = hyperparameters.get('values', {}) if hyperparameters else {}
    model = get_model(hp)

    # TensorBoard logs go in a "logs" folder next to the serving model dir.
    model_parent_dir = os.path.dirname(fn_args.serving_model_dir)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=os.path.join(model_parent_dir, "logs"),
        update_freq="batch",
    )

    model.fit(
        train_dataset,
        steps_per_epoch=fn_args.train_steps,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        callbacks=[tensorboard_callback],
        epochs=3,
    )

    # Export with a signature that accepts a batch of serialized tf.Examples.
    serving_fn = get_serve_tf_examples_fn(model, tf_transform_output)
    examples_spec = tf.TensorSpec(shape=[None], dtype=tf.string, name="examples")
    signatures = {
        "serving_default": serving_fn.get_concrete_function(examples_spec),
    }
    model.save(
        fn_args.serving_model_dir, save_format="tf", signatures=signatures
    )


Writing modules/allergen_food_trainer.py


In [9]:
# @title Real Components


In [11]:
import os
import shutil
import tempfile

ARCHIVE_PATH = "/content/arcive.zip"

# Drop an archive left over from a previous run so it is not packed into the
# new one.
if os.path.exists(ARCHIVE_PATH):
    os.remove(ARCHIVE_PATH)

# Build the zip outside /content and move it into place afterwards: writing
# the archive inside the directory being archived lets the half-written file
# end up inside itself.
# NOTE(review): /content also holds the downloaded kaggle.json credentials,
# which therefore get bundled - remove or exclude them before sharing the zip.
with tempfile.TemporaryDirectory() as staging_dir:
    staged_archive = shutil.make_archive(
        os.path.join(staging_dir, "arcive"), 'zip', "/content"
    )
    archive_path = shutil.move(staged_archive, ARCHIVE_PATH)

archive_path


'/content/arcive.zip'