# Plant Growth Classification Prediction Notebook

Notebook ini digunakan untuk membuat file .py sebagai module dari transform, trainer, dan tuner, serta untuk membuat file .py components.

## Import Libraries

Import libraries yang dibutuhkan pada proyek ini.

In [101]:
import os
 
import tensorflow as tf
import tensorflow_model_analysis as tfma
import sys
from tfx.components import (
    CsvExampleGen, 
    StatisticsGen, 
    SchemaGen, 
    ExampleValidator, 
    Transform, 
    Trainer,
    Tuner,
    Evaluator,
    Pusher
)
from tfx.proto import example_gen_pb2, trainer_pb2, pusher_pb2 
from tfx.types import Channel
from tfx.dsl.components.common.resolver import Resolver
from tfx.types.standard_artifacts import Model, ModelBlessing
from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import (
    LatestBlessedModelStrategy)
from typing import Text
from absl import logging
from tfx.orchestration import metadata, pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner
from tfx.orchestration.metadata import sqlite_metadata_connection_config

Membuat directory dari pipeline, module, dan output component pipeline

In [108]:
# Name of the pipeline; also used as the pipeline root folder name below.
PIPELINE_NAME = "tasyaputrialiya-pipeline"

# Pipeline inputs: data directory and module file paths.
DATA_ROOT = "data"
MODULE_COMPONENTS = "modules/components.py"
TRANSFORM_MODULE_FILE = "modules/plant_growth_transform.py"
TRAINER_MODULE_FILE = "modules/plant_growth_trainer.py"
TUNER_MODULE_FILE = "modules/plant_growth_tuner.py"
LOCAL_PIPELINE = "local_pipeline.py"
# requirement_file = os.path.join(root, "requirements.txt")

# Pipeline outputs: everything is placed under OUTPUT_BASE.
OUTPUT_BASE = "output"
pipeline_root = os.path.join(OUTPUT_BASE, PIPELINE_NAME)
serving_model_dir = os.path.join(OUTPUT_BASE, "serving_model")
# The metadata store file lives inside the pipeline root.
metadata_path = os.path.join(pipeline_root, "metadata.sqlite")

## Membuat file TRANSFORM_MODULE_FILE

Code di bawah digunakan untuk membuat transform module file yang akan digunakan sebagai module file pada komponen Transform. Module ini berisi function untuk mengubah nama fitur hasil transformasi, melakukan standardisasi (z-score) pada fitur numerik, dan memastikan tipe data label sesuai (int64).

In [103]:
%%writefile {TRANSFORM_MODULE_FILE}

import tensorflow as tf
import tensorflow_transform as tft

# Key of the label column.
LABEL_KEY = "Growth_Milestone"
# Keys of the numerical feature columns.
FEATURE_KEYS = [
    "Sunlight_Hours",
    "Temperature",
    "Humidity",
]


def transformed_name(key):
    """Return the name used for the transformed version of a feature.

    Args:
        key: original feature name.

    Returns:
        The feature name with the "_xf" suffix appended.
    """
    suffix = "_xf"
    return key + suffix

def preprocessing_fn(inputs):
    """Build the transformed feature map from the raw input features.

    Every feature listed in FEATURE_KEYS is passed through
    ``tft.scale_to_z_score`` and the label is cast to ``tf.int64``. All
    output keys carry the suffix added by ``transformed_name``.

    Args:
        inputs: map from feature keys to raw features.

    Returns:
        Map from transformed feature keys to transformed features.
    """
    # Standardize each numerical feature.
    transformed = {
        transformed_name(feature): tft.scale_to_z_score(inputs[feature])
        for feature in FEATURE_KEYS
    }

    # The label is only cast, not scaled.
    label_name = transformed_name(LABEL_KEY)
    transformed[label_name] = tf.cast(inputs[LABEL_KEY], tf.int64)

    return transformed

Writing modules/plant_growth_transform.py


## Membuat file TRAINER_MODULE_FILE

Code di bawah ini digunakan untuk membuat file module yang akan digunakan dalam pipeline trainer, module ini akan berisi beberapa function, yaitu 
1. **transformed_name()** untuk melakukan transformasi nama
2. **gzip_reader_fn()** untuk memuat data dalam TFRecord
3. **input_fn()** untuk memuat transformed feature yang dihasilkan komponen transform dan membaginya ke dalam batch
4. **model_builder()** untuk membuat arsitektur model 
5. **_get_serve_tf_examples_fn()** untuk menjalankan tahapan preprocessing
6. **run_fn()** untuk menjalankan proses training

In [104]:
%%writefile {TRAINER_MODULE_FILE}
import tensorflow as tf
import tensorflow_transform as tft
from tensorflow.keras import layers
import os
from tfx.components.trainer.fn_args_utils import FnArgs

# Key of the label column.
LABEL_KEY = "Growth_Milestone"
# Keys of the feature columns fed to the model.
FEATURE_KEYS = [
    "Sunlight_Hours",
    "Temperature",
    "Humidity",
]


def transformed_name(key):
    """Return the name used for the transformed version of a feature.

    Args:
        key: original feature name.

    Returns:
        The feature name with the "_xf" suffix appended.
    """
    suffix = "_xf"
    return key + suffix

def gzip_reader_fn(filenames):
    """Create a TFRecord dataset that reads GZIP-compressed files.

    Args:
        filenames: file name(s) forwarded to ``tf.data.TFRecordDataset``.

    Returns:
        A ``tf.data.TFRecordDataset`` over ``filenames``.
    """
    records = tf.data.TFRecordDataset(filenames, compression_type="GZIP")
    return records

def input_fn(file_pattern,
             tf_transform_output,
             num_epochs,
             batch_size=64) -> tf.data.Dataset:
    """Create a batched dataset of transformed features and labels.

    Args:
        file_pattern: file pattern(s) of the transformed example files.
        tf_transform_output: object providing ``transformed_feature_spec()``.
        num_epochs: forwarded to ``make_batched_features_dataset``.
        batch_size: number of examples per batch.

    Returns:
        The dataset produced by ``make_batched_features_dataset`` with the
        transformed label used as ``label_key``.
    """
    # Work on a copy of the post-transform feature spec.
    feature_spec = tf_transform_output.transformed_feature_spec().copy()
    label_name = transformed_name(LABEL_KEY)

    return tf.data.experimental.make_batched_features_dataset(
        file_pattern=file_pattern,
        batch_size=batch_size,
        features=feature_spec,
        reader=gzip_reader_fn,
        num_epochs=num_epochs,
        label_key=label_name,
    )

def model_builder():
    """Build and compile the Keras binary classification model.

    One scalar float input is created per transformed feature; the inputs
    are concatenated and passed through two ReLU dense layers (64 and 32
    units) followed by a single sigmoid output unit.

    Returns:
        The compiled ``tf.keras.Model`` (its summary is printed as well).
    """
    # One named input per transformed feature.
    feature_inputs = {}
    for key in FEATURE_KEYS:
        name = transformed_name(key)
        feature_inputs[name] = tf.keras.Input(
            shape=(1,), name=name, dtype=tf.float32)

    hidden = layers.Concatenate()(list(feature_inputs.values()))
    hidden = layers.Dense(64, activation="relu")(hidden)
    hidden = layers.Dense(32, activation="relu")(hidden)
    prediction = layers.Dense(1, activation="sigmoid")(hidden)

    model = tf.keras.Model(inputs=feature_inputs, outputs=prediction)
    model.compile(
        loss="binary_crossentropy",
        optimizer=tf.keras.optimizers.Adam(0.01),
        metrics=[tf.keras.metrics.BinaryAccuracy()],
    )

    model.summary()
    return model

def _get_serve_tf_examples_fn(model, tf_transform_output):
    """Create the function that serves predictions for serialized examples.

    Args:
        model: the Keras model used for prediction.
        tf_transform_output: object providing ``transform_features_layer()``
            and ``raw_feature_spec()``.

    Returns:
        A ``tf.function`` that parses serialized examples, applies the
        transform layer and returns the model output.
    """
    # Keep the transform layer as a model attribute; the serving function
    # below reads it from there.
    model.tft_layer = tf_transform_output.transform_features_layer()

    @tf.function
    def serve_tf_examples_fn(serialized_tf_examples):
        """Parse raw examples, transform them and run the model."""
        # The label is dropped from the parsing spec, so incoming examples
        # are parsed without it.
        raw_spec = tf_transform_output.raw_feature_spec()
        raw_spec.pop(LABEL_KEY)

        raw_features = tf.io.parse_example(serialized_tf_examples, raw_spec)
        model_inputs = model.tft_layer(raw_features)

        # Predictions are computed from the transformed features.
        return model(model_inputs)

    return serve_tf_examples_fn

def run_fn(fn_args: FnArgs) -> None:
    """Train the model and export it with a serving signature.

    Args:
        fn_args: arguments supplied to the training function. This function
            reads ``serving_model_dir``, ``transform_graph_path``,
            ``train_files``, ``eval_files``, ``train_steps`` and
            ``eval_steps``.
    """
    # TensorBoard logs go into a "logs" folder next to the serving model dir.
    log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), 'logs')

    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir, update_freq='batch'
    )

    # NOTE(review): the checkpoint callback and the final model.save() below
    # both write to fn_args.serving_model_dir - confirm this is intended.
    es = tf.keras.callbacks.EarlyStopping(
        monitor='val_binary_accuracy', mode='max', verbose=1, patience=10)
    mc = tf.keras.callbacks.ModelCheckpoint(
        fn_args.serving_model_dir, monitor='val_binary_accuracy',
        mode='max', verbose=1, save_best_only=True)

    # Load the transform output
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

    # Create batches of data
    train_set = input_fn(fn_args.train_files, tf_transform_output, num_epochs=10)
    val_set = input_fn(fn_args.eval_files, tf_transform_output, num_epochs=10)

    # Build the model
    model = model_builder()

    # Train the model. The step counts come from fn_args instead of being
    # hard-coded, so the values configured for the training run are honored
    # (the tuner module reads the same two fields).
    model.fit(
        x=train_set,
        validation_data=val_set,
        callbacks=[tensorboard_callback, es, mc],
        steps_per_epoch=fn_args.train_steps,
        validation_steps=fn_args.eval_steps,
        epochs=10
    )

    # Export with a serving signature that accepts a batch of serialized
    # examples (a 1-D string tensor named "examples").
    signatures = {
        'serving_default':
        _get_serve_tf_examples_fn(model, tf_transform_output).get_concrete_function(
                                    tf.TensorSpec(
                                    shape=[None],
                                    dtype=tf.string,
                                    name='examples'))
    }
    model.save(fn_args.serving_model_dir, save_format='tf', signatures=signatures)

Writing modules/plant_growth_trainer.py


## Membuat file TUNER_MODULE_FILE

Code di bawah digunakan untuk membuat file module yang akan digunakan pada tuner dimana module ini akan berisi beberapa function yang sebagiannya menyerupai trainer, function tersebut yaitu:
1. **transformed_name()** untuk mentransformasi nama 
2. **gzip_reader_fn()** untuk memuat data dalam TFRecord
3. **input_fn()** untuk mengambil input
4. **model_builder()** untuk membuat model yang akan digunakan dalam tuner
5. **tuner_fn()** untuk menjalankan tuner

In [105]:
%%writefile {TUNER_MODULE_FILE}
from typing import NamedTuple, Dict, Any, Text
from tfx.components.trainer.fn_args_utils import FnArgs
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import layers
import keras_tuner as kt
from keras_tuner.engine.base_tuner import BaseTuner
import tensorflow_transform as tft
import tensorflow as tf

class TunerFnResult(NamedTuple):
    """Pair of a tuner and the keyword arguments prepared for it."""

    tuner: BaseTuner
    fit_kwargs: Dict[Text, Any]


# Key of the label column.
LABEL_KEY = "Growth_Milestone"
# Keys of the feature columns fed to the model.
FEATURE_KEYS = [
    "Sunlight_Hours",
    "Temperature",
    "Humidity",
]


def transformed_name(key):
    """Return the name used for the transformed version of a feature.

    Args:
        key: original feature name.

    Returns:
        The feature name with the "_xf" suffix appended.
    """
    suffix = "_xf"
    return key + suffix

def gzip_reader_fn(filenames):
    """Create a TFRecord dataset that reads GZIP-compressed files.

    Args:
        filenames: file name(s) forwarded to ``tf.data.TFRecordDataset``.

    Returns:
        A ``tf.data.TFRecordDataset`` over ``filenames``.
    """
    records = tf.data.TFRecordDataset(filenames, compression_type="GZIP")
    return records

def input_fn(file_pattern, tf_transform_output, num_epochs=None, batch_size=64):
    """Create a batched dataset of transformed features and labels.

    Args:
        file_pattern: file pattern(s) of the transformed example files.
        tf_transform_output: object providing ``transformed_feature_spec()``.
        num_epochs: forwarded to ``make_batched_features_dataset``.
        batch_size: number of examples per batch.

    Returns:
        The dataset produced by ``make_batched_features_dataset`` with the
        transformed label used as ``label_key``.
    """
    # Work on a copy of the post-transform feature spec.
    feature_spec = tf_transform_output.transformed_feature_spec().copy()
    label_name = transformed_name(LABEL_KEY)

    return tf.data.experimental.make_batched_features_dataset(
        file_pattern=file_pattern,
        batch_size=batch_size,
        features=feature_spec,
        reader=gzip_reader_fn,
        num_epochs=num_epochs,
        label_key=label_name,
    )

def model_builder(hp):
    """Build and compile the Keras model for one hyperparameter trial.

    Tunable values: units of both hidden dense layers, both dropout rates
    and the Adam learning rate.

    Args:
        hp: hyperparameter object providing ``Int``, ``Float`` and ``Choice``.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    # One named input per transformed feature.
    feature_inputs = {}
    for key in FEATURE_KEYS:
        name = transformed_name(key)
        feature_inputs[name] = tf.keras.Input(
            shape=(1,), name=name, dtype=tf.float32)

    # Hyperparameters are requested in the same order the layers use them.
    units_1 = hp.Int('units_1', min_value=32, max_value=128, step=32)
    dropout_1 = hp.Float('dropout_1', min_value=0.0, max_value=0.5, step=0.1)
    units_2 = hp.Int('units_2', min_value=16, max_value=64, step=16)
    dropout_2 = hp.Float('dropout_2', min_value=0.0, max_value=0.5, step=0.1)

    hidden = layers.Concatenate()(list(feature_inputs.values()))
    hidden = layers.Dense(units=units_1, activation='relu')(hidden)
    hidden = layers.Dropout(rate=dropout_1)(hidden)
    hidden = layers.Dense(units=units_2, activation='relu')(hidden)
    hidden = layers.Dropout(rate=dropout_2)(hidden)
    prediction = layers.Dense(1, activation='sigmoid')(hidden)

    model = tf.keras.Model(inputs=feature_inputs, outputs=prediction)

    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    model.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=[tf.keras.metrics.BinaryAccuracy()],
    )

    return model

def tuner_fn(fn_args: FnArgs) -> TunerFnResult:
    """Create the Hyperband tuner and the keyword arguments for fitting.

    Args:
        fn_args: arguments supplied to the tuner function. This function
            reads ``transform_graph_path``, ``train_files``, ``eval_files``,
            ``working_dir``, ``train_steps`` and ``eval_steps``.

    Returns:
        A ``TunerFnResult`` holding the tuner and its fit keyword arguments.
    """
    # Datasets built from the transformed examples.
    transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)
    train_set = input_fn(fn_args.train_files, transform_output, num_epochs=10)
    val_set = input_fn(fn_args.eval_files, transform_output, num_epochs=10)

    # Hyperband search maximizing validation binary accuracy.
    hyperband = kt.Hyperband(
        model_builder,
        objective='val_binary_accuracy',
        max_epochs=10,
        factor=3,
        directory=fn_args.working_dir,
        project_name='kt_hyperband',
    )

    # Stop once validation accuracy no longer improves by at least min_delta.
    stop_early = EarlyStopping(
        monitor='val_binary_accuracy',
        mode='max',
        min_delta=0.001,
        patience=5,
        verbose=1,
    )

    return TunerFnResult(
        tuner=hyperband,
        fit_kwargs={
            "callbacks": [stop_early],
            "x": train_set,
            "validation_data": val_set,
            "steps_per_epoch": fn_args.train_steps,
            "validation_steps": fn_args.eval_steps,
        },
    )

Writing modules/plant_growth_tuner.py


## Membuat file MODULE_COMPONENTS

Code di bawah digunakan untuk membuat file module yang akan menyatukan seluruh TFX components. Seluruh components, mulai dari CsvExampleGen hingga Pusher, diinisialisasi di dalam function init_components() pada module components berikut.

In [106]:
%%writefile {MODULE_COMPONENTS}

"""Initiate tfx pipeline components
"""
 
import os
 
import tensorflow as tf
import tensorflow_model_analysis as tfma
from tfx.components import (
    CsvExampleGen, 
    StatisticsGen, 
    SchemaGen, 
    ExampleValidator, 
    Transform, 
    Trainer,
    Tuner,
    Evaluator,
    Pusher
)
from tfx.proto import example_gen_pb2, trainer_pb2, pusher_pb2 
from tfx.types import Channel
from tfx.dsl.components.common.resolver import Resolver
from tfx.types.standard_artifacts import Model, ModelBlessing
from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import (
    LatestBlessedModelStrategy)
 
def init_components(
    data_dir,
    transform_module,
    training_module,
    tuner_module,
    training_steps,
    eval_steps,
    serving_model_dir,
):
    """Create every TFX component used by the pipeline.

    Args:
        data_dir (str): a path to the data
        transform_module (str): a path to the transform module file
        training_module (str): a path to the trainer module file
        tuner_module (str): a path to the tuner module file
        training_steps (int): number of training steps for the Trainer
        eval_steps (int): number of eval steps for the Trainer
        serving_model_dir (str): a path to the serving model directory

    Returns:
        Tuple of TFX components: example gen, statistics gen, schema gen,
        example validator, transform, trainer, tuner, model resolver,
        evaluator and pusher.
    """
    # Split the data into "train" and "eval" with a hash-bucket ratio of 8:2.
    train_split = example_gen_pb2.SplitConfig.Split(name="train", hash_buckets=8)
    eval_split = example_gen_pb2.SplitConfig.Split(name="eval", hash_buckets=2)
    split_output = example_gen_pb2.Output(
        split_config=example_gen_pb2.SplitConfig(splits=[train_split, eval_split])
    )

    example_gen = CsvExampleGen(input_base=data_dir, output_config=split_output)

    statistics_gen = StatisticsGen(examples=example_gen.outputs["examples"])

    schema_gen = SchemaGen(statistics=statistics_gen.outputs["statistics"])

    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs["statistics"],
        schema=schema_gen.outputs["schema"],
    )

    transform = Transform(
        examples=example_gen.outputs["examples"],
        schema=schema_gen.outputs["schema"],
        module_file=os.path.abspath(transform_module),
    )

    # The Trainer uses the step counts given by the caller.
    trainer = Trainer(
        module_file=os.path.abspath(training_module),
        examples=transform.outputs["transformed_examples"],
        transform_graph=transform.outputs["transform_graph"],
        schema=schema_gen.outputs["schema"],
        train_args=trainer_pb2.TrainArgs(
            splits=["train"], num_steps=training_steps),
        eval_args=trainer_pb2.EvalArgs(
            splits=["eval"], num_steps=eval_steps),
    )

    # The Tuner uses fixed step counts.
    # NOTE(review): no Tuner output is passed to the Trainer here - confirm
    # whether the tuned hyperparameters are meant to be used for training.
    tuner = Tuner(
        module_file=os.path.abspath(tuner_module),
        examples=transform.outputs["transformed_examples"],
        transform_graph=transform.outputs["transform_graph"],
        schema=schema_gen.outputs["schema"],
        train_args=trainer_pb2.TrainArgs(splits=["train"], num_steps=500),
        eval_args=trainer_pb2.EvalArgs(splits=["eval"], num_steps=100),
    )

    # Resolver whose model output serves as the Evaluator's baseline model.
    model_resolver = Resolver(
        strategy_class=LatestBlessedModelStrategy,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing),
    ).with_id("Latest_blessed_model_resolver")

    label_key = "Growth_Milestone"  # Update this if your label key is different

    # Metrics that are reported without any threshold.
    plain_metric_names = (
        "ExampleCount",
        "AUC",
        "FalsePositives",
        "TruePositives",
        "FalseNegatives",
        "TrueNegatives",
    )
    metric_configs = [
        tfma.MetricConfig(class_name=metric_name)
        for metric_name in plain_metric_names
    ]

    # Binary accuracy carries both a value threshold (lower bound 0.5) and a
    # change threshold (higher is better, absolute 0.0001).
    accuracy_threshold = tfma.MetricThreshold(
        value_threshold=tfma.GenericValueThreshold(
            lower_bound={"value": 0.5}
        ),
        change_threshold=tfma.GenericChangeThreshold(
            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
            absolute={"value": 0.0001},
        ),
    )
    metric_configs.append(
        tfma.MetricConfig(
            class_name="BinaryAccuracy",
            threshold=accuracy_threshold,
        )
    )

    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key=label_key)],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=[tfma.MetricsSpec(metrics=metric_configs)],
    )

    evaluator = Evaluator(
        examples=example_gen.outputs["examples"],
        model=trainer.outputs["model"],
        baseline_model=model_resolver.outputs["model"],
        eval_config=eval_config,
    )

    # Push the trained model, gated by the Evaluator's blessing, to the
    # serving model directory on the filesystem.
    push_destination = pusher_pb2.PushDestination(
        filesystem=pusher_pb2.PushDestination.Filesystem(
            base_directory=serving_model_dir
        )
    )
    pusher = Pusher(
        model=trainer.outputs["model"],
        model_blessing=evaluator.outputs["blessing"],
        push_destination=push_destination,
    )

    return (
        example_gen,
        statistics_gen,
        schema_gen,
        example_validator,
        transform,
        trainer,
        tuner,
        model_resolver,
        evaluator,
        pusher,
    )

Writing modules/components.py
