# Import

In [8]:
import os
from absl import logging
from typing import Text
from tfx.orchestration import pipeline, metadata
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

# Define

Pertama, akan mengatur konfigurasi pipeline TFX dengan menentukan nama pipeline, lokasi data, path modul yang diperlukan, serta direktori output untuk menyimpan hasil proses pipeline machine learning.

In [9]:
def get_pipeline_variables():
    # Nama pipeline
    PIPELINE_NAME = "mellisadmyn-pipeline"

    # Pipeline inputs
    DATA_ROOT = "data"
    MODULES_FOLDER = "modules"
    TRANSFORM_MODULE_FILE = os.path.join(MODULES_FOLDER, "transform.py")
    TRAINER_MODULE_FILE = os.path.join(MODULES_FOLDER, "trainer.py")
    TUNER_MODULE_FILE = os.path.join(MODULES_FOLDER, "tuner.py")
    COMPONENTS_MODULE_FILE = os.path.join(MODULES_FOLDER, "components.py")

    # Pipeline outputs
    OUTPUT_BASE = "output"
    serving_model_dir = os.path.join(OUTPUT_BASE, 'serving_model')
    pipeline_root = os.path.join(OUTPUT_BASE, PIPELINE_NAME)
    metadata_path = os.path.join(pipeline_root, "metadata.sqlite")

    # Buat folder modules jika belum ada
    os.makedirs(MODULES_FOLDER, exist_ok=True)

    # Buat file kosong jika belum ada
    for file in [TRANSFORM_MODULE_FILE, TRAINER_MODULE_FILE, TUNER_MODULE_FILE, COMPONENTS_MODULE_FILE]:
        if not os.path.exists(file):
            with open(file, 'w') as f:
                f.write(f"# {os.path.basename(file)} file for pipeline.\n")

    # Return all variables as separate values
    return (
        PIPELINE_NAME,
        DATA_ROOT,
        TRANSFORM_MODULE_FILE,
        TRAINER_MODULE_FILE,
        TUNER_MODULE_FILE,
        COMPONENTS_MODULE_FILE,
        OUTPUT_BASE,
        serving_model_dir,
        pipeline_root,
        metadata_path,
    )

# Memanggil fungsi dan meng-unpack variabel
PIPELINE_NAME, DATA_ROOT, TRANSFORM_MODULE_FILE, TRAINER_MODULE_FILE, TUNER_MODULE_FILE, COMPONENTS_MODULE_FILE, OUTPUT_BASE, serving_model_dir, pipeline_root, metadata_path = get_pipeline_variables()

# Print variables
print("PIPELINE_NAME:", PIPELINE_NAME)
print("DATA_ROOT:", DATA_ROOT)
print("TRANSFORM_MODULE_FILE:", TRANSFORM_MODULE_FILE)
print("TRAINER_MODULE_FILE:", TRAINER_MODULE_FILE)
print("TUNER_MODULE_FILE:", TUNER_MODULE_FILE)
print("COMPONENTS_MODULE_FILE:", COMPONENTS_MODULE_FILE)
print("OUTPUT_BASE:", OUTPUT_BASE)
print("serving_model_dir:", serving_model_dir)
print("pipeline_root:", pipeline_root)
print("metadata_path:", metadata_path)


PIPELINE_NAME: mellisadmyn-pipeline
DATA_ROOT: data
TRANSFORM_MODULE_FILE: modules\transform.py
TRAINER_MODULE_FILE: modules\trainer.py
TUNER_MODULE_FILE: modules\tuner.py
COMPONENTS_MODULE_FILE: modules\components.py
OUTPUT_BASE: output
serving_model_dir: output\serving_model
pipeline_root: output\mellisadmyn-pipeline
metadata_path: output\mellisadmyn-pipeline\metadata.sqlite


Lalu, menginisialisasi komponen-komponen TFX yang membangun pipeline machine learning end-to-end, mulai dari membaca dan memproses data, melakukan validasi dan transformasi, melakukan tuning dan pelatihan model, mengevaluasi kinerja model, hingga menyimpan model yang lolos evaluasi untuk keperluan deployment.

# Components

Pertama adalah membuat components.py

Kode berikut mendefinisikan fungsi `init_components`, yang menginisialisasi semua komponen TFX seperti pembacaan data (`CsvExampleGen`), validasi data (`ExampleValidator`), transformasi fitur (`Transform`), tuning hyperparameter (`Tuner`), pelatihan model (`Trainer`), evaluasi model (`Evaluator`), hingga penyimpanan model yang telah dilatih dan lolos evaluasi (`Pusher`), untuk membangun pipeline machine learning end-to-end.

In [None]:
%%writefile {COMPONENTS_MODULE_FILE}
"""
Modul ini berisi fungsi untuk menginisialisasi semua komponen TFX yang
digunakan dalam pipeline machine learning end-to-end.
"""

import os
import tensorflow_model_analysis as tfma
from tfx.components import (
    CsvExampleGen,
    StatisticsGen,
    SchemaGen,
    ExampleValidator,
    Transform,
    Trainer,
    Tuner,
    Evaluator,
    Pusher,
)
from tfx.proto import example_gen_pb2, trainer_pb2, pusher_pb2
from tfx.types import Channel
from tfx.dsl.components.common.resolver import Resolver
from tfx.types.standard_artifacts import Model, ModelBlessing
from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import (
    LatestBlessedModelStrategy,
)

# Fungsi untuk melakukan inisialisasi komponen
def init_components(config):
    """
    Mengembalikan komponen TFX untuk pipeline.

    Args:
        config (dict): Dictionary konfigurasi dengan path dan pengaturan.

    Returns:
        tuple: Komponen pipeline TFX.
    """

    # Membagi dataset dengan perbandingan 80% untuk training dan 20% untuk evaluasi
    output = example_gen_pb2.Output(    # pylint: disable=no-member
        split_config=example_gen_pb2.SplitConfig(   # pylint: disable=no-member
            splits=[
                example_gen_pb2.SplitConfig.Split(name="train", hash_buckets=8),    # pylint: disable=no-member
                example_gen_pb2.SplitConfig.Split(name="eval", hash_buckets=2),     # pylint: disable=no-member
            ]
        )
    )

    # Komponen example gen
    example_gen = CsvExampleGen(
        input_base=config["DATA_ROOT"],
        output_config=output,
    )

    # Komponen statistics gen
    statistics_gen = StatisticsGen(
        examples=example_gen.outputs["examples"]
    )

    # Komponen schema gen
    schema_gen = SchemaGen(
        statistics=statistics_gen.outputs["statistics"]
    )

    # Komponen example validator
    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs["statistics"],
        schema=schema_gen.outputs["schema"],
    )

    # Komponen transform menggunakan modul transform.py
    transform = Transform(
        examples=example_gen.outputs["examples"],
        schema=schema_gen.outputs["schema"],
        module_file=os.path.abspath(config["transform_module"]),
    )

    # Komponen tuner menggunakan modul tuner.py
    tuner = Tuner(
        module_file=os.path.abspath(config["tuner_module"]),
        examples=transform.outputs["transformed_examples"],
        transform_graph=transform.outputs["transform_graph"],
        schema=schema_gen.outputs["schema"],
        train_args=trainer_pb2.TrainArgs(  # pylint: disable=no-member
            splits=["train"],
            num_steps=config["training_steps"],
        ),
        eval_args=trainer_pb2.EvalArgs(  # pylint: disable=no-member
            splits=["eval"],
            num_steps=config["eval_steps"],
        ),
    )

    # Komponen trainer menggunakan modul trainer.py
    trainer = Trainer(
        module_file=os.path.abspath(config["training_module"]),
        examples=transform.outputs["transformed_examples"],
        transform_graph=transform.outputs["transform_graph"],
        schema=schema_gen.outputs["schema"],
        hyperparameters=tuner.outputs["best_hyperparameters"],
        train_args=trainer_pb2.TrainArgs(  # pylint: disable=no-member
            splits=["train"],
            num_steps=config["training_steps"],
        ),
        eval_args=trainer_pb2.EvalArgs(  # pylint: disable=no-member
            splits=["eval"],
            num_steps=config["eval_steps"],
        ),
    )

    # Komponen model resolver
    model_resolver = Resolver(
        strategy_class=LatestBlessedModelStrategy,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing),
    ).with_id("Latest_blessed_model_resolver")

    # Konfigurasi metrik evaluasi
    metrics_specs = [
        tfma.MetricsSpec(
            metrics=[
                tfma.MetricConfig(class_name="AUC"),
                tfma.MetricConfig(class_name="Precision"),
                tfma.MetricConfig(class_name="Recall"),
                tfma.MetricConfig(class_name="ExampleCount"),
                tfma.MetricConfig(
                    class_name="BinaryAccuracy",
                    threshold=tfma.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(
                            lower_bound={"value": 0.8}
                        ),
                        change_threshold=tfma.GenericChangeThreshold(
                            direction=tfma.MetricDirection.HIGHER_IS_BETTER,
                            absolute={"value": 0.0001},
                        ),
                    ),
                ),
            ]
        )
    ]

    # Konfigurasi evaluasi
    eval_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key="Attrition")],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=metrics_specs,
    )

    # Komponen evaluator
    evaluator = Evaluator(
        examples=example_gen.outputs["examples"],
        model=trainer.outputs["model"],
        baseline_model=model_resolver.outputs["model"],
        eval_config=eval_config,
    )

    # Komponen pusher
    pusher = Pusher(
        model=trainer.outputs["model"],
        model_blessing=evaluator.outputs["blessing"],
        push_destination=pusher_pb2.PushDestination(  # pylint: disable=no-member
            filesystem=pusher_pb2.PushDestination.Filesystem(  # pylint: disable=no-member
                base_directory=config["serving_model_dir"]
            )
        ),
    )

    # Mengembalikan semua komponen pipeline
    return (
        example_gen,
        statistics_gen,
        schema_gen,
        example_validator,
        transform,
        tuner,
        trainer,
        model_resolver,
        evaluator,
        pusher,
    )


# Transform

Lalu, yang kedua adalah mendefinisikan file `transform.py`,

yang berisi fungsi `preprocessing_fn` untuk melakukan preprocessing data. Preprocessing ini mencakup normalisasi fitur numerik (`TotalWorkingYears`, `Age`, `MonthlyIncome`), encoding fitur kategorikal (`OverTime`, `MaritalStatus`, `JobRole`, `Department`), dan transformasi label (`Attrition`) menjadi nilai numerik, sehingga data siap digunakan oleh komponen pipeline lainnya seperti pelatihan model (`Trainer`).

In [None]:
%%writefile {TRANSFORM_MODULE_FILE}
"""
Modul ini berisi fungsi untuk melakukan preprocessing dataset menggunakan TFX Transform.
"""

import tensorflow as tf
import tensorflow_transform as tft

# Daftar numerical fitur pada dataset
NUMERICAL_FEATURES = [
    "TotalWorkingYears",
    "Age",
    "MonthlyIncome",
]

# Daftar categorical fitur pada dataset
CATEGORICAL_FEATURES = {
    "OverTime": 2,  # Yes/No -> Binary (2 unique values)
    "MaritalStatus": 3,  # Married/Single/Divorced -> 3 unique values
    "JobRole": 9,  # Healthcare Representative, Sales Executive, etc. -> 9 unique values
    "Department": 3,  # Research & Development, Sales, etc. -> 3 unique values
}

# Label key
LABEL_KEY = "Attrition"

# Fungsi untuk mengubah nama fitur yang sudah ditransformasi
def transformed_name(key):
    """Renaming transformed features."""
    return key + "_xf"

# Fungsi untuk melakukan preprocessing
def preprocessing_fn(inputs):
    """
    Preprocess input features into transformed features.

    Args:
        inputs: Map dari kunci fitur ke raw features.

    Returns:
        outputs: Map dari kunci fitur ke fitur yang telah ditransformasi.
    """
    outputs = {}

    # Transformasi untuk fitur numerik
    for feature in NUMERICAL_FEATURES:
        outputs[transformed_name(feature)] = tft.scale_to_0_1(inputs[feature])

    # Transformasi untuk fitur kategorikal
    for feature in CATEGORICAL_FEATURES:
        outputs[transformed_name(feature)] = tft.compute_and_apply_vocabulary(
            inputs[feature]
        )

    # Transformasi untuk label
    outputs[transformed_name(LABEL_KEY)] = tf.cast(
        tft.compute_and_apply_vocabulary(inputs[LABEL_KEY]), tf.int64
    )

    return outputs


# Tuner

Kode ketiga yaitu mendefinisikan file `tuner.py`, 

yang berisi fungsi `model_builder` untuk membuat model deep learning dengan arsitektur fleksibel menggunakan hyperparameter tuning. Fungsi ini memungkinkan eksplorasi konfigurasi terbaik, seperti jumlah unit di hidden layer, dropout rate, dan learning rate, untuk tugas klasifikasi biner. 

Selain itu, fungsi `tuner_fn` digunakan untuk menginisialisasi proses tuning menggunakan `KerasTuner`, memanfaatkan dataset yang telah ditransformasikan, dan mencari kombinasi hyperparameter terbaik berdasarkan metrik validasi (`val_binary_accuracy`).

In [None]:
%%writefile {TUNER_MODULE_FILE}
"""
Modul ini mengatur fungsi untuk tuning hyperparameter
model klasifikasi biner menggunakan Keras Tuner.
"""

import tensorflow as tf
import keras_tuner as kt
import tensorflow_transform as tft
from tfx.v1.components import TunerFnResult
from tfx.components.trainer.fn_args_utils import FnArgs
from transform import NUMERICAL_FEATURES, CATEGORICAL_FEATURES, transformed_name
from trainer import input_fn

# Fungsi untuk membuat input features (menghindari duplikasi kode)
def create_input_features():
    """
    Membuat input features untuk numerical dan categorical features.
    
    Returns:
        list: Daftar input layers.
    """
    input_features = []

    # Input layers for numerical features
    for feature in NUMERICAL_FEATURES:
        input_features.append(
            tf.keras.Input(shape=(1,), name=transformed_name(feature))
        )

    # Input layers for categorical features
    for feature in CATEGORICAL_FEATURES:
        input_features.append(
            tf.keras.Input(shape=(1,), name=transformed_name(feature))
        )

    return input_features

# Fungsi untuk membuat model
def model_builder(hyperparameters):
    """
    Defines and returns a Keras model for binary classification.
    """
    # Membuat input features
    input_features = create_input_features()

    # Concatenate all inputs
    concatenate = tf.keras.layers.concatenate(input_features)

    # Hidden layers with hyperparameter tuning
    deep = tf.keras.layers.Dense(
        hyperparameters.Choice('units_layer1', [64, 128, 256]),
        activation="relu")(concatenate)
    deep = tf.keras.layers.Dropout(
        hyperparameters.Choice('dropout_layer1', [0.2, 0.3, 0.4]))(deep)

    deep = tf.keras.layers.Dense(
        hyperparameters.Choice('units_layer2', [32, 64, 128]),
        activation="relu")(deep)
    deep = tf.keras.layers.Dropout(
        hyperparameters.Choice('dropout_layer2', [0.2, 0.3, 0.4]))(deep)

    # Output layer
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(deep)

    # Compile the model
    model = tf.keras.models.Model(inputs=input_features, outputs=outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            learning_rate=hyperparameters.Choice('learning_rate', [0.001, 0.0001])
        ),
        loss='binary_crossentropy',
        metrics=[tf.keras.metrics.BinaryAccuracy()]
    )

    return model

# Fungsi tuner
def tuner_fn(fn_args: FnArgs):
    """
    Hyperparameter tuning function for the model.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_graph_path)

    # Load training and evaluation datasets
    train_dataset = input_fn(fn_args.train_files, tf_transform_output, batch_size=64)
    eval_dataset = input_fn(fn_args.eval_files, tf_transform_output, batch_size=64)

    # Create tuner
    tuner = kt.RandomSearch(
        model_builder,
        objective='val_binary_accuracy',
        max_trials=10,
        directory=fn_args.working_dir,
        project_name='attrition_tuning'
    )

    return TunerFnResult(
        tuner=tuner,
        fit_kwargs={
            "x": train_dataset,
            "validation_data": eval_dataset,
            "steps_per_epoch": fn_args.train_steps,
            "validation_steps": fn_args.eval_steps,
            "epochs": 10
        }
    )


# Trainer 

Selanjutnya, Kode keempat berikut mendefinisikan file `trainer.py`, 

yang berisi fungsi `get_model` untuk membangun arsitektur model deep learning menggunakan TensorFlow untuk tugas klasifikasi biner. Fungsi ini mencakup definisi input fitur (numerik dan kategorikal), hidden layer dengan aktivasi ReLU, dan layer output dengan aktivasi sigmoid. 

Selain itu, fungsi `run_fn` digunakan untuk melatih model menggunakan dataset yang telah diproses, dengan logging melalui TensorBoard, serta menyimpan model yang telah dilatih lengkap dengan serving signature untuk keperluan deployment. Visualisasi arsitektur model juga disimpan sebagai gambar.

In [None]:
%%writefile {TRAINER_MODULE_FILE}
"""
Modul ini berisi fungsi untuk pelatihan model machine learning
menggunakan pipeline TFX, termasuk definisi model, proses pelatihan,
dan evaluasi.
"""

import os
import tensorflow as tf
import tensorflow_transform as tft
from keras.utils.vis_utils import plot_model


from transform import (
    NUMERICAL_FEATURES,
    CATEGORICAL_FEATURES,
    transformed_name,
    LABEL_KEY,
)

# Fungsi untuk membuat model
def get_model(show_summary=True):
    """
    Defines a Keras model for binary classification.
    """
    # Input layers for numerical features
    input_features = []
    for feature in NUMERICAL_FEATURES:
        input_features.append(
            tf.keras.Input(shape=(1,), name=transformed_name(feature))
        )

    # Input layers for categorical features
    for feature in CATEGORICAL_FEATURES:
        input_features.append(
            tf.keras.Input(shape=(1,), name=transformed_name(feature))
        )

    # Concatenate all inputs
    concatenate = tf.keras.layers.concatenate(input_features)

    # Hidden layers
    deep = tf.keras.layers.Dense(128, activation="relu")(concatenate)
    deep = tf.keras.layers.Dropout(0.3)(deep)
    deep = tf.keras.layers.Dense(64, activation="relu")(deep)
    deep = tf.keras.layers.Dropout(0.3)(deep)

    # Output layer
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(deep)

    # Compile the model
    model = tf.keras.models.Model(inputs=input_features, outputs=outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss="binary_crossentropy",
        metrics=[tf.keras.metrics.BinaryAccuracy()]
    )

    if show_summary:
        model.summary()

    return model

# Fungsi untuk membaca data yang sudah dikompresi
def gzip_reader_fn(filenames):
    """Reads compressed data."""
    return tf.data.TFRecordDataset(filenames, compression_type='GZIP')

# Fungsi untuk mendapatkan fitur yang sudah ditransformasi
def get_serve_tf_examples_fn(model, tf_transform_output):
    """Returns a serving function for the model."""

    model.tft_layer = tf_transform_output.transform_features_layer()

    @tf.function
    def serve_tf_examples_fn(serialized_tf_examples):
        """Parses a serialized tf.Example and returns model predictions."""
        feature_spec = tf_transform_output.raw_feature_spec()
        feature_spec.pop(LABEL_KEY)
        parsed_features = tf.io.parse_example(serialized_tf_examples, feature_spec)

        transformed_features = model.tft_layer(parsed_features)

        outputs = model(transformed_features)
        return {"outputs": outputs}

    return serve_tf_examples_fn

# Fungsi untuk membuat dataset
def input_fn(file_pattern, tf_transform_output, batch_size=64):
    """Generates features and labels for training/evaluation."""
    transformed_feature_spec = (
        tf_transform_output.transformed_feature_spec().copy()
    )

    dataset = tf.data.experimental.make_batched_features_dataset(
        file_pattern=file_pattern,
        batch_size=batch_size,
        features=transformed_feature_spec,
        reader=gzip_reader_fn,
        label_key=transformed_name(LABEL_KEY),
    )

    return dataset

# Fungsi untuk menjalankan pelatihan model
def run_fn(fn_args):
    """
    Train the model using the given arguments.
    """
    tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)

    # Load training and evaluation datasets
    train_dataset = input_fn(fn_args.train_files, tf_transform_output, batch_size=64)
    eval_dataset = input_fn(fn_args.eval_files, tf_transform_output, batch_size=64)

    # Define the model
    model = get_model()

    # Define callbacks
    log_dir = os.path.join(os.path.dirname(fn_args.serving_model_dir), "logs")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir, update_freq="batch"
    )

    # Train the model
    model.fit(
        train_dataset,
        steps_per_epoch=fn_args.train_steps,
        validation_data=eval_dataset,
        validation_steps=fn_args.eval_steps,
        callbacks=[tensorboard_callback],
        epochs=10
    )

    # Save the model with serving signature
    signatures = {
        "serving_default": get_serve_tf_examples_fn(
            model, tf_transform_output
        ).get_concrete_function(
            tf.TensorSpec(shape=[None], dtype=tf.string, name="examples")
        ),
    }
    model.save(
        fn_args.serving_model_dir, save_format="tf", signatures=signatures
    )

    # Visualize the model architecture
    plot_model(
        model,
        to_file='images/model_plot.png',
        show_shapes=True,
        show_layer_names=True
    )


# Running

Setelah semua modules siap digunakan. Lanjut untuk mendefinisikan fungsi `init_local_pipeline` untuk menginisialisasi pipeline TFX lokal, dengan konfigurasi Apache Beam untuk eksekusi multi-processing, caching, dan koneksi metadata menggunakan SQLite.

In [14]:
def init_local_pipeline(
    components, pipeline_root: str
) -> pipeline.Pipeline:
    """
    Initializes a local TFX pipeline.

    Args:
        components: List of TFX components to include in the pipeline.
        pipeline_root: Path to the root directory for pipeline outputs.

    Returns:
        A TFX pipeline object.
    """
    
    logging.info(f"Pipeline root set to: {pipeline_root}")

    # Beam arguments for running the pipeline locally
    beam_args = [
        "--direct_running_mode=multi_processing",
        "--direct_num_workers=0"  # Automatically detect number of workers
    ]
    
    # Create the pipeline
    return pipeline.Pipeline(
        pipeline_name=PIPELINE_NAME,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path
        ),
        beam_pipeline_args=beam_args
    )


Lalu akan menjalankan pipeline TFX, dimulai dengan inisialisasi komponen pipeline berdasarkan konfigurasi yang telah ditentukan, kemudian membuat pipeline lokal menggunakan fungsi `init_local_pipeline`, dan akhirnya mengeksekusi pipeline menggunakan `BeamDagRunner`.

In [15]:
from modules.components import init_components

logging.set_verbosity(logging.INFO)

# Konfigurasi pipeline
config = {
    "DATA_ROOT": DATA_ROOT,
    "training_module": TRAINER_MODULE_FILE,
    "transform_module": TRANSFORM_MODULE_FILE,
    "tuner_module": TUNER_MODULE_FILE,
    "training_steps": 1000,
    "eval_steps": 250,
    "serving_model_dir": serving_model_dir,
}

# Inisialisasi komponen pipeline
components = init_components(config)

# Membuat pipeline
pipeline = init_local_pipeline(components, pipeline_root)

# Menjalankan pipeline
BeamDagRunner().run(pipeline=pipeline)


Trial 10 Complete [00h 00m 34s]
val_binary_accuracy: 0.8862500190734863

Best val_binary_accuracy So Far: 0.8863750100135803
Total elapsed time: 00h 06m 08s
INFO:tensorflow:Oracle triggered exit


INFO:tensorflow:Oracle triggered exit
INFO:absl:Finished tuning... Tuner ID: tuner0
INFO:absl:Best HyperParameters: {'space': [{'class_name': 'Choice', 'config': {'name': 'units_layer1', 'default': 64, 'conditions': [], 'values': [64, 128, 256], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'dropout_layer1', 'default': 0.2, 'conditions': [], 'values': [0.2, 0.3, 0.4], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'units_layer2', 'default': 32, 'conditions': [], 'values': [32, 64, 128], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'dropout_layer2', 'default': 0.2, 'conditions': [], 'values': [0.2, 0.3, 0.4], 'ordered': True}}, {'class_name': 'Choice', 'config': {'name': 'learning_rate', 'default': 0.001, 'conditions': [], 'values': [0.001, 0.0001], 'ordered': True}}], 'values': {'units_layer1': 64, 'dropout_layer1': 0.2, 'units_layer2': 64, 'dropout_layer2': 0.2, 'learning_rate': 0.0001}}
INFO:absl:Best Hyperparameters are written to o

INFO:absl:Execution 7 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'tuner_results': [Artifact(artifact: uri: "output\\mellisadmyn-pipeline\\Tuner\\tuner_results\\7"
, artifact_type: name: "TunerResults"
)], 'best_hyperparameters': [Artifact(artifact: uri: "output\\mellisadmyn-pipeline\\Tuner\\best_hyperparameters\\7"
, artifact_type: name: "HyperParameters"
)]}) for execution 7
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node Tuner is finished.


Results summary
Results in output\mellisadmyn-pipeline\Tuner\.system\executor_execution\7\.temp\7\attrition_tuning
Showing 10 best trials
Objective(name="val_binary_accuracy", direction="max")

Trial 07 summary
Hyperparameters:
units_layer1: 64
dropout_layer1: 0.2
units_layer2: 64
dropout_layer2: 0.2
learning_rate: 0.0001
Score: 0.8863750100135803

Trial 00 summary
Hyperparameters:
units_layer1: 64
dropout_layer1: 0.2
units_layer2: 128
dropout_layer2: 0.4
learning_rate: 0.0001
Score: 0.8863124847412109

Trial 09 summary
Hyperparameters:
units_layer1: 128
dropout_layer1: 0.2
units_layer2: 32
dropout_layer2: 0.4
learning_rate: 0.001
Score: 0.8862500190734863

Trial 03 summary
Hyperparameters:
units_layer1: 128
dropout_layer1: 0.4
units_layer2: 32
dropout_layer2: 0.2
learning_rate: 0.0001
Score: 0.8821250200271606

Trial 05 summary
Hyperparameters:
units_layer1: 128
dropout_layer1: 0.3
units_layer2: 128
dropout_layer2: 0.3
learning_rate: 0.0001
Score: 0.8819375038146973

Trial 01 summary


INFO:absl:node Trainer is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.trainer.component.Trainer"
    base_type: TRAIN
  }
  id: "Trainer"
}
contexts {
  contexts {
    type {
      name: "pipeline"
    }
    name {
      field_value {
        string_value: "mellisadmyn-pipeline"
      }
    }
  }
  contexts {
    type {
      name: "pipeline_run"
    }
    name {
      field_value {
        string_value: "20241223-124048.244141"
      }
    }
  }
  contexts {
    type {
      name: "node"
    }
    name {
      field_value {
        string_value: "mellisadmyn-pipeline.Trainer"
      }
    }
  }
}
inputs {
  inputs {
    key: "examples"
    value {
      channels {
        producer_node_query {
          id: "Transform"
        }
        context_queries {
          type {
            name: "pipeline"
          }
          name {
            field_value {
              string_value: "mellisadmyn-pipeline"
            }
          }
        }
    

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 TotalWorkingYears_xf (InputLay  [(None, 1)]         0           []                               
 er)                                                                                              
                                                                                                  
 Age_xf (InputLayer)            [(None, 1)]          0           []                               
                                                                                                  
 MonthlyIncome_xf (InputLayer)  [(None, 1)]          0           []                               
                                                                                                  
 OverTime_xf (InputLayer)       [(None, 1)]          0           []                         

INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: output\mellisadmyn-pipeline\Trainer\model\8\Format-Serving\assets


INFO:tensorflow:Assets written to: output\mellisadmyn-pipeline\Trainer\model\8\Format-Serving\assets


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


INFO:absl:Training complete. Model written to output\mellisadmyn-pipeline\Trainer\model\8\Format-Serving. ModelRun written to output\mellisadmyn-pipeline\Trainer\model_run\8
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 8 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'model': [Artifact(artifact: uri: "output\\mellisadmyn-pipeline\\Trainer\\model\\8"
, artifact_type: name: "Model"
base_type: MODEL
)], 'model_run': [Artifact(artifact: uri: "output\\mellisadmyn-pipeline\\Trainer\\model_run\\8"
, artifact_type: name: "ModelRun"
)]}) for execution 8
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node Trainer is finished.
INFO:absl:node Evaluator is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.evaluator.component.Evaluator"
    base_type: EVALUATE
  }
  id: "Evaluator"
}
contexts {
  contexts {
    type {
      name: "pipeline"
    }



INFO:absl:The 'example_splits' parameter is not set, using 'eval' split.
INFO:absl:Evaluating model.
INFO:absl:udf_utils.get_fn {'fairness_indicator_thresholds': 'null', 'example_splits': 'null', 'eval_config': '{\n  "metrics_specs": [\n    {\n      "metrics": [\n        {\n          "class_name": "AUC"\n        },\n        {\n          "class_name": "Precision"\n        },\n        {\n          "class_name": "Recall"\n        },\n        {\n          "class_name": "ExampleCount"\n        },\n        {\n          "class_name": "BinaryAccuracy",\n          "threshold": {\n            "change_threshold": {\n              "absolute": 0.0001,\n              "direction": "HIGHER_IS_BETTER"\n            },\n            "value_threshold": {\n              "lower_bound": 0.8\n            }\n          }\n        }\n      ]\n    }\n  ],\n  "model_specs": [\n    {\n      "label_key": "Attrition"\n    }\n  ],\n  "slicing_specs": [\n    {}\n  ]\n}'} 'custom_extractors'
INFO:absl:Request was made to



INFO:absl:Evaluation complete. Results written to output\mellisadmyn-pipeline\Evaluator\evaluation\9.
INFO:absl:Checking validation results.


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`
INFO:absl:Blessing result True written to output\mellisadmyn-pipeline\Evaluator\blessing\9.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 9 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'evaluation': [Artifact(artifact: uri: "output\\mellisadmyn-pipeline\\Evaluator\\evaluation\\9"
, artifact_type: name: "ModelEvaluation"
)], 'blessing': [Artifact(artifact: uri: "output\\mellisadmyn-pipeline\\Evaluator\\blessing\\9"
, artifact_type: name: "ModelBlessing"
)]}) for execution 9
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node Evaluator is finished.
INFO:absl:node Pusher is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.pusher.component.Pusher"
    base_type: DEPLOY
  }
  id: "Pusher"
}
contexts {
  contexts {
    type {
      name: "pipeline"
    

In [17]:
import tensorflow_model_analysis as tfma

# Path ke folder evaluation
eval_result = tfma.load_eval_result(output_path='output/mellisadmyn-pipeline/Evaluator/evaluation/9')

# Menampilkan metrik dalam bentuk tabel
tfma.view.render_slicing_metrics(eval_result)


SlicingMetricsViewer(config={'weightedExamplesColumn': 'example_count'}, data=[{'slice': 'Overall', 'metrics':…

In [19]:
eval_result = tfma.load_eval_result(output_path='output/mellisadmyn-pipeline/Evaluator/evaluation/9')

# Debugging: Cek isi dari eval_result
print(eval_result.config)
print(eval_result.slicing_metrics)


model_specs {
  label_key: "Attrition"
}
slicing_specs {
}
metrics_specs {
  metrics {
    class_name: "AUC"
  }
  metrics {
    class_name: "Precision"
  }
  metrics {
    class_name: "Recall"
  }
  metrics {
    class_name: "ExampleCount"
  }
  metrics {
    class_name: "BinaryAccuracy"
    threshold {
      value_threshold {
        lower_bound {
          value: 0.8
        }
      }
    }
  }
  model_names: ""
}

[((), {'': {'': {'binary_accuracy': {'doubleValue': 0.8363636363636363}, 'loss': {'doubleValue': 0.4381735324859619}, 'example_count': {'doubleValue': 220.0}, 'auc': {'doubleValue': 0.716339760638298}, 'precision': {'doubleValue': 0.4230769230769231}, 'recall': {'doubleValue': 0.34375}}}})]


In [23]:
import tensorflow_model_analysis as tfma

# Path ke hasil evaluasi TFMA
evaluation_path = 'output/mellisadmyn-pipeline/Evaluator/evaluation/9'  # Ganti sesuai path Anda

# Load hasil evaluasi
eval_result = tfma.load_eval_result(output_path=evaluation_path)

# Print slicing metrics dengan format yang mudah dibaca
print("Hasil Evaluasi Model:")
print("=" * 50)
for slicing_metric in eval_result.slicing_metrics:
    slice_key, metrics = slicing_metric
    print(f"Slice: {slice_key}")  # Key untuk slicing (misal: semua data atau slice tertentu)
    for metric_name, metric_value in metrics[''].items():  # Iterasi setiap metrik
        print(f"  {metric_name}: {metric_value}")
    print("-" * 50)


Hasil Evaluasi Model:
Slice: ()
  : {'binary_accuracy': {'doubleValue': 0.8363636363636363}, 'loss': {'doubleValue': 0.4381735324859619}, 'example_count': {'doubleValue': 220.0}, 'auc': {'doubleValue': 0.716339760638298}, 'precision': {'doubleValue': 0.4230769230769231}, 'recall': {'doubleValue': 0.34375}}
--------------------------------------------------
