# **Proyek Pengembangan Machine Learning Pipeline :**
- **Nama:** Muhammad Rakha Almasah
- **Email:** muh.rakha.al@gmail.com
- **ID Dicoding:** muhrakhaal

In [None]:
!pip install tfx tensorflow-transform --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.4/60.4 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.7/89.7 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m152.0/152.0 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.5/43.5 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m173.5/173.5 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
from absl import logging
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner
from tfx.orchestration.metadata import sqlite_metadata_connection_config
from tfx.components import CsvExampleGen, StatisticsGen, SchemaGen, ExampleValidator, Transform, Trainer, Evaluator, Tuner, Pusher
from tfx.dsl.components.common.resolver import Resolver
from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import LatestBlessedModelStrategy
from tfx.proto import example_gen_pb2, trainer_pb2, pusher_pb2
from tfx.types import Channel
from tfx.types.standard_artifacts import Model, ModelBlessing
from tfx.orchestration import pipeline
from google.colab import files
from absl import logging
import tensorflow_model_analysis as tfma

In [None]:
# Owner identity; the pipeline name and the base directory are derived from it.
USERNAME = "muhrakhaal"
PIPELINE_NAME = USERNAME

# Workspace layout under Colab's /content directory.
BASE_DIR = "/content/" + USERNAME
DATA_DIR = os.path.join(BASE_DIR, "data")
PIPELINE_ROOT = os.path.join(BASE_DIR, "muhrakhaal_pipeline")

# Artifacts that live inside the pipeline root.
SERVING_MODEL_DIR = os.path.join(PIPELINE_ROOT, "serving_model")
METADATA_PATH = os.path.join(PIPELINE_ROOT, "metadata.sqlite")

# Label column handed to the evaluation config.
LABEL_KEY = "IXIC"

In [None]:
# Make sure the data folder and the pipeline root exist before anything is
# written into them; existing folders are left untouched.
for workspace_dir in (DATA_DIR, PIPELINE_ROOT):
    os.makedirs(workspace_dir, exist_ok=True)
print(f"Pipeline workspace created at {BASE_DIR}")

Pipeline workspace created at /content/muhrakhaal


In [None]:
# Ask the user for the dataset through the Colab upload widget, then copy
# every uploaded file into DATA_DIR under its original file name.
print("Upload dataset Anda (file CSV)...")
uploaded = files.upload()
for filename in uploaded:
    dataset_path = os.path.join(DATA_DIR, filename)
    with open(dataset_path, 'wb') as dataset_file:
        dataset_file.write(uploaded[filename])
print(f"Dataset uploaded to {DATA_DIR}")

Upload dataset Anda (file CSV)...


Saving cleaned_merged_stock_data.csv to cleaned_merged_stock_data.csv
Dataset uploaded to /content/muhrakhaal/data


In [None]:
# Ask the user for the Transform and Trainer module files and store them in
# BASE_DIR, where TRANSFORM_MODULE / TRAINER_MODULE expect to find them.
print("Upload file modul tambahan (nasdaq_transform, trainer_module)...")
uploaded = files.upload()
TRANSFORM_MODULE = os.path.join(BASE_DIR, "nasdaq_transform.py")
TRAINER_MODULE = os.path.join(BASE_DIR, "trainer_module.py")
for name, data in uploaded.items():
    with open(os.path.join(BASE_DIR, name), 'wb') as f:
        f.write(data)

# Fail here, with a clear message, if either module the pipeline points at is
# absent (nothing uploaded, or uploaded under a different file name) instead
# of letting the pipeline run fail later on a missing module file.
missing_modules = [
    module_path
    for module_path in (TRANSFORM_MODULE, TRAINER_MODULE)
    if not os.path.isfile(module_path)
]
if missing_modules:
    raise FileNotFoundError(
        "Modul yang diperlukan tidak ditemukan setelah upload: "
        + ", ".join(missing_modules)
    )
print("File modul berhasil disimpan.")

Upload file modul tambahan (nasdaq_transform, trainer_module)...


Saving nasdaq_transform.py to nasdaq_transform.py
Saving trainer_module.py to trainer_module.py
File modul berhasil disimpan.


In [None]:
def create_pipeline():
    """Assemble the TFX pipeline for this notebook.

    The pipeline chains CsvExampleGen -> StatisticsGen -> SchemaGen ->
    ExampleValidator -> Transform -> Trainer -> Evaluator -> Pusher, with a
    Resolver supplying the latest blessed model as the Evaluator's baseline.
    It reads its configuration from the notebook-level constants (DATA_DIR,
    TRANSFORM_MODULE, TRAINER_MODULE, LABEL_KEY, SERVING_MODEL_DIR,
    PIPELINE_NAME, PIPELINE_ROOT, METADATA_PATH).

    Returns:
        A ``pipeline.Pipeline`` object ready to be handed to a runner.
    """
    # ExampleGen: split the CSV input into train/eval using an 8:2 ratio of
    # hash buckets.
    output_config = example_gen_pb2.Output(
        split_config=example_gen_pb2.SplitConfig(
            splits=[
                example_gen_pb2.SplitConfig.Split(name="train", hash_buckets=8),
                example_gen_pb2.SplitConfig.Split(name="eval", hash_buckets=2),
            ]
        )
    )
    example_gen = CsvExampleGen(input_base=DATA_DIR, output_config=output_config)

    # Statistics and validation
    statistics_gen = StatisticsGen(examples=example_gen.outputs["examples"])
    schema_gen = SchemaGen(statistics=statistics_gen.outputs["statistics"])
    example_validator = ExampleValidator(
        statistics=statistics_gen.outputs["statistics"],
        schema=schema_gen.outputs["schema"]
    )

    # Transform: preprocessing logic comes from the uploaded transform module.
    transform = Transform(
        examples=example_gen.outputs["examples"],
        schema=schema_gen.outputs["schema"],
        module_file=TRANSFORM_MODULE
    )

    # Trainer: model code comes from the uploaded trainer module.
    trainer = Trainer(
        module_file=TRAINER_MODULE,
        examples=transform.outputs["transformed_examples"],
        transform_graph=transform.outputs["transform_graph"],
        schema=schema_gen.outputs["schema"],
        train_args=trainer_pb2.TrainArgs(num_steps=100),
        eval_args=trainer_pb2.EvalArgs(num_steps=20),
    )

    # Resolver: looks up the latest blessed model to serve as the baseline.
    resolver = Resolver(
        strategy_class=LatestBlessedModelStrategy,
        model=Channel(type=Model),
        model_blessing=Channel(type=ModelBlessing)
    )

    # Evaluator
    # MSE and MAE are error metrics (smaller is better), so the acceptance
    # thresholds must be *upper* bounds. A lower bound would accept the model
    # only when its error is at least the threshold, which is the inverse of
    # the intended quality gate.
    metrics_specs = [
        tfma.MetricsSpec(
            metrics=[
                tfma.MetricConfig(
                    class_name='MeanSquaredError',
                    threshold=tfma.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(upper_bound={'value': 500.0})
                    )
                ),
                tfma.MetricConfig(
                    class_name='MeanAbsoluteError',
                    threshold=tfma.MetricThreshold(
                        value_threshold=tfma.GenericValueThreshold(upper_bound={'value': 50.0})
                    )
                )
            ]
        )
    ]

    # A single empty SlicingSpec means metrics are computed over the whole
    # evaluation set.
    evaluation_config = tfma.EvalConfig(
        model_specs=[tfma.ModelSpec(label_key=LABEL_KEY)],
        slicing_specs=[tfma.SlicingSpec()],
        metrics_specs=metrics_specs
    )

    # NOTE(review): the Evaluator reads the *transformed* examples; confirm
    # that this matches what the trained model's evaluation signature expects.
    evaluator = Evaluator(
        examples=transform.outputs["transformed_examples"],
        model=trainer.outputs["model"],
        baseline_model=resolver.outputs["model"],
        eval_config=evaluation_config
    )

    # Pusher: receives the Evaluator's blessing and targets SERVING_MODEL_DIR
    # on the local filesystem.
    pusher = Pusher(
        model=trainer.outputs["model"],
        model_blessing=evaluator.outputs["blessing"],
        push_destination=pusher_pb2.PushDestination(
            filesystem=pusher_pb2.PushDestination.Filesystem(base_directory=SERVING_MODEL_DIR)
        ),
    )

    # Pipeline
    return pipeline.Pipeline(
        pipeline_name=PIPELINE_NAME,
        pipeline_root=PIPELINE_ROOT,
        metadata_connection_config=sqlite_metadata_connection_config(METADATA_PATH),
        components=[
            example_gen, statistics_gen, schema_gen, example_validator,
            transform, trainer, resolver, evaluator, pusher
        ],
        enable_cache=True,
    )


In [None]:
if __name__ == "__main__":
    # Show absl INFO-level messages while the pipeline runs.
    logging.set_verbosity(logging.INFO)
    print("Memulai pipeline...")
    # Build the pipeline and execute it with the Beam DAG runner.
    beam_runner = BeamDagRunner()
    beam_runner.run(create_pipeline())

INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Generating ephemeral wheel package for '/content/muhrakhaal/nasdaq_transform.py' (including modules: ['trainer_module', 'nasdaq_transform']).
INFO:absl:User module package has hash fingerprint version cf7d2cb601bfa0bc71f1b669e4102f1db91a3afdf4b280232643a0ecc4216a04.
INFO:absl:Executing: ['/usr/bin/python3', '/tmp/tmpa876simp/_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', '/tmp/tmp4uqxqnr5', '--dist-dir', '/tmp/tmpn5jg3tin']


Memulai pipeline...


INFO:absl:Successfully built user code wheel distribution at '/content/muhrakhaal/muhrakhaal_pipeline/_wheels/tfx_user_code_Transform-0.0+cf7d2cb601bfa0bc71f1b669e4102f1db91a3afdf4b280232643a0ecc4216a04-py3-none-any.whl'; target user module is 'nasdaq_transform'.
INFO:absl:Full user module path is 'nasdaq_transform@/content/muhrakhaal/muhrakhaal_pipeline/_wheels/tfx_user_code_Transform-0.0+cf7d2cb601bfa0bc71f1b669e4102f1db91a3afdf4b280232643a0ecc4216a04-py3-none-any.whl'
INFO:absl:Generating ephemeral wheel package for '/content/muhrakhaal/trainer_module.py' (including modules: ['trainer_module', 'nasdaq_transform']).
INFO:absl:User module package has hash fingerprint version cf7d2cb601bfa0bc71f1b669e4102f1db91a3afdf4b280232643a0ecc4216a04.
INFO:absl:Executing: ['/usr/bin/python3', '/tmp/tmppafodq4a/_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', '/tmp/tmpvt0_1e7r', '--dist-dir', '/tmp/tmpo8lwrvoi']
INFO:absl:Successfully built user code wheel distribution at '/content/muhrakha

INFO:absl:Node CsvExampleGen depends on [].
INFO:absl:Node CsvExampleGen is scheduled.
INFO:absl:Node Resolver depends on [].
INFO:absl:Node Resolver is scheduled.
INFO:absl:Node StatisticsGen depends on ['Run[CsvExampleGen]'].
INFO:absl:Node StatisticsGen is scheduled.
INFO:absl:Node SchemaGen depends on ['Run[StatisticsGen]'].
INFO:absl:Node SchemaGen is scheduled.
INFO:absl:Node ExampleValidator depends on ['Run[SchemaGen]', 'Run[StatisticsGen]'].
INFO:absl:Node ExampleValidator is scheduled.
INFO:absl:Node Transform depends on ['Run[CsvExampleGen]', 'Run[SchemaGen]'].
INFO:absl:Node Transform is scheduled.
INFO:absl:Node Trainer depends on ['Run[SchemaGen]', 'Run[Transform]'].
INFO:absl:Node Trainer is scheduled.
INFO:absl:Node Evaluator depends on ['Run[Resolver]', 'Run[Trainer]', 'Run[Transform]'].
INFO:absl:Node Evaluator is scheduled.
INFO:absl:Node Pusher depends on ['Run[Evaluator]', 'Run[Trainer]'].
INFO:absl:Node Pusher is scheduled.
INFO:absl:node Resolver is running.
INFO

Epoch 1/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 6018343.0000 - mae: 2269.5225 - val_loss: 45680544.0000 - val_mae: 5152.9399
Epoch 2/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 94334000.0000 - mae: 8325.4355 - val_loss: 39671304.0000 - val_mae: 4775.1235
Epoch 3/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 29244412.0000 - mae: 4342.8101 - val_loss: 37749400.0000 - val_mae: 4444.2646
Epoch 4/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 5323607.5000 - mae: 1452.4708 - val_loss: 31926806.0000 - val_mae: 3722.2278
Epoch 5/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 67169072.0000 - mae: 6747.0542 - val_loss: 23638894.0000 - val_mae: 3266.5764
Epoch 6/100
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 14074120.0000 - mae: 2644.6726 - val_loss:

INFO:absl:Feature AAPL has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature AMZN has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature BRK_B has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature IXIC has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature MSFT has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Function `serve_tf_examples_fn` contains input name(s) resource with unsupported characters which will be renamed to sequential_1_output_1_add_readvariableop_resource in the SavedModel.
INFO:absl:Sharding callback duration: 13
INFO:absl:Sharding callback duration: 14
INFO:absl:Writing fingerprint to /content/muhrakhaal/muhrakhaal_pipeline/Trainer/model/7/Format-Serving/fingerprint.pb
INFO:absl:Training complete. Model written to /content/muhrakhaal/muhrakhaal_pipeline/Trainer/model/7/Format-Serving. ModelRun written to /content/muhrakhaal/muhrakhaal_pipeline/Trainer/model_run/7
INFO:absl

In [None]:
import shutil
from google.colab import files

# Zip the whole pipeline workspace and offer it as a browser download.
folder_path = "/content/muhrakhaal"
# make_archive returns the path of the archive it actually wrote; using that
# return value keeps the download target in sync with the created file instead
# of relying on a second hard-coded path that must match by hand.
zip_file_path = shutil.make_archive(folder_path, 'zip', folder_path)
files.download(zip_file_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>