In [1]:
import os
from typing import Text

from absl import logging
from tfx.orchestration import metadata, pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

In [2]:
PIPELINE_NAME = "tfajarama-pipeline"
 
# pipeline inputs
DATA_ROOT = "data"
TRANSFORM_MODULE_FILE = "modules/loan_approval_transform.py"
TRAINER_MODULE_FILE = "modules/loan_approval_trainer.py"
TUNER_MODULE_FILE = "modules/loan_approval_tuner.py"
 
# pipeline outputs
OUTPUT_BASE = "./"
serving_model_dir = os.path.join(OUTPUT_BASE, 'serving_model')
pipeline_root = os.path.join(OUTPUT_BASE, PIPELINE_NAME)
metadata_path = os.path.join(pipeline_root, "metadata.sqlite")

In [3]:
def init_local_pipeline(
    components, pipeline_root: Text
) -> pipeline.Pipeline:
    
    logging.info(f"Pipeline root set to: {pipeline_root}")
    beam_args = [
        "--direct_running_mode=multi_processing",
        # 0 auto-detect based on the number of CPUs available 
        # during execution time.
        "--direct_num_workers=0" 
    ]
    
    return pipeline.Pipeline(
        pipeline_name=PIPELINE_NAME,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path
        ),
        beam_pipeline_args=beam_args
    )

In [4]:
if __name__ == "__main__":
    # logging.set_verbosity(logging.INFO)
    logging.set_verbosity(logging.DEBUG)
    logging.info("Starting the pipeline...")
    
    from modules.components import init_components
    
    components = init_components(
        DATA_ROOT,
        transform_module=TRANSFORM_MODULE_FILE,
        # tuning_module=TUNER_MODULE_FILE,
        training_module=TRAINER_MODULE_FILE,
        training_steps=5000,
        eval_steps=1000,
        serving_model_dir=serving_model_dir,
    )
    
    pipeline = init_local_pipeline(components, pipeline_root)
    BeamDagRunner().run(pipeline=pipeline)

INFO:absl:Starting the pipeline...
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Pipeline root set to: ./tfajarama-pipeline
INFO:absl:Generating ephemeral wheel package for 'D:\\AI-ML_Project\\machine-learning-operations-mlops\\submissions\\submission-2-root\\submission_2\\modules\\loan_approval_transform.py' (including modules: ['components', 'loan_approval_trainer', 'loan_approval_transform', 'loan_approval_tuner']).
INFO:absl:User module package has hash fingerprint version 966723c8b13346963e08853a749e14f739ca1d67e4be7c845c57f88a8ae56359.
INFO:absl:Executing: ['D:\\AI-ML_Project\\machine-learning-operations-mlops\\submissions\\submission-2-root\\.venv\\Scripts\\python.exe', 'C:\\Users\\Asus\\AppData\\Local\\Temp\\tmp5uap_wxt\\_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', 'C:\\Users\\Asus\\AppData\\Local\\Temp\\tmpglh

INFO:absl:Node CsvExampleGen depends on [].
INFO:absl:Node CsvExampleGen is scheduled.
INFO:absl:Node Latest_blessed_model_resolver depends on [].
INFO:absl:Node Latest_blessed_model_resolver is scheduled.
INFO:absl:Node StatisticsGen depends on ['Run[CsvExampleGen]'].
INFO:absl:Node StatisticsGen is scheduled.
INFO:absl:Node SchemaGen depends on ['Run[StatisticsGen]'].
INFO:absl:Node SchemaGen is scheduled.
INFO:absl:Node ExampleValidator depends on ['Run[SchemaGen]', 'Run[StatisticsGen]'].
INFO:absl:Node ExampleValidator is scheduled.
INFO:absl:Node Transform depends on ['Run[CsvExampleGen]', 'Run[SchemaGen]'].
INFO:absl:Node Transform is scheduled.
INFO:absl:Node Trainer depends on ['Run[SchemaGen]', 'Run[Transform]'].
INFO:absl:Node Trainer is scheduled.
INFO:absl:Node Evaluator depends on ['Run[CsvExampleGen]', 'Run[Latest_blessed_model_resolver]', 'Run[Trainer]'].
INFO:absl:Node Evaluator is scheduled.
INFO:absl:Node Pusher depends on ['Run[Evaluator]', 'Run[Trainer]'].
INFO:absl

Instructions for updating:
Use ref() instead.


Instructions for updating:
Use ref() instead.
INFO:absl:Feature previous_loan_defaults_on_file has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature person_gender has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_intent has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature person_education has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature cb_person_cred_hist_length has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature credit_score has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_amnt has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_int_rate has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_percent_income has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature loan_status has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature person_age has a shape dim

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 person_gender_xf (InputLayer)  [(None, 3)]          0           []                               
                                                                                                  
 person_education_xf (InputLaye  [(None, 6)]         0           []                               
 r)                                                                                               
                                                                                                  
 person_home_ownership_xf (Inpu  [(None, 5)]         0           []                               
 tLayer)                                                                                          
                                                                                              

INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: ./tfajarama-pipeline\Trainer\model\7\Format-Serving\assets


INFO:tensorflow:Assets written to: ./tfajarama-pipeline\Trainer\model\7\Format-Serving\assets


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


INFO:absl:Training complete. Model written to ./tfajarama-pipeline\Trainer\model\7\Format-Serving. ModelRun written to ./tfajarama-pipeline\Trainer\model_run\7
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 7 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'model_run': [Artifact(artifact: uri: "./tfajarama-pipeline\\Trainer\\model_run\\7"
, artifact_type: name: "ModelRun"
)], 'model': [Artifact(artifact: uri: "./tfajarama-pipeline\\Trainer\\model\\7"
, artifact_type: name: "Model"
base_type: MODEL
)]}) for execution 7
INFO:absl:MetadataStore with DB connection initialized
DEBUG:absl:ConnectionConfig: sqlite {
  filename_uri: "./tfajarama-pipeline\\metadata.sqlite"
  connection_mode: READWRITE_OPENCREATE
}

DEBUG:absl:Registering a metadata type with id 27.
DEBUG:absl:Registering a metadata type with id 28.
INFO:absl:node Trainer is finished.
INFO:absl:node Evaluator is running.
INFO:absl:R



INFO:absl:The 'example_splits' parameter is not set, using 'eval' split.
INFO:absl:Evaluating model.
INFO:absl:udf_utils.get_fn {'example_splits': 'null', 'fairness_indicator_thresholds': 'null', 'eval_config': '{\n  "metrics_specs": [\n    {\n      "metrics": [\n        {\n          "class_name": "AUC"\n        },\n        {\n          "class_name": "Precision"\n        },\n        {\n          "class_name": "Recall"\n        },\n        {\n          "class_name": "ExampleCount"\n        },\n        {\n          "class_name": "BinaryAccuracy",\n          "threshold": {\n            "change_threshold": {\n              "absolute": 0.0001,\n              "direction": "HIGHER_IS_BETTER"\n            },\n            "value_threshold": {\n              "lower_bound": 0.5\n            }\n          }\n        }\n      ]\n    }\n  ],\n  "model_specs": [\n    {\n      "label_key": "loan_status"\n    }\n  ],\n  "slicing_specs": [\n    {}\n  ]\n}'} 'custom_extractors'
INFO:absl:Request was made 



INFO:absl:Evaluation complete. Results written to ./tfajarama-pipeline\Evaluator\evaluation\8.
INFO:absl:Checking validation results.


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`
INFO:absl:Blessing result True written to ./tfajarama-pipeline\Evaluator\blessing\8.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 8 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'evaluation': [Artifact(artifact: uri: "./tfajarama-pipeline\\Evaluator\\evaluation\\8"
, artifact_type: name: "ModelEvaluation"
)], 'blessing': [Artifact(artifact: uri: "./tfajarama-pipeline\\Evaluator\\blessing\\8"
, artifact_type: name: "ModelBlessing"
)]}) for execution 8
INFO:absl:MetadataStore with DB connection initialized
DEBUG:absl:ConnectionConfig: sqlite {
  filename_uri: "./tfajarama-pipeline\\metadata.sqlite"
  connection_mode: READWRITE_OPENCREATE
}

DEBUG:absl:Registering a metadata type with id 30.
DEBUG:absl:Registering a metadata type with id 31.
INFO:absl:node Evaluator is finished.
INFO:absl:node Pusher is r

In [6]:
import tensorflow_model_analysis as tfma 

eval_result = './tfajarama-pipeline/Evaluator/evaluation/8/metrics-00000-of-00001.tfrecord'
tfma_result = tfma.load_eval_result(eval_result)
tfma_result

EvalResult(slicing_metrics=[((), {'': {'': {'binary_accuracy': {'doubleValue': 0.9120140953639467}, 'loss': {'doubleValue': 0.19296522438526154}, 'example_count': {'doubleValue': 9081.0}, 'auc': {'doubleValue': 0.9646401905449972}, 'precision': {'doubleValue': 0.8493771234428086}, 'recall': {'doubleValue': 0.7378258730939499}}}})], plots=[((), None)], attributions=[((), None)], config=None, data_location='', file_format='', model_location='')