# Component Test: Train with Model Job

## Author
- Sebastian Lehrig <sebastian.lehrig1@ibm.com>

## License
Apache-2.0 License

## Imports & Constants

In [62]:
import kfp
import kfp.dsl as dsl

%load_ext lab_black

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


In [54]:
# Image reference kept as a notebook-level constant.
# NOTE(review): no visible cell of this notebook references BASE_IMAGE — confirm
# whether it should be wired into the training specification or removed.
BASE_IMAGE = "quay.io/ibm/kubeflow-notebook-image-ppc64le:latest"

# Client used at the end of the notebook to submit the pipeline run.
KFP_CLIENT = kfp.Client()

# Read the namespace from the service-account file mounted into the pod.
# Surrounding whitespace (e.g. a trailing newline) is stripped so the value can
# be passed verbatim as the ``namespace`` argument of the run submission.
with open("/var/run/secrets/kubernetes.io/serviceaccount/namespace") as f:
    NAMESPACE = f.read().strip()
NAMESPACE

'user-example-com'

## Specify training

In [61]:
def train_model(text: str):
    """Wait for a fixed delay, then print the given text.

    Prints a notice announcing the delay, sleeps for that many seconds, and
    finally prints ``text``.

    Args:
        text: Message printed after the delay.
    """
    # Imported inside the function body; presumably so the function stays
    # self-contained when it is converted into component text.
    from time import sleep

    pause_seconds = 10
    print(f"Sleeping {pause_seconds} seconds...")
    sleep(pause_seconds)
    print(text)


# Convert ``train_model`` into component text; the pipeline below passes this
# text to the training component as its ``train_specification`` argument.
# NOTE(review): BASE_IMAGE is defined earlier in the notebook but is not passed
# here, so whatever image kfp defaults to is used — confirm this is intended.
train_specification = kfp.components.func_to_component_text(func=train_model)

In [56]:
# Load the training component from "component.yaml". The path is relative, so
# the file must be present in the kernel's current working directory.
train_comp = kfp.components.load_component_from_file("component.yaml")

## Create pipeline

In [59]:
@dsl.pipeline(
    name="Component Test - Train with Model Job",
    description="A simple component test",
)
def train_pipeline():
    """Invoke the training component twice with the same training inputs.

    The first step uses only the shared arguments and is displayed as
    "Ordinary Kubernetes Job". The second step additionally receives an MPI
    distribution specification and is displayed as "MPIJob".
    """
    # Keyword arguments common to both steps.
    shared_args = dict(
        train_dataset_dir="/tmp",
        validation_dataset_dir="/tmp",
        train_specification=train_specification,
        train_parameters={"text": "Hello training world!"},
    )

    # Step 1: no distribution specification.
    plain_step = train_comp(**shared_args)
    plain_step.set_display_name("Ordinary Kubernetes Job")

    # Step 2: same inputs plus an MPI distribution specification.
    mpi_step = train_comp(
        **shared_args,
        distribution_specification={
            "distribution_type": "MPI",
            "number_of_workers": 2,
            "worker_cpus": "1",
            "worker_memory": "1Gi",
            "launcher_cpus": "1",
            "launcher_memory": "1Gi",
        },
    )
    mpi_step.set_display_name("MPIJob")

## Run the pipeline within an experiment

In [60]:
# Submit ``train_pipeline`` as a run in the namespace read at the top of the
# notebook. The pipeline function takes no parameters, hence the empty
# ``arguments``. The returned result object is the cell's output.
# NOTE(review): no experiment name is passed, so the client presumably falls
# back to its default experiment — confirm that is the intended target.
KFP_CLIENT.create_run_from_pipeline_func(
    train_pipeline, arguments={}, namespace=NAMESPACE
)

RunPipelineResult(run_id=e2db9d42-b152-422b-b5e8-883bc5cc9066)