In [None]:
!pip install --upgrade kfp[kubernetes]

In [None]:
from kfp          import dsl
from kfp          import kubernetes
from kfp.client   import Client
from kfp.compiler import Compiler
from kfp.dsl      import component, pipeline

In [None]:
# Container image references for pipeline tasks.

# Red Hat UBI 9 / Python 3.11 image reference.
TASK_BASE_IMAGE = 'registry.access.redhat.com/ubi9/python-311'

# Placeholder to replace with the image used by the `download_json` component;
# that component imports boto3, so the image presumably ships it (confirm).
TASK_IMAGE_BOTO3 = '<task_image_boto3>'

In [None]:
@component(
    base_image = TASK_IMAGE_BOTO3
)
def download_json(
    s3_endpoint_url      : str,
    s3_access_key_id     : str,
    s3_secret_access_key : str,
    s3_region_name       : str,
    s3_bucket            : str,
    s3_filename          : str,
    pvc_filename         : str
):
    """Download a single object from S3 and store it at a local path.

    Args:
        s3_endpoint_url: Endpoint URL handed to the boto3 S3 client.
        s3_access_key_id: Access key id used to authenticate against S3.
        s3_secret_access_key: Secret access key used to authenticate against S3.
        s3_region_name: Region name handed to the boto3 S3 client.
        s3_bucket: Name of the bucket that holds the object.
        s3_filename: Key of the object to download.
        pvc_filename: Destination path the object is written to.
    """

    # Imports are kept inside the function body, as in the original component.
    from boto3   import client
    from os      import makedirs
    from os.path import dirname

    # Make sure the destination directory exists. `exist_ok=True` tolerates a
    # directory that is already there (e.g. the volume mount point itself), and
    # the guard skips the call when the path has no directory component, since
    # makedirs('') raises.
    pvc_directory = dirname(pvc_filename)

    if pvc_directory:
        makedirs(pvc_directory, exist_ok = True)

    s3_client = client(
        service_name          = 's3',
        endpoint_url          = s3_endpoint_url,
        aws_access_key_id     = s3_access_key_id,
        aws_secret_access_key = s3_secret_access_key,
        region_name           = s3_region_name
    )

    s3_client.download_file(s3_bucket, s3_filename, pvc_filename)

In [None]:
# Pipeline metadata passed to the pipeline decorator.
PIPELINE_NAME = 'Gerar Parecer'
PIPELINE_DESCRIPTION = 'Utilizando IA para gerar parecer jurídico de infrações ambientais'

# File the compiled pipeline package is written to and later submitted from.
PIPELINE_YAML = 'pipeline.yaml'

# Storage class requested for the PVC created by the pipeline.
PIPELINE_PVC_STORAGE_CLASS = 'gp3-csi'

In [None]:
# The decorator is referenced as `dsl.pipeline` on purpose: the function below
# rebinds the module-level name `pipeline`, so after this cell has run once that
# name no longer refers to the decorator and re-running the cell would break.
@dsl.pipeline(
    name        = PIPELINE_NAME,
    description = PIPELINE_DESCRIPTION
)
def pipeline(
    s3_endpoint_url      : str,
    s3_access_key_id     : str,
    s3_secret_access_key : str,
    s3_region_name       : str,
    s3_bucket            : str,
    s3_filename          : str,
    pvc_filename         : str
):
    """Create a PVC, mount it at /pipeline and download one S3 object.

    Args:
        s3_endpoint_url: Forwarded unchanged to the download task.
        s3_access_key_id: Forwarded unchanged to the download task.
        s3_secret_access_key: Forwarded unchanged to the download task.
        s3_region_name: Forwarded unchanged to the download task.
        s3_bucket: Forwarded unchanged to the download task.
        s3_filename: Forwarded unchanged to the download task.
        pvc_filename: Destination path forwarded to the download task; it has
            to be located under /pipeline for the file to land on the PVC.
    """

    # NOTE(review): the S3 credentials travel as plain pipeline parameters;
    # consider sourcing them from a Kubernetes secret instead - confirm.

    from os.path import join

    # Task Create PVC

    # NOTE(review): no task deletes this PVC, so it presumably outlives the
    # run - confirm that this is intended.
    create_pvc_task = kubernetes.CreatePVC(
        pvc_name_suffix    = '-pipeline-pvc',
        size               = '5Gi',
        access_modes       = ['ReadWriteOnce'],
        storage_class_name = PIPELINE_PVC_STORAGE_CLASS
    )

    pvc_directory = join('/', 'pipeline')
    pvc_name      = create_pvc_task.outputs['name']

    # Task Download JSON

    download_json_task = download_json(
        s3_endpoint_url      = s3_endpoint_url,
        s3_access_key_id     = s3_access_key_id,
        s3_secret_access_key = s3_secret_access_key,
        s3_region_name       = s3_region_name,
        s3_bucket            = s3_bucket,
        s3_filename          = s3_filename,
        pvc_filename         = pvc_filename
    )

    # Attach the freshly created PVC to the download task at /pipeline.
    kubernetes.mount_pvc(
        task       = download_json_task,
        pvc_name   = pvc_name,
        mount_path = pvc_directory
    )

    # Explicit ordering: the download only starts after the PVC task.
    download_json_task.after(create_pvc_task)

In [None]:
# Compile the pipeline definition into the package file named by PIPELINE_YAML.
Compiler().compile(
    pipeline_func=pipeline,
    package_path=PIPELINE_YAML,
)

In [None]:
# Host passed to the KFP client (placeholder, replace before running).
KUBEFLOW_HOST = '<kubeflow_host>'

# Run arguments, one entry per pipeline parameter. All values are placeholders
# to be replaced before submitting; avoid committing real credentials here.
PIPELINE_ARGUMENTS = dict(
    s3_endpoint_url='<s3_endpoint_url>',
    s3_access_key_id='<s3_access_key_id>',
    s3_secret_access_key='<s3_secret_access_key>',
    s3_region_name='<s3_region_name>',
    s3_bucket='<s3_bucket>',
    s3_filename='<s3_filename>',
    pvc_filename='<pvc_filename>',
)

In [None]:
# Submit the compiled package as a new run on the configured host; the call's
# result stays the last expression so the notebook displays it.
Client(host=KUBEFLOW_HOST).create_run_from_pipeline_package(
    pipeline_file=PIPELINE_YAML,
    arguments=PIPELINE_ARGUMENTS,
)