### **03_video_insights.ipynb**
### **Video Insights Pipeline**

* ##### 01 - Install packages
* ##### 02 - Import packages
* ##### 03 - Create tasks
* ##### 04 - Create pipeline
* ##### 05 - Create pipeline yaml
* ##### 06 - Create pipeline run

### 01 - Install packages

In [None]:
!pip install kfp-tekton==1.5.9

### 02 - Import packages

In [None]:
import os
import sys
sys.path.append(os.path.dirname(os.getcwd()))

import kfp
import kfp_tekton

from components.delete_artifacts import delete_artifacts
from components.download_video   import download_video
from components.extract_audio    import extract_audio
from components.extract_speeches import extract_speeches
from components.prepare_video    import prepare_video
from components.upload_artifacts import upload_artifacts

### 03 - Create tasks

In [None]:
# Container image passed as `base_image` to every component created below.
task_base_image = "registry.access.redhat.com/ubi9/python-311"

In [None]:
# Component factory for `download_video`, built on the shared base image with
# boto3 requested through `packages_to_install`.
download_video_op = kfp.components.create_component_from_func(
    download_video,
    base_image=task_base_image,
    packages_to_install=['boto3==1.34.28'],
)

In [None]:
# Component factory for `prepare_video`, built on the shared base image with
# no extra packages requested.
prepare_video_op = kfp.components.create_component_from_func(
    prepare_video,
    base_image=task_base_image,
)

In [None]:
# Component factory for `extract_audio`, built on the shared base image with
# moviepy requested through `packages_to_install`.
extract_audio_op = kfp.components.create_component_from_func(
    extract_audio,
    base_image=task_base_image,
    packages_to_install=['moviepy==1.0.3'],
)

In [None]:
# Component factory for `extract_speeches`, built on the shared base image
# with torch and transformers requested through `packages_to_install`.
extract_speeches_op = kfp.components.create_component_from_func(
    extract_speeches,
    base_image=task_base_image,
    packages_to_install=['torch==2.1.2', 'transformers==4.37.1'],
)

In [None]:
# Component factory for `upload_artifacts`, built on the shared base image
# with boto3 requested through `packages_to_install`.
upload_artifacts_op = kfp.components.create_component_from_func(
    upload_artifacts,
    base_image=task_base_image,
    packages_to_install=['boto3==1.34.28'],
)

In [None]:
# Component factory for `delete_artifacts`, built on the shared base image
# with no extra packages requested.
delete_artifacts_op = kfp.components.create_component_from_func(
    delete_artifacts,
    base_image=task_base_image,
)

### 04 - Create pipeline

In [None]:
# Name and description handed to the `@kfp.dsl.pipeline` decorator; the name
# is also forwarded to the download/upload tasks and used for the YAML file.
pipeline_name = "03_video_insights"
pipeline_description = "Video Insights Pipeline"

In [None]:
# Video Insights pipeline definition.
#
# Creates one persistent volume claim, mounts it at the same path in every
# task, and chains the tasks so each one runs after the previous one:
#   download_video -> prepare_video -> extract_audio -> extract_speeches
#   -> upload_artifacts -> delete_artifacts
# The six S3 parameters are forwarded unchanged to the download and upload
# tasks, together with the module-level `pipeline_name`.
#
# NOTE(review): the S3 credentials travel as plain pipeline parameters;
# confirm this is acceptable for the target cluster.
# NOTE(review): documented with comments rather than a docstring because kfp
# may fold docstring content into the compiled pipeline metadata; confirm
# before converting.
@kfp.dsl.pipeline(name=pipeline_name, description=pipeline_description)
def pipeline(
    s3_service_name: str,
    s3_endpoint_url: str,
    s3_access_key_id: str,
    s3_secret_access_key: str,
    s3_region: str,
    s3_bucket: str,
):

    import os

    # Claim the volume that every task shares.
    volume_op = kfp.dsl.VolumeOp(
        name='create_pvc',
        resource_name='pvc',
        size='1Gi',
        modes=kfp.dsl.VOLUME_MODE_RWO,
    )
    shared_volume = volume_op.volume

    # Path at which the shared volume is mounted inside each task.
    mount_path = os.path.join('/', 'pipeline')

    def mount_after(task, predecessor):
        # Mount the shared volume in `task` and order it after `predecessor`.
        task.add_pvolumes({mount_path: shared_volume.after(predecessor)})
        return task

    # Keyword arguments common to the download and upload tasks.
    s3_arguments = {
        's3_service_name': s3_service_name,
        's3_endpoint_url': s3_endpoint_url,
        's3_access_key_id': s3_access_key_id,
        's3_secret_access_key': s3_secret_access_key,
        's3_region': s3_region,
        's3_bucket': s3_bucket,
        'pipeline_name': pipeline_name,
    }

    # Tasks are instantiated in execution order; each is chained to the
    # task created just before it.
    download_step = mount_after(download_video_op(**s3_arguments), volume_op)
    prepare_step = mount_after(prepare_video_op(), download_step)
    audio_step = mount_after(extract_audio_op(), prepare_step)
    speeches_step = mount_after(extract_speeches_op(), audio_step)
    upload_step = mount_after(upload_artifacts_op(**s3_arguments), speeches_step)
    mount_after(delete_artifacts_op(), upload_step)

### 05 - Create pipeline yaml

In [None]:
# Relative path of the compiled pipeline package: yaml/<pipeline_name>.yaml
pipeline_package_path = os.path.join('yaml', pipeline_name + '.yaml')

In [None]:
# Compile the pipeline function into a Tekton YAML package written to
# `pipeline_package_path`.
# The output directory is created first so the compiler can write the package
# even when the directory does not exist yet (e.g. on a fresh checkout).
os.makedirs(os.path.dirname(pipeline_package_path) or '.', exist_ok = True)

kfp_tekton.compiler.TektonCompiler().compile(
    pipeline_func = pipeline,
    package_path  = pipeline_package_path
)

### 06 - Create pipeline run

In [None]:
# Connection settings for the pipelines API; replace both placeholders before
# running the cell that creates the run.
kubeflow_host = "<kubeflow_host>"
kubeflow_token = "<kubeflow_token>"

In [None]:
# Run-time values for the six parameters of `pipeline`; every entry except
# the service name is a placeholder to be filled in before submitting a run.
pipeline_arguments = dict(
    s3_service_name='s3',
    s3_endpoint_url='<s3_endpoint_url>',
    s3_access_key_id='<s3_access_key_id>',
    s3_secret_access_key='<s3_secret_access_key>',
    s3_region='<s3_region>',
    s3_bucket='<s3_bucket>',
)

In [None]:
# Connect to the pipelines API with the host/token above and submit the
# compiled package as a run, passing `pipeline_arguments` as its parameters.
kfp_tekton.TektonClient(
    host=kubeflow_host,
    existing_token=kubeflow_token,
).create_run_from_pipeline_package(
    pipeline_file=pipeline_package_path,
    arguments=pipeline_arguments,
)