In [16]:
from kfp.v2.dsl import component, pipeline
from kfp.v2.dsl import Dataset, Output, Input, Metrics, Markdown, Artifact
from kfp.v2 import compiler

In [17]:
@component(
    base_image="gcr.io/ml-ops-segment-anything/sam:latest"
)
def batch_prediction(
    image_dir: str,
    visualization: Output[Markdown],
):
    """Run SAM automatic mask generation over a set of GCS images and write a Markdown report.

    The ``vit_b`` checkpoint is downloaded from the ``sam-pipeline-test`` bucket,
    masks are generated for every ``.png``/``.jpg``/``.jpeg`` blob listed under
    ``image_dir``, and one report section per image is written to
    ``visualization.path``.

    Args:
        image_dir: Prefix passed to ``bucket.list_blobs`` to select the input images.
        visualization: Output Markdown artifact; the report is written to its ``path``.
    """
    # Imports (and the helper below) are function-local on purpose: a KFP
    # lightweight component must be self-contained, so nothing defined at
    # notebook level can be referenced from here.
    import base64
    import logging

    import cv2
    import numpy as np
    import torch
    from google.cloud import storage
    from segment_anything import sam_model_registry, SamAutomaticMaskGenerator

    checkpoint_file = 'sam_vit_b_01ec64.pth'
    # Extension -> MIME type used in the embedded data URIs. The original
    # labelled every image as image/png, which is wrong for JPEG inputs.
    mime_by_extension = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
    }
    image_extensions = tuple(mime_by_extension)

    storage_client = storage.Client()
    bucket = storage_client.bucket('sam-pipeline-test')

    # Initialize Model
    bucket.blob('model_artifacts/sam_vit_b_01ec64.pth').download_to_filename(checkpoint_file)
    sam = sam_model_registry["vit_b"](checkpoint=checkpoint_file)
    cuda_available = torch.cuda.is_available()
    print(cuda_available)
    if not cuda_available:
        # Previously `sam.to("cuda")` was unconditional and crashed here.
        logging.warning("CUDA is not available; running SAM on CPU (this will be slow)")
    sam.to("cuda" if cuda_available else "cpu")
    mask_generator = SamAutomaticMaskGenerator(sam)

    # Initialize Images
    # NOTE(review): `prefix` is a plain string prefix, so "batch_1" presumably
    # also matches names such as "batch_10/..." -- confirm whether a trailing
    # "/" should be enforced by the caller.
    image_blobs = [
        candidate
        for candidate in bucket.list_blobs(prefix=image_dir)
        if candidate.name.lower().endswith(image_extensions)
    ]

    # Predict all images
    results = []
    for image_blob in image_blobs:
        print(image_blob.name)
        image_bytes = image_blob.download_as_bytes()
        # TODO image resizing
        # An empty or corrupt blob must not abort the whole batch: imdecode
        # yields None for undecodable data, and cvtColor would raise on it.
        decoded = None
        if image_bytes:
            decoded = cv2.imdecode(np.frombuffer(image_bytes, dtype=np.uint8), cv2.IMREAD_COLOR)
        if decoded is None:
            logging.warning("Skipping %s: content could not be decoded as an image", image_blob.name)
            continue
        image = cv2.cvtColor(decoded, cv2.COLOR_BGR2RGB)
        masks = mask_generator.generate(image)

        lowered_name = image_blob.name.lower()
        prediction = {
            "file_path": image_blob.name,
            "base64": base64.b64encode(image_bytes).decode('utf-8'),
            "mime_type": next(
                mime for ext, mime in mime_by_extension.items() if lowered_name.endswith(ext)
            ),
            "masks": {},
        }
        # Largest masks first, keyed by their rank.
        sorted_masks = sorted(masks, key=(lambda x: x['area']), reverse=True)
        for idx, mask in enumerate(sorted_masks):
            # TODO: Rewrite the result format, add more related scores and save it into a single json, with mask index
            # e.g. : {"mask_1": ..., "mask_2": ...}
            # NOTE(review): .tolist() on every full-resolution mask is kept for
            # the planned JSON output but is memory-hungry for large batches.
            prediction["masks"][f'mask_{idx}'] = mask['segmentation'].tolist()
        results.append(prediction)
        if cuda_available:
            torch.cuda.empty_cache()

    # TODO: Use Kubeflow Output to save json

    def image_cell(result, align):
        """Return one table cell embedding the source image as a data URI."""
        return (
            f'<td><img src="data:{result["mime_type"]};base64,{result["base64"]}" '
            f'width=100% align="{align}"></td>'
        )

    # TODO: Optimize the visualization
    # Explicit UTF-8 so blob names with non-ASCII characters do not depend on
    # the container's locale.
    with open(visualization.path, 'w', encoding='utf-8') as f:
        for result in results:
            f.write(f"# {result['file_path']} \n")
            # Only the first (largest) mask gets its own section for now; the
            # original expressed this as a loop that broke after one pass.
            first_mask_name = next(iter(result["masks"]), None)
            if first_mask_name is not None:
                f.write(f"## {first_mask_name} \n")
                f.write("<table><tr>")
                f.write(image_cell(result, "left"))
                # TODO: Image Base64 with each single mask, with width=45% and align='right'
                f.write(image_cell(result, "right"))
                f.write("</tr></table>\n\n")
            f.write("## All Masks \n")
            f.write("<table><tr>")
            f.write(image_cell(result, "left"))
            # TODO: Image Base64 with all masks, with width=45% and align='right'
            f.write(image_cell(result, "right"))
            f.write("</tr></table>\n\n")

In [18]:
# Pipeline Initialization
@pipeline(
    name="sam-pipeline-test",
    pipeline_root="gs://sam-pipeline-test",
)
def sam_pipeline(
    image_dir: str = "batch_1"
):
    # Single-step pipeline: run batch_prediction on `image_dir` with the
    # resource requests below applied to that step.
    prediction_task = batch_prediction(image_dir=image_dir)
    prediction_task = prediction_task.set_cpu_limit("8")
    prediction_task = prediction_task.set_memory_limit("64G")
    # Same call order as before: accelerator type first, then the GPU count.
    prediction_task = prediction_task.add_node_selector_constraint(
        "cloud.google.com/gke-accelerator", "NVIDIA_TESLA_T4"
    )
    prediction_task = prediction_task.set_gpu_limit(1)

In [14]:
# Compile sam_pipeline into the JSON pipeline spec used by the cells below.
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(pipeline_func=sam_pipeline, package_path='sam_pipe_test.json')



In [5]:
# Copy the compiled pipeline spec into the gs://sam-pipeline-test bucket.
# Note: the PipelineJob cell in this notebook reads the local
# 'sam_pipe_test.json' file, not this uploaded copy.
!gsutil cp sam_pipe_test.json gs://sam-pipeline-test

Copying file://sam_pipe_test.json [Content-Type=application/json]...
/ [1 files][  6.1 KiB/  6.1 KiB]                                                
Operation completed over 1 objects/6.1 KiB.                                      


In [15]:
from google.cloud import aiplatform

# Create the Vertex AI pipeline run from the locally compiled spec and submit
# it. Caching is disabled so the step is always re-executed; failure_policy is
# left at its default.
job_settings = {
    "display_name": 'sam_test-1',
    "template_path": 'sam_pipe_test.json',
    "enable_caching": False,
    "project": "ml-ops-segment-anything",
    "location": "us-west1",
}
job = aiplatform.PipelineJob(**job_settings)

job.submit()

Creating PipelineJob
PipelineJob created. Resource name: projects/633534855904/locations/us-west1/pipelineJobs/sam-pipeline-test-20230719163048
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/633534855904/locations/us-west1/pipelineJobs/sam-pipeline-test-20230719163048')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-west1/pipelines/runs/sam-pipeline-test-20230719163048?project=633534855904


In [64]:
!pip install kfp==1.8.22

Collecting kfp==1.8.22
  Downloading kfp-1.8.22.tar.gz (304 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m304.9/304.9 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: kfp
  Building wheel for kfp (setup.py) ... [?25ldone
[?25h  Created wheel for kfp: filename=kfp-1.8.22-py3-none-any.whl size=426971 sha256=76abdae182cf74a0ba6b45e327f7bbd65044fa748967d5865e8d6bcf5aa660bb
  Stored in directory: /home/jupyter/.cache/pip/wheels/74/c0/fc/bf0ab209fd6ae814d7efbc821076e948c3e4884f846583ab58
Successfully built kfp
Installing collected packages: kfp
  Attempting uninstall: kfp
    Found existing installation: kfp 1.8.20
    Uninstalling kfp-1.8.20:
      Successfully uninstalled kfp-1.8.20
Successfully installed kfp-1.8.22


In [17]:
# Show the installed kfp-related packages and their versions.
!pip list | grep "kfp"

kfp                                    1.8.22
kfp-pipeline-spec                      0.1.16
kfp-server-api                         1.8.5
