In [1]:
import openai
import json

In [2]:
SYSTEM_PREFIX = """You are an AI Python developer assistant.
- You are building a Vertex AI Pipeline component that takes an input and produces an output based on user requested actions.
- The `google-cloud-aiplatform` and `openai` packages are already installed.
- Follow the user's instructions carefully & to the letter.
- Minimize any other prose.
- The component has to follow the template and only add the following sections:`COMPONENT_NAME`, `CONTAINER_IMAGE`, `PACKAGES_TO_INSTALL`, `INPUT_VARIABLES`, `INPUT_VARIABLES_TYPE`, `INPUT_VARIABLES_DESCRIPTION`, `RETURN_VARIABLES`, `RETURN_VARIABLES_TYPE`, `RETURN_VARIABLES_DESCRIPTION`, `COMPONENT_CODE`, `BRIEF_COMPONENT_DESCRIPTION`.

"""

COMPONENT_TEMPLATE_PROMPT="""
Follow the following template to generate the code for your component:
```
from kfp.v2.dsl import Dataset, Input, Output, Model, component
from pathlib import Path


@component(
    base_image="{CONTAINER_IMAGE}",
    packages_to_install=[{PACKAGES_TO_INSTALL}],
    output_component_file=str(Path(__file__).with_suffix(".yaml")),
)
def {COMPONENT_NAME}(
    {INPUT_VARIABLES}: {INPUT_VARIABLES_TYPE},
) -> None:
    \"""{BRIEF_COMPONENT_DESCRIPTION}

    Args:
        {INPUT_VARIABLES} ({INPUT_VARIABLES_TYPE}): {INPUT_VARIABLES_DESCRIPTION}

    Returns:
        {RETURN_VARIABLES} ({RETURN_VARIABLES_TYPE}): {RETURN_VARIABLES_DESCRIPTION}
    \"""
    import logging

    logging.getLogger().setLevel(logging.INFO)
    import {PACKAGES_TO_INSTALL}

    {COMPONENT_CODE}
    
```
"""

PIPELINE_TEMPLATE_PROMPT="""
Follow the following template to generate an example pipeline to run your component:
```
from kfp.v2.dsl import pipeline

@pipeline(
    name="{PIPELINE_NAME}",
    description="",
    pipeline_root=""
)
def {COMPONENT_NAME}_pipeline(
    {INPUT_VARIABLES}: {INPUT_VARIABLES_TYPE},
):
    "A pipeline that runs {COMPONENT_NAME}"
    {PIPELINE_CODE}

pipeline_arguments={EXAMPLE_PIPELINE_ARGUMENTS}

client = kfp.Client()
client.create_run_from_pipeline_func(
    {PIPELINE_NAME},
    arguments={EXAMPLE_PIPELINE_ARGUMENTS},
    mode=kfp.dsl.PipelineExecutionMode.V2_COMPATIBLE
)

```
"""

In [3]:
message = "Write a component that takes a GCS path containing multiple images (either .png or .json) {image_path: string} from a GCS bucket and uses grabcut to segment the images into a foreground and background. The component should output the foreground and background images to the GCS bucket in folder path {output_gcs_folder: string}."

message_list = [
    {"role": "system", "content": COMPONENT_TEMPLATE_PROMPT},
    {"role": "user", "content": message}
]

In [4]:
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=message_list,
    n=3
)


In [5]:
get_code = lambda choice_config : choice_config["message"]["content"].split("```")[1].removeprefix("\n")

In [6]:
messages_5shot = [get_code(response["choices"][i]) for i in range(len(response["choices"]))]

In [7]:
messages_5shot

['from kfp.v2.dsl import Dataset, Input, Output, Model, component\nfrom pathlib import Path\nfrom typing import List\nimport os\nfrom google.cloud import storage\nimport cv2\nimport numpy as np\n\n\n@component(\n    base_image="python:3.9",\n    packages_to_install=["google-cloud-storage", "opencv-python-headless"],\n    output_component_file=str(Path(__file__).with_suffix(".yaml")),\n)\ndef image_segmentation(\n    image_path: str,\n    output_gcs_folder: str\n) -> None:\n    """This component segments foreground and background from input images using grabcut algorithm.\n\n    Args:\n        image_path (str): Path of the GCS bucket containing the input images.\n        output_gcs_folder (str): Path of the GCS bucket to save the output images.\n\n    Returns:\n        None\n    """\n\n    # Create a client object and load input images from the gcs bucket\n    client = storage.Client()\n    bucket_name = image_path.split("/")[2]\n    bucket = client.bucket(bucket_name)\n    blob_list = 

In [8]:
snippet_txt = "\n\n".join([
    f"snippet: {i+1}.\n```{messages_5shot[i]}```" for i in range(len(messages_5shot))
])

In [9]:
print(snippet_txt)

snippet: 1.
```from kfp.v2.dsl import Dataset, Input, Output, Model, component
from pathlib import Path
from typing import List
import os
from google.cloud import storage
import cv2
import numpy as np


@component(
    base_image="python:3.9",
    packages_to_install=["google-cloud-storage", "opencv-python-headless"],
    output_component_file=str(Path(__file__).with_suffix(".yaml")),
)
def image_segmentation(
    image_path: str,
    output_gcs_folder: str
) -> None:
    """This component segments foreground and background from input images using grabcut algorithm.

    Args:
        image_path (str): Path of the GCS bucket containing the input images.
        output_gcs_folder (str): Path of the GCS bucket to save the output images.

    Returns:
        None
    """

    # Create a client object and load input images from the gcs bucket
    client = storage.Client()
    bucket_name = image_path.split("/")[2]
    bucket = client.bucket(bucket_name)
    blob_list = list(bucket.list_bl

In [29]:
REVIEW_NSHOT_PROMPT = """
Review the following kfp component code snippets and return a snippet_name, accuracy_score and accuracy_summary for each component.

The accuracy_score should be in percentage format and based on how closely the snippets follows:
- Ability to complete the users request
- The correct python packages have been added to `packages_to_install` and then imported in the component method code.
- System kfp component template
- Penalise for importing component packages at the top of the snippet and not in the component method.
- Penalise for not importing the logging package in the component
"""


REVIEW_RETURN_STRUCT = """
--------
Return the message in the following JSON list structure

[
  {
    "snippet_name": int,
    "accuracy_score": int,
    "accuracy_summary": string
  }
]
"""

In [30]:
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": REVIEW_NSHOT_PROMPT},
        {"role": "user", "content": f"{snippet_txt}\n{REVIEW_RETURN_STRUCT}"}
    ],
)

In [31]:
review_config_raw = response["choices"][0]["message"]["content"]
print(review_config_raw)

[
  {
    "snippet_name": 1,
    "accuracy_score": 90,
    "accuracy_summary": "This snippet uses the correct packages and imports and follows the KFP system component template. However, some of the system imports should be moved inside the component method instead of being imported at the top of the file."
  },
  {
    "snippet_name": 2,
    "accuracy_score": 100,
    "accuracy_summary": "This snippet correctly adds the necessary packages and imports inside the component method. It also follows the KFP system component template."
  },
  {
    "snippet_name": 3,
    "accuracy_score": 70,
    "accuracy_summary": "This snippet uses the correct packages and follows the KFP system component template, however it does not import the logging package and should avoid importing the packages to the top of the file. Therefore, the code could be improved."
  }
]


In [46]:
review_config_raw = '[\n  {\n    "snippet_name": 1,\n    "accuracy_score": 90,\n    "accuracy_summary": "This snippet uses the correct packages and imports and follows the KFP system component template. However, some of the system imports should be moved inside the component method instead of being imported at the top of the file."\n  },\n  {\n    "snippet_name": 2,\n    "accuracy_score": 100,\n    "accuracy_summary": "This snippet correctly adds the necessary packages and imports inside the component method. It also follows the KFP system component template."\n  },\n  {\n    "snippet_name": 3,\n    "accuracy_score": 70,\n    "accuracy_summary": "This snippet uses the correct packages and follows the KFP system component template, however it does not import the logging package and should avoid importing the packages to the top of the file. Therefore, the code could be improved."\n  }\n]]'

In [47]:
review_config = json.loads(review_config_raw)
review_config

JSONDecodeError: Extra data: line 17 column 2 (char 862)

In [36]:
res = {
    val["accuracy_score"]: val["snippet_name"] for val in review_config
}
res

{90: 1, 100: 2, 70: 3}

In [44]:
print(f"Recommended component snippet:\n{messages_5shot[res[sorted(res)[-1]] - 1]}")

Recommended component snippet:
from kfp.v2.dsl import Dataset, Input, Output, Model, component
from pathlib import Path

@component(
    base_image="python:3.9",
    packages_to_install=["google-cloud-storage", "opencv-python-headless"],
    output_component_file=str(Path(__file__).with_suffix(".yaml")),
)
def grabcut_segmentation(
    image_path: str,
    output_gcs_folder: str,
) -> None:
    """Segments images into foreground and background using GrabCut algorithm.

    Args:
        image_path (str): GCS path to the folder containing input images (.png or .jpg).
        output_gcs_folder (str): GCS path of directory to store segmented images.

    Returns:
        None
    """

    import os
    import logging
    import cv2
    import numpy as np
    import google.cloud.storage as gcs
    
    logging.getLogger().setLevel(logging.INFO)

    # Helper function to download file from GCS bucket.
    def download_blob(bucket_name, source_blob_name, destination_file_name):
        loggi