In [None]:
! pip install -Uqq requests PyYAML google-cloud-aiplatform kfp 
! pip install -Uqq git+https://github.com/gretelai/gretel-mlops.git

In [None]:
# Import necessary libraries
import json
import requests
import sys
import yaml

from google.cloud import aiplatform as aip
from google.colab import auth
from kfp import compiler

from gretel_mlops.gcp.vertexai.pipeline import (
    create_pipeline,
    get_pipeline_job_result,
)

## 1. Setup and Configuration for Vertex AI and GCP Services

- **Project and Region Setup**: Sets up the `PROJECT_ID` and `REGION` for the Vertex AI project. Defines the `BUCKET_URI` for Google Cloud Storage.
- **User Authentication**: Executes commands to authenticate the user's Google Cloud account, ensuring secure access to GCP services.
- **Bucket Creation**: Creates a new Google Cloud Storage bucket designated by `BUCKET_URI`, used for storing pipeline artifacts and data.
- **Service Account Retrieval**: Retrieves the project number and constructs the service account email. This service account will be used for operations within Vertex AI.
- **Service Account Permission Configuration**: Assigns necessary roles to the service account for object creation and viewing in the storage bucket. Ensures the service account has the required permissions for smooth operation.
- **Secret Manager Access**: Grants the service account the Secret Manager `secretAccessor` role on the project, allowing it to read the secrets (such as the Gretel API key) that the pipeline needs at run time.



In [None]:
# GCP Configuration
PROJECT_ID = "gretel-eng-sandbox"  # @param {type:"string"}
REGION = "us-central1"  # @param {type: "string"}
BUCKET_URI = f"gs://{PROJECT_ID}-vertex-pipeline"
GRETEL_SECRET_NAME = "GretelApiKey" # @param {type: "string"}

# Set the project id
! gcloud config set project {PROJECT_ID}

# Authenticate your Google Cloud account
! gcloud auth login --no-launch-browser

# Authenticate
auth.authenticate_user()

# Create bucket
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}

# Retrieve service account
shell_output = ! gcloud projects describe $PROJECT_ID
PROJECT_NUMBER = shell_output[-1].split(":")[1].strip().replace("'", "")
SERVICE_ACCOUNT = f"{PROJECT_NUMBER}-compute@developer.gserviceaccount.com"

# Set service account for Vertex AI Pipelines
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI

# Grant Secret Manager Access to Service Account
! gcloud projects add-iam-policy-binding {PROJECT_ID} \
  --member="serviceAccount:{SERVICE_ACCOUNT}" \
  --role="roles/secretmanager.secretAccessor" \
  --condition=None


## 2. Fetch and load Gretel MLOps configuration from a YAML file

In [None]:
# URL of the raw YAML file
config_url = "https://raw.githubusercontent.com/gretelai/gretel-mlops/main/src/gretel_mlops/gcp/vertexai/configs/config_stroke.yaml"

# Download the YAML file. Use a timeout so the cell cannot hang forever, and
# raise on HTTP errors so an error page (e.g. a 404 body) is never parsed as
# if it were the configuration.
response = requests.get(config_url, timeout=30)
response.raise_for_status()
config_dict = yaml.safe_load(response.text)

# The cells below index into the configuration, so it must be a mapping.
if not isinstance(config_dict, dict):
    raise ValueError(
        f"Expected a YAML mapping at {config_url}, got {type(config_dict).__name__}"
    )

# Note: uncomment the lines below for Gretel Hybrid usage
# config_dict['gretel']['mode'] = 'hybrid'
# config_dict['gretel']['sink_bucket'] = 'gretel-hybrid-sandbox-sink' # your sink bucket name

# View the configuration (key order preserved as in the source file)
yaml.dump(config_dict, sys.stdout, default_flow_style=False, sort_keys=False)

# The pipeline receives the configuration as a JSON string.
config = json.dumps(config_dict)

## 3. Build the pipeline


In [None]:
# Initialize the Vertex AI SDK for Python for the project, region and bucket.
# The location is passed explicitly so that the SDK uses the same REGION as
# the bucket and the job-result lookup, rather than the SDK's default
# location.
aip.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

In [None]:
# Pipeline settings: display name, model name derived from the dataset name
# in the loaded configuration, the model container image, and the GCS
# location used as the pipeline root.
PIPELINE_NAME = "gretel-vertex-mlops-pipeline"  # @param {type: "string"}
MODEL_NAME = "gretel-model-{}".format(config_dict["dataset"]["name"])
MODEL_IMAGE = "us-docker.pkg.dev/vertex-ai/training/xgboost-cpu.1-6:latest"
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline_root/control"

# Build the pipeline function. Arguments are positional, in the order the
# gretel_mlops helper is called with in this notebook.
pipeline = create_pipeline(
    PIPELINE_NAME, PIPELINE_ROOT,
    MODEL_NAME, MODEL_IMAGE,
    PROJECT_ID, REGION,
    GRETEL_SECRET_NAME, PROJECT_NUMBER,
    config,
)

# Compile the pipeline into a YAML template named after the pipeline;
# the submission step reads this file back.
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(
    pipeline_func=pipeline,
    package_path=f"{PIPELINE_NAME}.yaml",
)

## 4. Submit the pipeline job

In [None]:
# Run the pipeline

# Create the job from the compiled template. Project and location are passed
# explicitly so the job is created in the same project/region that the
# evaluation step later uses to look it up, independent of SDK defaults.
job = aip.PipelineJob(
    display_name=PIPELINE_NAME,
    template_path=f"{PIPELINE_NAME}.yaml",
    pipeline_root=PIPELINE_ROOT,
    project=PROJECT_ID,
    location=REGION,
)

# Submit the job (with the SDK defaults this is expected to wait for the run
# to finish -- confirm against the installed SDK version).
job.run()

## 5. Inspect Evaluation Report

In [None]:
# Retrieve the result of the submitted pipeline job (identified by its
# resource name) and print it under a heading.
evaluation_report = get_pipeline_job_result(
    project=PROJECT_ID,
    location=REGION,
    job_name=job.resource_name,
)

print("Evaluation metrics:", evaluation_report, sep="\n")