<center><LARGE><STRONG>Vertex API Pipeline using KFP Components</STRONG></LARGE></center>
We will use Google Cloud Platform to deploy the pipeline.

# Installing Required Packages

In [None]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME") and not os.getenv("VIRTUAL_ENV")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)

# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

! pip3 install --upgrade google-cloud-aiplatform {USER_FLAG} -q
! pip3 install -U google-cloud-storage {USER_FLAG} -q
! pip3 install {USER_FLAG} kfp google-cloud-pipeline-components --upgrade -q

if os.getenv("IS_TESTING"):
    ! pip3 install --upgrade matplotlib $USER_FLAG -q

import os

if not os.getenv("IS_TESTING"):
    # Automatically restart kernel after installs
    import IPython
    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

In [None]:
# Print the version of the KFP SDK that python3 can import.
! python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"

# Setup Google Cloud Project

In [None]:
# Google Cloud project to use. Leave the placeholder untouched to have the next
# cell look the project up from the active gcloud configuration.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [None]:
# Make PROJECT_ID the default project for subsequent gcloud commands.
! gcloud config set project $PROJECT_ID

In [None]:
# Region setting; replace the placeholder to choose a region.
REGION = "[your-region]"  # @param {type: "string"}

# An untouched placeholder falls back to us-central1.
REGION = "us-central1" if REGION == "[your-region]" else REGION

In [None]:
import random
import string


def generate_uuid(length: int = 8) -> str:
    """Return a random identifier made of lowercase letters and digits.

    Args:
        length: Number of characters to generate (default 8).

    Returns:
        A string of exactly ``length`` characters, each drawn independently
        (with replacement) from ``a-z0-9``.
    """
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=length)
    return "".join(picked)


# Random suffix reused by later cells to build resource names.
UUID = generate_uuid()

## Authenticating Google Account

In [None]:
# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your GCP account. This provides access to your
# Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

import os
import sys

# If on Vertex AI Workbench, then don't execute this code
IS_COLAB = "google.colab" in sys.modules
if not os.path.exists("/opt/deeplearning/metadata/env_version") and not os.getenv(
    "DL_ANACONDA_HOME"
):
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this notebook locally, replace the string below with the
    # path to your service account key and run this cell to authenticate your GCP
    # account.
    elif not os.getenv("IS_TESTING"):
        %env GOOGLE_APPLICATION_CREDENTIALS ''

## Create a Cloud Storage bucket

In [None]:
# Bucket name placeholder and the matching gs:// URI; the next cell replaces
# both when the placeholder is left unchanged.
BUCKET_NAME = "[your-bucket-name]"  # @param {type:"string"}
BUCKET_URI = "gs://" + BUCKET_NAME

In [None]:
# No bucket configured: derive a name from the project ID plus the random suffix.
if BUCKET_NAME in ("", None, "[your-bucket-name]"):
    BUCKET_NAME = "".join((PROJECT_ID, "aip-", UUID))
    BUCKET_URI = f"gs://{BUCKET_NAME}"

In [None]:
# Create the bucket at BUCKET_URI in REGION.
! gsutil mb -l $REGION $BUCKET_URI

In [None]:
# List the bucket's objects in long format, including all object versions.
! gsutil ls -al $BUCKET_URI

# Setting Up Variables
## Importing Libraries

In [None]:
import google.cloud.aiplatform as aip
from kfp.v2 import dsl
from kfp.v2.dsl import ClassificationMetrics, Metrics, Output, component

## Vertex AI Pipelines constants

In [None]:
# Cloud Storage prefix passed as the pipeline root in later cells.
PIPELINE_ROOT = f"{BUCKET_URI}/pipeline_root/iris"

## Initialize Vertex AI SDK for Python

In [None]:
# Pass REGION explicitly so Vertex AI resources are created in the region chosen
# earlier in the notebook instead of the SDK's default location.
aip.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

# Defining Linear Regression pipeline components using Statsmodels

In [None]:
@component(
    # Every third-party package imported in the body must be installed into the
    # bare python:3.9 image; statsmodels alone does not bring in matplotlib.
    packages_to_install=["pandas", "numpy", "matplotlib", "statsmodels"],
    base_image="python:3.9",
    output_component_file="PriceElasticity.yaml",
)
def PriceElsticity(dataframe: str):
    """Estimate the regular price elasticity of demand for every item in a CSV.

    For each distinct ``ItemID`` an ordinary-least-squares line
    ``Volume = const + slope * Price`` is fitted, and the elasticity at the
    means, ``slope * mean(Price) / mean(Volume)``, is recorded. The results are
    written to ``RPriceElasticity.csv`` and charted in ``RPriceElasticity.pdf``.

    Args:
        dataframe: Path of a CSV file containing ``ItemID``, ``Price`` and
            ``Volume`` columns.
    """
    # Imports stay inside the function body so the component is self-contained.
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import statsmodels.api as sm

    # Load the input data; everything below works on this frame.
    regularDF = pd.read_csv(dataframe)

    # Running Linear Regression and computing Price Elasticity
    itemlist = []
    RegularPriceElasticity = []
    for item in np.unique(regularDF["ItemID"]):
        itemDF = regularDF[regularDF["ItemID"] == item]
        # add_constant() adds no intercept column when the regressor is itself
        # constant, so an item seen at a single price has no slope to read.
        if itemDF["Price"].nunique() < 2:
            continue
        x = sm.add_constant(itemDF["Price"].tolist())
        y = itemDF["Volume"].tolist()
        # params[0] is the intercept, params[1] the price coefficient.
        slope = sm.OLS(y, x).fit().params[1]
        elasticity = slope * (itemDF["Price"].mean() / itemDF["Volume"].mean())
        itemlist.append(item)
        RegularPriceElasticity.append(elasticity)

    RegularElasticityOfDemandDf = pd.DataFrame(
        {"Items": itemlist, "Regular Price Elasticity": RegularPriceElasticity}
    )

    # Writing Price Elasticity data to csv file
    # NOTE(review): both output files land in the working directory of the
    # process running this function; confirm whether they should be exported
    # as pipeline artifacts instead.
    RegularElasticityOfDemandDf.to_csv("RPriceElasticity.csv", index=False)

    # Nothing to chart when no item had enough price variation.
    if RegularElasticityOfDemandDf.empty:
        return

    # Creating Graph for Price Elasticity data
    ax = RegularElasticityOfDemandDf.plot(
        kind="barh", x="Items", y="Regular Price Elasticity", legend=False
    )
    ax.bar_label(ax.containers[0], color="red")
    ax.set_xlim(-11, 4)
    plt.title("Regular Price Elasticity")
    plt.xlabel("Price Elasticity")
    plt.ylabel("Items")
    plt.savefig("RPriceElasticity.pdf")

# Defining Pipeline

In [None]:
# Single source of truth for the pipeline name used by the decorator below.
PIPELINE_NAME = "metrics-pipeline-v2"


@dsl.pipeline(
    # Default pipeline root. You can override it when submitting the pipeline.
    pipeline_root=PIPELINE_ROOT,
    # A name for the pipeline.
    name=PIPELINE_NAME,
)
def pipeline(seed: int, splits: int):
    """Single-step pipeline that runs the price-elasticity component on data.csv.

    Args:
        seed: Accepted as a pipeline parameter; not used by any step.
        splits: Accepted as a pipeline parameter; not used by any step.
    """
    PriceElsticity("data.csv")

# Compile the pipeline

In [None]:
from kfp.v2 import compiler  # noqa: F811

# Serialize the pipeline definition to a local JSON job spec.
compiler.Compiler().compile(
    package_path="PriceElasticity_pipeline.json",
    pipeline_func=pipeline,
)

# Run the pipeline

In [None]:
# Human-readable name for the run; the random suffix keeps repeated runs apart.
DISPLAY_NAME = "PriceElasticity" + UUID

job = aip.PipelineJob(
    display_name=DISPLAY_NAME,
    # Job spec produced by the compile step.
    template_path="PriceElasticity_pipeline.json",
    # Pipeline job IDs may only contain lowercase letters, digits and hyphens,
    # so the ID is written in lowercase.
    job_id=f"priceelasticity-v2{UUID}-1",
    pipeline_root=PIPELINE_ROOT,
    parameter_values={"seed": 7, "splits": 10},
)

# Submit the job and wait for it to finish.
job.run()