In [None]:
# @title Copyright & License (click to expand)
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Pipelines and Evaluation of LLMs
Auto side-by-side (AutoSxS) evaluation: <br>
https://cloud.google.com/vertex-ai/generative-ai/docs/models/side-by-side-eval

## Overview

This tutorial demonstrates how to use Vertex AI Pipelines to evaluate two LLMs side by side (AutoSxS) on a question-answering use case.

## Installation

Install the following packages required to execute this notebook.

In [2]:
! pip3 install --upgrade --force-reinstall google-cloud-aiplatform \
                         google-cloud-storage \
                         kfp \
                         google-cloud-pipeline-components

Collecting google-cloud-aiplatform
  Downloading google_cloud_aiplatform-1.44.0-py2.py3-none-any.whl.metadata (27 kB)
Collecting google-cloud-storage
  Downloading google_cloud_storage-2.16.0-py2.py3-none-any.whl.metadata (6.1 kB)
Collecting kfp
  Downloading kfp-2.7.0.tar.gz (441 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m441.8/441.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting google-cloud-pipeline-components
  Downloading google_cloud_pipeline_components-2.11.0-py3-none-any.whl.metadata (5.9 kB)
Collecting google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.34.1 (from google-api-core[grpc]!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*,<3.0.0dev,>=1.34.1->google-cloud-aiplatform)
  Downloading google_api_core-2.18.0-py3-none-any.whl.metadata (2.7 kB)
Collecting google-auth<3.0.0dev,>=2.14.1 (from google-cloud-aiplat

Check the package versions.

In [3]:
# Print the version of the KFP SDK importable by `python3`.
! python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"
# List the installed packages whose names contain "aiplatform" / "components".
! pip3 freeze | grep aiplatform
! pip3 freeze | grep components

KFP SDK version: 2.7.0
google-cloud-aiplatform==1.39.0
google-cloud-pipeline-components==2.11.0


## Before you begin

**Restart the kernel** before continuing, so that the packages installed above are picked up.

In [1]:
PROJECT = !gcloud config get-value project
PROJECT_ID = PROJECT[0]

# Set the project id
! gcloud config set project {PROJECT_ID}

Updated property [core/project].


#### Region

You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations).

In [2]:
# Region used for the bucket location, the Vertex AI SDK session and the pipeline components.
REGION = "us-central1"

### Create a Cloud Storage bucket

Create a storage bucket to store intermediate artifacts such as datasets.

In [3]:
# The bucket name is derived from the project id; it is used as the SDK staging
# bucket and as the pipeline root further down.
BUCKET_URI = f"gs://{PROJECT_ID}-llmops"  # @param {type:"string"}

In [13]:
# Display the resolved bucket URI as a sanity check.
BUCKET_URI

'gs://tadelle-372416-llmops'

**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [7]:
# Make the bucket (`gsutil mb`) at location REGION under project PROJECT_ID.
! gsutil mb -l $REGION -p $PROJECT_ID $BUCKET_URI

Creating gs://tadelle-372416-llmops/...


#### Service Account 

You use a service account to create Vertex AI Pipeline jobs.

### Import libraries and define constants

In [4]:
import kfp
from kfp import compiler, dsl
from kfp.dsl import component, Input, Output, Artifact, InputPath, Dataset

In [5]:
from typing import Dict, List

In [6]:
from google.cloud import aiplatform

## Initialize Vertex AI SDK for Python

Initialize the Vertex AI SDK for Python for your project and corresponding bucket.

In [7]:
# Vertex AI Experiment used for the SDK session and for the submitted pipeline run.
experiment_name = "genai-llm"

In [8]:
# Set the SDK session defaults: project, region, staging bucket and experiment.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_URI,
    experiment=experiment_name,
)

Creating Tensorboard
Create Tensorboard backing LRO: projects/81492352225/locations/us-central1/tensorboards/2043759019570495488/operations/4585743075929751552
Tensorboard created. Resource name: projects/81492352225/locations/us-central1/tensorboards/2043759019570495488
To use this Tensorboard in another session:
tb = aiplatform.Tensorboard('projects/81492352225/locations/us-central1/tensorboards/2043759019570495488')


In [314]:
# aiplatform.start_run(run="gemini-textbison-1")

In [9]:
@component(
    packages_to_install=["langchain", "langchain-google-vertexai", "pandas"],
    base_image="us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-12.py310:latest",  # TODO: find a more relevant base image.
)
def prompt_vertex_llm_apis(
    project_id: str,
    region_id: str,
    prompt_file: str,
    prompt_template: str,
    prompt_variables: List,
    model_name: str,
    model_endpoint: Input[Artifact],
    model_response: Output[Dataset]
):
    """Prompt a Vertex AI LLM once per row of a CSV file and save the responses.

    Args:
        project_id: Google Cloud project used by the Vertex AI client.
        region_id: Vertex AI location used by the Vertex AI client.
        prompt_file: Path/URI of a CSV readable by ``pandas.read_csv``; each row
            must provide the ``id``, ``question`` and ``context`` columns.
        prompt_template: Template string with ``{question}`` and ``{context}``
            placeholders.
        prompt_variables: Names of the variables declared on the prompt template.
        model_name: Name of the model to call.
        model_endpoint: Artifact describing the model. It is not read by this
            component; passing it makes this step depend on the step that
            produced the artifact.
        model_response: Output dataset; receives the input rows plus a
            ``response`` column, written as CSV.
    """
    from langchain_google_vertexai import VertexAI
    from langchain_core.prompts import PromptTemplate
    import pandas as pd

    prompt_df = pd.read_csv(prompt_file)

    # The template and the model client do not depend on the row, so build them
    # once instead of on every iteration.
    prompt = PromptTemplate(input_variables=prompt_variables, template=prompt_template)
    # Pass project/location explicitly so the component honours its own
    # parameters instead of relying on ambient defaults.
    model = VertexAI(model_name=model_name, project=project_id, location=region_id)

    responses = []
    for _, row in prompt_df.iterrows():
        print(row["id"], row["question"], row["context"])

        prompt_formatted_str: str = prompt.format(
            question=row["question"],
            context=row["context"]
        )
        # `invoke` replaces the deprecated direct call `model(prompt)`.
        responses.append(model.invoke(prompt_formatted_str))

    # Assign the whole column at once; this also guarantees the `response`
    # column exists when the prompt file has no rows.
    prompt_df["response"] = responses
    prompt_df.to_csv(model_response.path, index=False)

    

In [10]:
@component(
    packages_to_install=["pandas_gbq", "pandas"],
    base_image="us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-12.py310:latest" #TODO Find Relevant Base Image!!!
)
def eval_dataset(
    model_a_response: Input[Dataset],
    model_b_response: Input[Dataset],
    dataset_uri: str,
    project_id:str
):
    """Join the two models' responses and upload them to a BigQuery table.

    Args:
        model_a_response: CSV dataset with model A's answers in a ``response`` column.
        model_b_response: CSV dataset with model B's answers in a ``response`` column.
        dataset_uri: Destination table passed to ``pandas_gbq.to_gbq``.
        project_id: Google Cloud project used for the upload.
    """
    import pandas as pd
    import pandas_gbq

    # Give each model's answer column a distinct name before joining.
    responses_a = pd.read_csv(model_a_response.path).rename(
        columns={"response": "model_a_response"}
    )
    responses_b = pd.read_csv(model_b_response.path).rename(
        columns={"response": "model_b_response"}
    )

    # No join keys are given, so pandas joins on the columns both frames share.
    side_by_side = responses_a.merge(responses_b)

    # Any existing table at the destination is replaced.
    pandas_gbq.to_gbq(
        side_by_side,
        dataset_uri,
        project_id=project_id,
        if_exists='replace',
    )
    

In [14]:
@component(
    packages_to_install=["google-cloud-aiplatform", "pandas"],
    base_image="us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-12.py310:latest" #TODO Find Relevant Base Image!!!
)
def auto_sxs_eval(
    dataset_uri:str,
    id_column: str,
    inference_instruction_column: str,
    inference_context_column: str,
    response_a_column: str,
    response_b_column: str,
    bucket_uri: str,
    project_id: str,
    region_id:str,
    judgements: Output[Dataset],
    summary_metrics: Output[Dataset],
    task: str='question_answer@latest'

):
    """Run the AutoSxS pipeline template and export its judgments and metrics.

    Args:
        dataset_uri: Evaluation dataset passed to the template as
            ``evaluation_dataset``.
        id_column: Column that identifies each example.
        inference_instruction_column: Column holding the instruction/question.
        inference_context_column: Column holding the context.
        response_a_column: Column holding model A's responses.
        response_b_column: Column holding model B's responses.
        bucket_uri: Staging bucket; the pipeline root is ``<bucket_uri>/llm_eval``.
        project_id: Google Cloud project in which the job runs.
        region_id: Vertex AI location in which the job runs.
        judgements: Output dataset; receives the per-example judgments as CSV.
        summary_metrics: Output dataset; receives the aggregate metrics as a
            one-row CSV.
        task: Evaluation task passed to the template.

    Raises:
        RuntimeError: If an expected task is missing from the finished run.
    """
    import os
    import pandas as pd
    from google.cloud import aiplatform

    def _find_task(task_details, task_name):
        """Return the task detail named `task_name`, or fail with a clear error."""
        for details in task_details:
            if details.task_name == task_name:
                return details
        raise RuntimeError(
            f"Task '{task_name}' was not found in the AutoSxS pipeline run."
        )

    # NOTE(review): dataset_uri is forwarded as-is; confirm the AutoSxS template
    # accepts this form (it may expect a `bq://` prefix for BigQuery tables).
    parameters = {
        'evaluation_dataset': dataset_uri,
        'id_columns': [id_column],
        'task': task,
        'autorater_prompt_parameters': {
          'inference_instruction': {'column': inference_instruction_column},
          'inference_context': {'column': inference_context_column},
        },
        # Use the caller-supplied column names; they were previously ignored in
        # favour of hard-coded values.
        'response_column_a': response_a_column,
        'response_column_b': response_b_column,
    }

    aiplatform.init(project=project_id, location=region_id, staging_bucket=bucket_uri)

    job = aiplatform.PipelineJob(
        display_name='llm_eval',
        pipeline_root=os.path.join(bucket_uri, 'llm_eval'),
        template_path=('https://us-kfp.pkg.dev/ml-pipeline/google-cloud-registry/autosxs-template/default'),
        parameter_values=parameters,
    )
    job.run()

    task_details = job.task_details

    # Judgments
    judgments_task = _find_task(task_details, "online-evaluation-pairwise")
    judgments_uri = judgments_task.outputs["judgments"].artifacts[0].uri
    judgments_df = pd.read_json(judgments_uri, lines=True)
    judgments_df.to_csv(judgements.path, index=False)

    # Aggregate metrics are read from the artifact metadata of the metrics task.
    metrics_task = _find_task(task_details, "model-evaluation-text-generation-pairwise")
    summary_metrics_df = pd.DataFrame(
        [metrics_task.outputs["autosxs_metrics"].artifacts[0].metadata]
    )
    summary_metrics_df.to_csv(summary_metrics.path, index=False)
    


In [19]:
@component
def get_model_artifact(
    model_id:str,
    project_id:str,
    region_id:str,
    model_artifact: Output[Artifact]
):
    """Record the publisher-model URL of a Google model on an output artifact.

    Args:
        model_id: Publisher model id.
        project_id: Google Cloud project placed in the URL.
        region_id: Vertex AI location; used for both the regional API host and
            the ``locations/`` path segment.
        model_artifact: Output artifact that receives the URL.
    """
    # The host was previously fixed to us-central1, which disagreed with the
    # `locations/{region}` segment for any other region.
    model_endpoint = (
        "https://{REGION_ID}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}"
        "/locations/{REGION_ID}/publishers/google/models/{MODEL_ID}"
    ).format(
        PROJECT_ID=project_id,
        REGION_ID=region_id,
        MODEL_ID=model_id,
    )
    model_artifact.path = model_endpoint
    

In [36]:
# Prompt sent to both models; {context} and {question} are filled in per CSV row.
# NOTE(review): the "?" after {context} looks unintended — confirm before changing,
# since editing the template changes what the models are asked.
prompt_template = "Given the context:\n {context}? \n Answer following question:\n  {question}"
prompt_variables = ["context", "question"]
# Derive the locations from BUCKET_URI / PROJECT_ID instead of hard-coding one
# specific project, so the notebook runs unchanged in other projects.
prompt_file = f"{BUCKET_URI}/prompts/prompts_question_context.csv"
DATASET_URI = f"{PROJECT_ID}.demo.llm_eval"

In [37]:
# Publisher model ids of the two models being compared.
text_bison_id = 'text-bison@002'
# NOTE(review): despite the variable name, this is a Gemini model id.
gemini_bison_id = 'gemini-1.0-pro-001'

In [38]:
@dsl.pipeline(
    name="prompt_vertex_llm_apis",
)
def pipeline():
    """Prompt two LLMs with the same questions and compare them with AutoSxS.

    Steps:
      1. Record each model's publisher URL as an artifact.
      2. Prompt each model with every row of ``prompt_file``.
      3. Join both response sets and upload them to ``DATASET_URI``.
      4. Run the AutoSxS evaluation on the uploaded table.
    """

    gemini_artifact = get_model_artifact(
        model_id=gemini_bison_id,
        project_id=PROJECT_ID,
        region_id=REGION,
    )

    # Call the same pinned model id that the artifact records, so the evaluated
    # model and the recorded model cannot drift apart.
    gemini = prompt_vertex_llm_apis(
        project_id=PROJECT_ID,
        region_id=REGION,
        prompt_file=prompt_file,
        prompt_template=prompt_template,
        prompt_variables=prompt_variables,
        model_name=gemini_bison_id,
        model_endpoint=gemini_artifact.outputs["model_artifact"]
    ).set_display_name(gemini_bison_id)

    text_bison_artifact = get_model_artifact(
        project_id=PROJECT_ID,
        region_id=REGION,
        model_id=text_bison_id
    )

    text_bison = prompt_vertex_llm_apis(
        project_id=PROJECT_ID,
        region_id=REGION,
        prompt_file=prompt_file,
        prompt_template=prompt_template,
        prompt_variables=prompt_variables,
        model_name=text_bison_id,
        model_endpoint=text_bison_artifact.outputs["model_artifact"]
    ).set_display_name(text_bison_id)

    # Model A is Gemini, model B is text-bison.
    eval_llm = eval_dataset(
        model_a_response=gemini.outputs["model_response"],
        model_b_response=text_bison.outputs["model_response"],
        dataset_uri=DATASET_URI,
        project_id=PROJECT_ID
    ).after(gemini, text_bison)

    # AutoSxS reads the table by name rather than through a task output, so the
    # ordering after the upload step has to be declared explicitly.
    auto_sxs_eval(
        dataset_uri=DATASET_URI,
        id_column="id",
        inference_instruction_column="question",
        inference_context_column="context",
        response_a_column="model_a_response",
        response_b_column="model_b_response",
        project_id=PROJECT_ID,
        region_id=REGION,
        bucket_uri=BUCKET_URI,
    ).after(eval_llm)
    
  

In [39]:
# Compile the pipeline function into a pipeline spec file in the working directory.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path="llm-pipeline.yaml",
)

In [40]:
# The bucket itself is used as the pipeline root.
PIPELINE_ROOT=BUCKET_URI

In [41]:
# Define the pipeline job from the compiled spec file.
job = aiplatform.PipelineJob(
    template_path="llm-pipeline.yaml",
    pipeline_root=PIPELINE_ROOT,
    display_name="prompt_vertex_llm_apis",
)

In [42]:
# Submit the run and associate it with the experiment.
job.submit(experiment=experiment_name)

Creating PipelineJob
PipelineJob created. Resource name: projects/81492352225/locations/us-central1/pipelineJobs/prompt-vertex-llm-apis-20240326182001
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/81492352225/locations/us-central1/pipelineJobs/prompt-vertex-llm-apis-20240326182001')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/prompt-vertex-llm-apis-20240326182001?project=81492352225
Associating projects/81492352225/locations/us-central1/pipelineJobs/prompt-vertex-llm-apis-20240326182001 to Experiment: genai-llm
