# Evaluate and Use Models from Model Garden

## Task 2. Prepare prompts for model evaluation

In [1]:
!pip install --upgrade --quiet google-cloud-aiplatform \
    google-cloud-logging openai 'anthropic[vertex]' httpx

### Setup and Initialize Vertex AI 

Run the following cells to install and import packages and initialize Vertex AI.

In [1]:
import google.cloud.logging
from google.cloud import aiplatform
from IPython.display import Markdown
import logging
import pandas as pd
import random

import vertexai
from vertexai.generative_models import GenerativeModel, GenerationConfig
from vertexai.evaluation import (
    MetricPromptTemplateExamples,
    EvalTask,
    PairwiseMetric,
    PointwiseMetric,
)

# Sets Pandas to display content in wider columns
pd.set_option("display.max_colwidth", None)

# Do not remove logging section
client = google.cloud.logging.Client()
client.setup_logging()

# TODO Add your project and location to initialize vertxai
PROJECT_ID, = !gcloud config get project
LOCATION = "us-central1"

vertexai.init(project=PROJECT_ID, location=LOCATION)

pd.set_option("display.max_colwidth", None)

### Define Prompts for Evaluation

In [15]:
prompt_template = """
    You are a technical architect.

    Prepare a high-level project plan for how a solution to the
    following PROJECT REQUEST could be developed using Google Cloud.
    
    We don't want to begin projects that we can't deliver well in
    under four months, so also weigh in on whether you think this
    is a good time for us to attempt this project.
    
    PROJECT REQUEST:
    {project_request}
"""

In [16]:
project_requests = [
    """
    Inventory Management with Real-time Visibility.
    Retail division needs to track inventory across all physical stores
    and e-commerce channels with real-time accuracy. Key pain point
    is stockouts and overstock leading to lost sales and increased
    holding costs. They're looking for a cloud solution that integrates
    seamlessly with existing POS systems and offers predictive analytics
    for demand forecasting.
    """,
    """
    AI-Driven Material Selection & Performance Prediction.
    Product Development is interested in AI's ability to analyze material properties
    and predict their performance under various conditions within the
    context of their industrial designs. Wants assistance in identifying
    the most suitable and cost-effective materials early in the design process.
    """,
    """
    Optimize Yield & Reduce Input Costs.
    Agriculture department specifically mentioned a desire to increase crop
    output (e.g., by X%) while simultaneously decreasing expenses on water,
    fertilizer, and pesticides (e.g., by Y%) through more precise application
    and predictive insights.
    """,
    """
    Enhanced Predictive Maintenance for Fleet Operations.
    Distribution department expressed a strong need to leverage AI/ML on their
    cloud platform for proactive identification of potential component failures
    across their delivery fleet. Key drivers are reducing unscheduled downtime,
    optimizing maintenance schedules, and minimizing operational disruption.
    Requires ingestion of vast amounts of sensor data (IoT), flight logs, and
    maintenance records.
    """,
    """
    Inaccurate Vehicle Valuation.
    Distribution department struggles with inconsistent and often delayed real-time
    pricing for used vehicles, leading to lost profit margins and slower inventory
    turnover. Needs a solution that leverages AI to provide highly accurate,
    dynamic valuation based on a wide range of data points (market trends,
    condition, history, local demand).
    """

]

In [17]:
# TODO
# Update the code so that the variable `prompts` contains a list of
# strings, applying the `prompt_template` to each item in `project_requests`
prompts=[]
for request in project_requests:
    # Format the template with the current request
    full_prompt = prompt_template.format(project_request=request)
    prompts.append(full_prompt)

# print(prompts[0])

In [18]:
# Check your work

assert len(prompts) == 5, "You don't have a response for each project_request."
assert type(prompts[0]) == str, ("Make sure your prompts are strings.")

## Task 3. Get Llama Responses using the OpenAI SDK

In [21]:
from google.auth import default, transport
import openai

MAX_TOKENS = 4096

In [22]:
credentials, _ = default()
auth_request = transport.requests.Request()
credentials.refresh(auth_request)

In [23]:
LLAMA_LOCATION = "us-east5"
MAAS_ENDPOINT = f"{LLAMA_LOCATION}-aiplatform.googleapis.com"
LLAMA_MODEL_ID = "meta/llama-4-scout-17b-16e-instruct-maas"

# TODO
base_url = (
    f"https://{MAAS_ENDPOINT}/v1/projects/{PROJECT_ID}/"
    f"locations/{LLAMA_LOCATION}/endpoints/openapi"
)
# Implement an openai_cient to access the Llama 4 model
# using the endpoint and model ID defined above

openai_client = openai.OpenAI(
    base_url=base_url,
    api_key=credentials.token,
)

In [25]:
llama_responses = []

# TODO
# Update the code below to populate the list llama_responses
# with just the text string of Llama 4's response to each prompt

llama_responses = []

## ✍️ Update the code below to populate the list llama_responses
## with just the text string of Llama 4's response to each prompt

### TODO: Start Implementation ###
for prompt in prompts:
    # 1. Prepare the messages list for the Chat Completion API
    messages = [
        {"role": "user", "content": prompt}
    ]

    # 2. Call the Llama model via the OpenAI client
    try:
        completion = openai_client.chat.completions.create(
            model=LLAMA_MODEL_ID,
            messages=messages,
            max_tokens=MAX_TOKENS,
        )

        # 3. Extract the text string from the response and append to the list
        # The text content is found at: completion.choices[0].message.content
        response_text = completion.choices[0].message.content
        llama_responses.append(response_text)
        
    except Exception as e:
        print(f"An error occurred while processing prompt '{prompt}': {e}")
        llama_responses.append(f"ERROR: {e}")

In [27]:
# Check your work

assert len(prompts) == len(llama_responses), "You don't have a response for each prompt."
assert type(llama_responses[0]) == str, ("Extract just the text response, "
                                          "not the entire response object.")
# View one response
# Markdown(llama_responses[0])

## Task 4. Get Claude Responses using the Anthropic SDK

In [29]:
from anthropic import AnthropicVertex

In [30]:
CLAUDE_MODEL = "claude-sonnet-4@20250514"
CLAUDE_LOCATIONS = ["us-east5", "europe-west4", "GLOBAL"]
LOCATION= CLAUDE_LOCATIONS[0]
# TODO
# Implement an anthropic_client to access the Claude Sonnet 4 model
# using the model ID defined above


anthropic_client = AnthropicVertex(
    project_id=PROJECT_ID,
    region=LOCATION
)

In [31]:
claude_responses = []

# TODO
# Update the code below to populate the list claude_responses
# with just the text string of Clade Sonnet 4's response to each prompt

for prompt in prompts:
    try:
        # 1. Prepare the messages list
        messages = [
            {"role": "user", "content": prompt}
        ]

        # 2. Call the Claude model via the AnthropicVertex client
        message_response = anthropic_client.messages.create(
            model=CLAUDE_MODEL,
            max_tokens=256,
            messages=messages,
        )

        # 3. Extract the text content
        # The content is a list of TextBlock objects. We access the first block
        # and then the 'text' attribute.
        response_text = message_response.content[0].text
        claude_responses.append(response_text)
        
    except Exception as e:
        print(f"An error occurred while processing prompt '{prompt}': {e}")
        claude_responses.append(f"ERROR: {e}")

In [34]:
# Check your work

assert len(prompts) == len(claude_responses), "You don't have a response for each prompt."
assert type(claude_responses[0]) == str, ("Extract just the text response, "
                                          "not the entire response object.")

# View one response
# Markdown(claude_responses[0])

## Task 5. Conduct a pairwise evaluation

### Explore Metrics and Select Evaluation Criteria

In [None]:
MetricPromptTemplateExamples.list_example_metric_names()

In [None]:
print(MetricPromptTemplateExamples.Pairwise.TEXT_QUALITY.metric_prompt_template)

### Prepare and Execute the Evaluation Task

In [33]:
# TODO
# Provide all the variables required for the metric
# selected above

eval_dataset = pd.DataFrame({
    'prompt': prompts,
    'baseline_model_response': llama_responses,
    'response': claude_responses
})

In [35]:
selected_metric = MetricPromptTemplateExamples.Pairwise.TEXT_QUALITY

# TODO
# Complete the definition of the EvalTask using the dataset defined
# above and the selected metric. Keep the provided experiment name.

pairwise_eval_task =  EvalTask(
    dataset=eval_dataset,
    metrics=[selected_metric]
)

In [36]:
pairwise_result = pairwise_eval_task.evaluate()

Computing metrics with a total of 5 Vertex Gen AI Evaluation Service API requests.


100%|██████████| 5/5 [00:25<00:00,  5.06s/it]

All 5 metric requests are successfully computed.
Evaluation Took:25.364229331999923 seconds





In [37]:
# TODO
# Display the result's summary_metrics
pairwise_result.summary_metrics

{'row_count': 5,
 'pairwise_text_quality/candidate_model_win_rate': 0.0,
 'pairwise_text_quality/baseline_model_win_rate': 1.0}

In [38]:
# TODO
# Display the result's metrics_table table
pairwise_result.metrics_table

Unnamed: 0,prompt,baseline_model_response,response,pairwise_text_quality/explanation,pairwise_text_quality/pairwise_choice
0,"\n You are a technical architect.\n\n Prepare a high-level project plan for how a solution to the\n following PROJECT REQUEST could be developed using Google Cloud.\n \n We don't want to begin projects that we can't deliver well in\n under four months, so also weigh in on whether you think this\n is a good time for us to attempt this project.\n \n PROJECT REQUEST:\n \n Inventory Management with Real-time Visibility.\n Retail division needs to track inventory across all physical stores\n and e-commerce channels with real-time accuracy. Key pain point\n is stockouts and overstock leading to lost sales and increased\n holding costs. They're looking for a cloud solution that integrates\n seamlessly with existing POS systems and offers predictive analytics\n for demand forecasting.\n \n","**Project Plan: Inventory Management with Real-time Visibility on Google Cloud**\n\n**Project Overview**\n\nThe goal of this project is to design and implement a cloud-based inventory management solution with real-time visibility across physical stores and e-commerce channels. The solution should integrate seamlessly with existing POS systems and provide predictive analytics for demand forecasting.\n\n**High-Level Requirements**\n\n1. Real-time inventory tracking across physical stores and e-commerce channels\n2. Integration with existing POS systems\n3. Predictive analytics for demand forecasting\n4. Scalable and secure cloud-based infrastructure\n5. User-friendly interface for inventory management and reporting\n\n**Technical Approach**\n\nTo deliver this project on Google Cloud, we will leverage the following services:\n\n1. **Google Cloud IoT Core**: For real-time inventory tracking and device management\n2. **Google Cloud Pub/Sub**: For event-driven architecture and messaging\n3. **Google Cloud Bigtable**: For NoSQL database storage and real-time analytics\n4. **Google Cloud Dataflow**: For data processing and transformation\n5. **Google Cloud AI Platform**: For predictive analytics and demand forecasting\n6. **Google Cloud Storage**: For storing historical data and inventory records\n7. **Google Cloud Security**: For securing data and ensuring compliance\n\n**Project Timeline**\n\nAssuming a team of 4-6 people with relevant experience, I estimate the project timeline to be approximately 16 weeks (4 months). Here's a high-level breakdown:\n\n**Week 1-4: Planning and Design**\n\n* Define project scope, goals, and timelines\n* Conduct stakeholder interviews and gather requirements\n* Design the solution architecture and technical roadmap\n* Develop a detailed project plan and resource allocation plan\n\n**Week 5-8: Data Ingestion and Integration**\n\n* Develop data ingestion pipelines using Google Cloud IoT Core and Pub/Sub\n* Integrate with existing POS systems using APIs or file imports\n* Design and implement data processing and transformation using Dataflow\n\n**Week 9-12: Real-time Analytics and Predictive Modeling**\n\n* Develop real-time analytics using Bigtable and Dataflow\n* Build predictive models using Google Cloud AI Platform and historical data\n* Integrate predictive models with the inventory management system\n\n**Week 13-16: Development, Testing, and Deployment**\n\n* Develop the user interface and inventory management features\n* Conduct unit testing, integration testing, and UAT\n* Deploy the solution on Google Cloud and perform final testing\n\n**Feasibility and Risk Assessment**\n\nBased on the project requirements and technical approach, I believe this project can be delivered within the 4-month timeline. However, there are some potential risks and challenges to consider:\n\n* Integration with existing POS systems may be more complex than expected\n* Data quality and accuracy may impact the effectiveness of predictive analytics\n* Ensuring real-time visibility and scalability may require additional infrastructure resources\n\nTo mitigate these risks, I recommend:\n\n* Conducting thorough stakeholder interviews and requirements gathering\n* Developing a detailed project plan with clear milestones and deadlines\n* Establishing a robust testing and validation process\n\n**Conclusion**\n\nBased on the project plan and technical approach, I believe this is a good time to attempt this project. With careful planning, execution, and risk management, we can deliver a high-quality inventory management solution with real-time visibility on Google Cloud within the 4-month timeline.","# High-Level Project Plan: Real-time Inventory Management Solution\n\n## Executive Summary\nThis project is **feasible within 4 months** with proper scope management and phased delivery. The Google Cloud ecosystem provides robust, enterprise-ready services that can deliver immediate value while establishing a foundation for advanced analytics.\n\n## Project Phases & Timeline\n\n### Phase 1: Foundation & Integration (Weeks 1-6)\n**Objective**: Establish real-time data ingestion and basic visibility\n\n**Key Activities**:\n- Set up Google Cloud infrastructure (VPC, IAM, monitoring)\n- Implement **Pub/Sub** for real-time event streaming from POS systems\n- Deploy **Cloud SQL** or **Firestore** for transactional inventory data\n- Build initial data ingestion pipelines using **Dataflow**\n- Create basic inventory dashboard with **Looker Studio**\n- Establish API layer using **Cloud Run** or **App Engine**\n\n**Deliverables**: Real-time inventory tracking across all channels\n\n### Phase 2: Analytics & Intelligence (Weeks 7-10)\n**Objective**: Implement predictive analytics and optimization\n\n**Key","BASELINE response provides a complete high-level project plan, including a detailed timeline and a thorough feasibility and risk assessment, directly addressing all parts of the prompt. CANDIDATE response is incomplete, ending abruptly at Week 10 of a projected 16-week timeline, and thus fails to fully address the instruction to create a plan for a project under four months or provide a comprehensive assessment.",BASELINE
1,"\n You are a technical architect.\n\n Prepare a high-level project plan for how a solution to the\n following PROJECT REQUEST could be developed using Google Cloud.\n \n We don't want to begin projects that we can't deliver well in\n under four months, so also weigh in on whether you think this\n is a good time for us to attempt this project.\n \n PROJECT REQUEST:\n \n AI-Driven Material Selection & Performance Prediction.\n Product Development is interested in AI's ability to analyze material properties\n and predict their performance under various conditions within the\n context of their industrial designs. Wants assistance in identifying\n the most suitable and cost-effective materials early in the design process.\n \n","**Project Request:** AI-Driven Material Selection & Performance Prediction\n\n**High-Level Project Plan:**\n\n### Project Overview\n\nThe goal of this project is to develop an AI-driven solution using Google Cloud that analyzes material properties and predicts their performance under various conditions, enabling Product Development to identify the most suitable and cost-effective materials early in the design process.\n\n### Technical Requirements\n\n* Integrate with existing material database or create a new one\n* Develop a machine learning model to analyze material properties and predict performance\n* Train the model using historical data and material properties\n* Deploy the model as a RESTful API for easy integration with Product Development's design tools\n* Ensure scalability, security, and reliability of the solution\n\n### Technical Approach\n\n1. **Data Collection and Preparation** (Weeks 1-4)\n\t* Gather and integrate material property data from existing sources or create a new database\n\t* Clean, preprocess, and normalize the data for machine learning model training\n2. **Machine Learning Model Development** (Weeks 5-12)\n\t* Explore and select suitable machine learning algorithms (e.g., regression, neural networks) for material performance prediction\n\t* Train and validate the model using Google Cloud's AI Platform (AutoML, TensorFlow, or scikit-learn)\n\t* Fine-tune the model for optimal performance\n3. **Model Deployment and API Development** (Weeks 13-16)\n\t* Deploy the trained model as a containerized application on Google Kubernetes Engine (GKE) or Cloud Run\n\t* Develop a RESTful API using Cloud Endpoints or API Gateway for easy integration with Product Development's design tools\n4. **Integration and Testing** (Weeks 17-20)\n\t* Integrate the API with Product Development's design tools\n\t* Perform thorough testing, including unit testing, integration testing, and user acceptance testing (UAT)\n\n### Google Cloud Services\n\n* **Google Cloud Storage (GCS)**: Store and manage material property data\n* **Google Cloud Dataflow**: Process and preprocess data for machine learning model training\n* **Google Cloud AI Platform**: Develop, train, and deploy machine learning models\n* **Google Kubernetes Engine (GKE) or Cloud Run**: Deploy containerized applications\n* **Cloud Endpoints or API Gateway**: Develop and manage RESTful APIs\n\n### Project Timeline\n\nThe estimated project duration is 20 weeks (~5 months). However, considering the complexity of the project and the need for iterative testing and refinement, I recommend allocating 24 weeks (~6 months) to ensure successful delivery.\n\n### Feasibility and Recommendation\n\nGiven the project's technical requirements and complexity, I believe that attempting to deliver this project in under four months is ambitious but challenging. While it's possible to develop a minimal viable product (MVP) within this timeframe, I strongly recommend allocating more time to ensure the solution meets the required standards of quality, scalability, and reliability.\n\nBased on the project plan, I recommend **postponing the project start by 2-3 months** to allow for:\n\n1. More thorough planning and requirements gathering\n2. Adequate time for machine learning model development and fine-tuning\n3. Robust testing and validation of the solution\n\nThis will increase the chances of delivering a high-quality solution that meets the Product Development team's needs and provides a strong foundation for future enhancements and iterations.\n\n**Recommendation:** Postpone the project start by 2-3 months to ensure a successful delivery of the AI-Driven Material Selection & Performance Prediction solution using Google Cloud.","# AI-Driven Material Selection & Performance Prediction\n## High-Level Project Plan\n\n### **Executive Summary & Recommendation**\n\n**Recommendation: PROCEED with caution** - This project is feasible within 4 months but requires careful scope management and assumes availability of quality training data.\n\n**Key Success Factors:**\n- Quality and quantity of historical material performance data\n- Clear definition of ""performance conditions"" and success metrics\n- Strong domain expert collaboration throughout development\n\n---\n\n## **Project Architecture Overview**\n\n### **Core Components**\n1. **Data Ingestion & Management Layer** (Cloud Storage, BigQuery)\n2. **ML Training & Inference Platform** (Vertex AI)\n3. **Application Layer** (Cloud Run, App Engine)\n4. **Integration Layer** (Cloud Functions, Pub/Sub)\n\n---\n\n## **Phase-by-Phase Development Plan**\n\n### **Phase 1: Foundation & Data Assessment (Weeks 1-3)**\n\n**Objectives:**\n- Establish data infrastructure\n- Assess data quality and availability\n- Set up development environment\n\n**Technical Activities:**\n- Deploy BigQuery data warehouse for material properties and performance data\n-","BASELINE response provides a complete and detailed high-level project plan as requested, including a clear timeline and a thorough assessment of the project's feasibility within the given timeframe. CANDIDATE response, while starting well, fails to complete the phase-by-phase development plan, making it an incomplete response to the prompt's core instruction.",BASELINE
2,"\n You are a technical architect.\n\n Prepare a high-level project plan for how a solution to the\n following PROJECT REQUEST could be developed using Google Cloud.\n \n We don't want to begin projects that we can't deliver well in\n under four months, so also weigh in on whether you think this\n is a good time for us to attempt this project.\n \n PROJECT REQUEST:\n \n Optimize Yield & Reduce Input Costs.\n Agriculture department specifically mentioned a desire to increase crop\n output (e.g., by X%) while simultaneously decreasing expenses on water,\n fertilizer, and pesticides (e.g., by Y%) through more precise application\n and predictive insights.\n \n","**Project Title:** Precision Agriculture Optimization using Google Cloud\n\n**Project Overview:**\nThe goal of this project is to develop a data-driven solution that optimizes crop yields and reduces input costs (water, fertilizer, and pesticides) for the agriculture department. The solution will leverage Google Cloud technologies to provide predictive insights and enable precise application of resources.\n\n**High-Level Project Plan:**\n\n**Phase 1: Planning and Assessment (Weeks 1-4)**\n\n1. **Stakeholder Engagement**: Meet with agriculture department stakeholders to understand their specific requirements, existing infrastructure, and current pain points.\n2. **Data Assessment**: Identify existing data sources (e.g., weather, soil, crop, and sensor data) and determine data quality, availability, and gaps.\n3. **Define Key Performance Indicators (KPIs)**: Establish measurable goals for crop yield increase (X%) and input cost reduction (Y%).\n4. **Feasibility Study**: Conduct a feasibility study to determine the potential impact of the solution on crop yields and input costs.\n\n**Phase 2: Data Ingestion and Integration (Weeks 5-8)**\n\n1. **Data Ingestion**: Integrate with various data sources (e.g., weather APIs, IoT sensors, existing databases) to collect relevant data using Google Cloud's:\n\t* Cloud Dataflow for data processing and transformation\n\t* Cloud Storage for data warehousing\n2. **Data Integration**: Integrate data from various sources into a unified data platform using:\n\t* Google Cloud's Data Fusion for data integration and workflow management\n\t* Cloud Data Loss Prevention (DLP) for data security and compliance\n\n**Phase 3: Data Analysis and Modeling (Weeks 9-12)**\n\n1. **Data Analysis**: Perform exploratory data analysis and feature engineering using:\n\t* Google Cloud's BigQuery for data warehousing and SQL-like querying\n\t* Cloud AI Platform for machine learning and data science workflows\n2. **Predictive Modeling**: Develop predictive models using machine learning algorithms (e.g., regression, decision trees) to forecast crop yields and optimize resource application.\n\n**Phase 4: Solution Development and Deployment (Weeks 13-16)**\n\n1. **Solution Design**: Design a user-friendly interface for farmers and agricultural experts to access insights and recommendations using:\n\t* Google Cloud's App Engine for web application development\n\t* Cloud Endpoints for API management\n2. **Solution Deployment**: Deploy the solution on Google Cloud's infrastructure using:\n\t* Cloud Kubernetes Engine for container orchestration\n\t* Cloud Run for serverless deployment\n\n**Phase 5: Testing and Validation (Weeks 17-20)**\n\n1. **Testing**: Perform thorough testing of the solution, including unit testing, integration testing, and user acceptance testing (UAT).\n2. **Validation**: Validate the solution against the defined KPIs and stakeholder requirements.\n\n**Project Timeline:** Approximately 20 weeks (~5 months)\n\n**Feasibility and Recommendation:**\nGiven the project's scope and complexity, I believe it is feasible to deliver a minimum viable product (MVP) within the proposed 4-month timeline. However, to ensure success, I recommend:\n\n1. **Phased approach**: Prioritize key features and focus on delivering an MVP that addresses the most critical needs.\n2. **Stakeholder engagement**: Regularly engage with stakeholders to ensure that the solution meets their evolving needs and expectations.\n3. **Agile development**: Adopt an agile development methodology to accommodate changing requirements and new insights.\n\nAssuming a phased approach and agile development, I recommend proceeding with the project. With careful planning, execution, and stakeholder engagement, I am confident that we can deliver a successful solution that optimizes crop yields and reduces input costs using Google Cloud technologies.","# High-Level Project Plan: Agricultural Yield Optimization Platform\n\n## Executive Summary\nThis project aims to develop a precision agriculture platform using IoT sensors, satellite imagery, and ML models to optimize crop yields while reducing input costs. The solution leverages Google Cloud's AI/ML capabilities and IoT infrastructure.\n\n## Technical Architecture Overview\n\n### Core Components\n1. **Data Collection Layer**\n - IoT sensors (soil moisture, pH, nutrients, weather)\n - Satellite/drone imagery integration\n - Historical yield and input cost data\n\n2. **Data Processing & Storage**\n - Cloud IoT Core for device management\n - BigQuery for data warehousing\n - Cloud Storage for imagery/large datasets\n\n3. **AI/ML Pipeline**\n - Vertex AI for model development and deployment\n - Computer vision for crop health analysis\n - Predictive models for optimal input timing/quantities\n\n4. **Application Layer**\n - Web dashboard for farmers/agronomists\n - Mobile app for field operations\n - API for integration with existing farm management systems\n\n## Phase 1 (Month 1): Foundation & Data Pipeline\n**Duration: 4 weeks**","BASELINE response fully completes the project plan, including the crucial section on project feasibility and recommendation within the 4-month constraint, which was a key part of the prompt. CANDIDATE response is severely incomplete, cutting off abruptly after starting 'Phase 1' and thus failing to address the timeline and recommendation requirements at all.",BASELINE
3,"\n You are a technical architect.\n\n Prepare a high-level project plan for how a solution to the\n following PROJECT REQUEST could be developed using Google Cloud.\n \n We don't want to begin projects that we can't deliver well in\n under four months, so also weigh in on whether you think this\n is a good time for us to attempt this project.\n \n PROJECT REQUEST:\n \n Enhanced Predictive Maintenance for Fleet Operations.\n Distribution department expressed a strong need to leverage AI/ML on their\n cloud platform for proactive identification of potential component failures\n across their delivery fleet. Key drivers are reducing unscheduled downtime,\n optimizing maintenance schedules, and minimizing operational disruption.\n Requires ingestion of vast amounts of sensor data (IoT), flight logs, and\n maintenance records.\n \n","**Project Plan: Enhanced Predictive Maintenance for Fleet Operations on Google Cloud**\n\n**Project Overview**\n\nThe goal of this project is to develop a predictive maintenance solution using Google Cloud that leverages AI/ML to proactively identify potential component failures across the delivery fleet. The solution aims to reduce unscheduled downtime, optimize maintenance schedules, and minimize operational disruption.\n\n**High-Level Project Plan**\n\n**Phase 1: Planning and Assessment (Weeks 1-4)**\n\n1. **Requirements Gathering**: Collaborate with the distribution department to gather detailed requirements, including:\n\t* Types of sensor data (IoT) and data formats\n\t* Flight log and maintenance record data sources and formats\n\t* Expected outcome and key performance indicators (KPIs)\n2. **Data Assessment**: Evaluate the current state of the data, including:\n\t* Data quality and completeness\n\t* Data volume and velocity\n\t* Data storage and processing requirements\n3. **Solution Design**: Design a high-level architecture for the predictive maintenance solution, including:\n\t* Data ingestion and processing\n\t* Data storage and management\n\t* AI/ML model development and deployment\n\t* Integration with existing systems and workflows\n\n**Phase 2: Data Ingestion and Processing (Weeks 5-12)**\n\n1. **IoT Data Ingestion**: Set up Google Cloud IoT Core to ingest and process sensor data from the fleet\n2. **Data Processing**: Use Google Cloud Dataflow to process and transform the ingested data\n3. **Data Storage**: Design and implement a data storage solution using Google Cloud Storage and/or Google Cloud Bigtable\n\n**Phase 3: AI/ML Model Development and Deployment (Weeks 13-20)**\n\n1. **Data Preparation**: Prepare the processed data for AI/ML model training using Google Cloud Data Studio and/or Google Cloud AI Platform\n2. **Model Development**: Develop and train predictive maintenance models using Google Cloud AI Platform and/or AutoML\n3. **Model Deployment**: Deploy the trained models to Google Cloud AI Platform for real-time prediction and inference\n\n**Phase 4: Integration and Testing (Weeks 21-24)**\n\n1. **Integration**: Integrate the predictive maintenance solution with existing systems and workflows\n2. **Testing**: Perform thorough testing of the solution, including data ingestion, processing, and AI/ML model performance\n\n**Phase 5: Deployment and Maintenance (After Week 24)**\n\n1. **Deployment**: Deploy the solution to production\n2. **Monitoring and Maintenance**: Monitor the solution's performance and perform regular maintenance and updates\n\n**Feasibility and Timeline Assessment**\n\nBased on the project requirements and scope, I estimate that this project can be completed within a 6-month timeline. However, considering the complexity of the project and the need to deliver a high-quality solution, I recommend a 4-month timeline with a phased approach.\n\n**Risk Assessment and Recommendations**\n\n1. **Data Quality and Availability**: The project's success heavily relies on the quality and availability of the data. It is essential to ensure that the data is accurate, complete, and consistent.\n2. **AI/ML Model Complexity**: Developing and deploying AI/ML models can be complex and time-consuming. It is crucial to have experienced data scientists and engineers on board.\n3. **Integration with Existing Systems**: Integrating the predictive maintenance solution with existing systems and workflows can be challenging. It is essential to have a clear understanding of the existing infrastructure and workflows.\n\n**Conclusion**\n\nBased on the project plan and risk assessment, I recommend proceeding with the project. However, it is essential to:\n\n1. **Prioritize Data Quality and Availability**: Ensure that the data is accurate, complete, and consistent.\n2. **Assemble a Experienced Team**: Ensure that the team has the necessary skills and experience in AI/ML, data engineering, and Google Cloud.\n3. **Monitor Progress and Adjust**: Regularly monitor progress and adjust the project plan as needed to ensure successful delivery within the 4-month timeline.\n\n**Technical Recommendations**\n\n1. **Google Cloud Services**: Leverage Google Cloud IoT Core, Google Cloud Dataflow, Google Cloud Storage, Google Cloud Bigtable, Google Cloud AI Platform, and AutoML.\n2. **Data Engineering**: Use Apache Beam, Apache Spark, or Cloud Dataflow for data processing and transformation.\n3. **AI/ML Frameworks**: Use TensorFlow, scikit-learn, or PyTorch for AI/ML model development.\n\nBy following this high-level project plan and addressing the identified risks, we can deliver a successful predictive maintenance solution on Google Cloud within the 4-month timeline.","# Technical Architecture Plan: Enhanced Predictive Maintenance for Fleet Operations\n\n## Executive Summary & Timeline Assessment\n\n**Recommendation: PROCEED with phased approach**\n\nThis project is well-suited for a 4-month delivery window with a strategic phased implementation. Google Cloud's mature IoT and ML services significantly reduce development complexity, making this an opportune time to execute.\n\n## High-Level Architecture\n\n### Core Components\n```\nIoT Devices → Cloud IoT Core → Pub/Sub → Dataflow → BigQuery\n ↓\nStorage (Cloud Storage) ← Dataproc/Vertex AI ← Feature Store\n ↓\nVertex AI Pipelines → ML Models → Monitoring Dashboard\n```\n\n## 4-Month Implementation Plan\n\n### Phase 1 (Month 1): Foundation & Data Pipeline\n**Deliverables:**\n- IoT data ingestion pipeline using **Cloud IoT Core** + **Pub/Sub**\n- Real-time data processing with **Dataflow**\n- Data warehouse setup in **BigQuery**\n- Historical data migration from maintenance records\n\n**Key Services:**\n- Cloud IoT Core","BASELINE response provides a comprehensive high-level project plan, detailing phases, tasks, and relevant Google Cloud services, which is the primary request. Although its timeline assessment section contains a contradiction (estimating 6 months but recommending/concluding 4 months), it still presents a full plan. CANDIDATE response, while providing a clear timeline assessment and high-level architecture, fails significantly by only detailing 'Phase 1 (Month 1)' of a supposed 4-month plan, thus not delivering a complete project plan as requested.",BASELINE
4,"\n You are a technical architect.\n\n Prepare a high-level project plan for how a solution to the\n following PROJECT REQUEST could be developed using Google Cloud.\n \n We don't want to begin projects that we can't deliver well in\n under four months, so also weigh in on whether you think this\n is a good time for us to attempt this project.\n \n PROJECT REQUEST:\n \n Inaccurate Vehicle Valuation.\n Distribution department struggles with inconsistent and often delayed real-time\n pricing for used vehicles, leading to lost profit margins and slower inventory\n turnover. Needs a solution that leverages AI to provide highly accurate,\n dynamic valuation based on a wide range of data points (market trends,\n condition, history, local demand).\n \n","**Project Plan: AI-Powered Vehicle Valuation Solution using Google Cloud**\n\n**Project Overview**\n\nThe goal of this project is to develop an AI-powered vehicle valuation solution that provides accurate and dynamic pricing for used vehicles. The solution will leverage a wide range of data points, including market trends, condition, history, and local demand, to help the distribution department make informed decisions and optimize profit margins and inventory turnover.\n\n**Technical Requirements**\n\n1. **Data Ingestion**: Collect and integrate data from various sources, including:\n\t* Market trends (e.g., sales data, pricing information)\n\t* Vehicle condition (e.g., inspection reports, maintenance records)\n\t* Vehicle history (e.g., ownership, accidents, recalls)\n\t* Local demand (e.g., sales data, market research)\n2. **Data Processing and Storage**: Design a scalable data processing and storage architecture to handle large volumes of data from various sources.\n3. **Machine Learning**: Develop and train machine learning models to analyze data and generate accurate vehicle valuations.\n4. **Real-time Valuation**: Design a system to provide real-time vehicle valuations based on the trained models.\n5. **Integration**: Integrate the solution with existing systems, such as inventory management and sales platforms.\n\n**Google Cloud Services**\n\nThe following Google Cloud services will be used to develop the solution:\n\n1. **Google Cloud Storage (GCS)**: For data storage and processing.\n2. **Google Cloud Dataflow**: For data processing and ETL (Extract, Transform, Load).\n3. **Google Cloud Bigtable**: For NoSQL data storage and real-time data access.\n4. **Google Cloud AI Platform**: For machine learning model development, training, and deployment.\n5. **Google Cloud Functions**: For serverless computing and real-time valuation processing.\n\n**High-Level Project Plan**\n\n**Phase 1: Planning and Design (Weeks 1-4)**\n\n1. Define project scope, goals, and timelines.\n2. Identify data sources and develop a data ingestion plan.\n3. Design the overall architecture and select Google Cloud services.\n4. Develop a detailed project plan and resource allocation plan.\n\n**Phase 2: Data Ingestion and Processing (Weeks 5-12)**\n\n1. Develop data ingestion pipelines using Google Cloud Dataflow.\n2. Integrate data from various sources into Google Cloud Storage.\n3. Process and transform data using Google Cloud Dataflow.\n\n**Phase 3: Machine Learning Model Development (Weeks 13-20)**\n\n1. Develop and train machine learning models using Google Cloud AI Platform.\n2. Evaluate and refine models using historical data.\n\n**Phase 4: Real-time Valuation and Integration (Weeks 21-28)**\n\n1. Design and implement real-time valuation processing using Google Cloud Functions.\n2. Integrate the solution with existing systems.\n\n**Phase 5: Testing and Deployment (Weeks 29-32)**\n\n1. Perform thorough testing of the solution.\n2. Deploy the solution to production.\n\n**Project Timeline**\n\nThe estimated project duration is 32 weeks (~8 months). However, considering the complexity of the project and the need to deliver a high-quality solution, I recommend a phased approach with incremental deliveries.\n\n**Feasibility and Recommendation**\n\nGiven the project's complexity and the requirement to deliver a solution in under four months, I would recommend a phased approach with a focus on the most critical components. A minimum of 6-8 months is required to develop a robust and accurate AI-powered vehicle valuation solution.\n\nTo mitigate risks and ensure a successful project, I suggest:\n\n1. **Phased approach**: Deliver a basic version of the solution within 4 months, with subsequent phases enhancing the solution's accuracy and features.\n2. **Prioritize data quality and availability**: Ensure that high-quality data is available and properly processed before developing and training machine learning models.\n3. **Focus on MVP**: Develop a Minimum Viable Product (MVP) that provides a basic level of accuracy and can be improved upon with additional data and model refinements.\n\nBased on these considerations, I recommend proceeding with the project, but with a phased approach and a focus on delivering a high-quality solution that meets the business requirements.",# Technical Architecture Plan: AI-Powered Vehicle Valuation System\n\n## Executive Summary\n**Feasibility Assessment: ✅ PROCEED** \nThis project is well-suited for a 4-month timeline using Google Cloud's mature AI/ML services and automotive industry APIs.\n\n## High-Level Architecture\n\n### Core Components\n```\n┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐\n│ Data Sources │───▶│ Processing Layer │───▶│ ML/AI Engine │\n└─────────────────┘ └──────────────────┘ └─────────────────┘\n │ │ │\n ▼ ▼ ▼\n┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐\n│ External APIs │ │ Cloud Storage │ │ Real-time API │\n└─────────────────┘ └──────────────────┘,"BASELINE response provides a more realistic and detailed high-level project plan, including a thorough breakdown of technical requirements and mapping them to specific Google Cloud services. Its assessment of the 4-month delivery constraint is also more nuanced and realistic, suggesting a phased approach and MVP, which aligns better with the expert technical architect persona. CANDIDATE response's confident 'PROCEED' within 4 months for a complex AI project feels overly optimistic and less grounded in practical project management for a solution that needs to be delivered 'well'.",BASELINE


In [39]:
# Explore the first result's explanation
print(pairwise_result.metrics_table.loc[0,'pairwise_text_quality/explanation'])

BASELINE response provides a complete high-level project plan, including a detailed timeline and a thorough feasibility and risk assessment, directly addressing all parts of the prompt. CANDIDATE response is incomplete, ending abruptly at Week 10 of a projected 16-week timeline, and thus fails to fully address the instruction to create a plan for a project under four months or provide a comprehensive assessment.


## Test the CodeGemma deployment

In [41]:
PROJECT_NUMBER= 837112514786

In [None]:
import google.cloud.logging
import logging

from google.cloud import aiplatform

# Do not remove logging section
client = google.cloud.logging.Client()
client.setup_logging()


# Replace your project number and endpoint id
ENDPOINT_ID= ''

endpoint_resource_name="projects/PROJECT_NUMBER/locations/us-central1/endpoints/ENDPOINT_ID"
endpoint=aiplatform.Endpoint(endpoint_resource_name)


In [None]:

prompt = "Write a function to list n Fibonacci numbers in Python." 
max_tokens = 500 
temperature = 1.0 
top_p = 1.0  
top_k = 1 
raw_response = True

instances = [
    {
        "prompt": prompt,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "raw_response": raw_response
    },
]

response = endpoint.predict(
    instances=instances
)

# "<|file_separator|>" is the end of the file token.
for prediction in response.predictions:
    print(prediction.split("<|file_separator|>")[0])


print(response.deployed_model_id)

# Do not remove logging section
log_message = f"Fibonacci function: {response}"
logging.info(log_message)

## Perform Cleanup

In [None]:
# Delete all endpoints and their deployed models
endpoints = aiplatform.Endpoint.list()

if not endpoints:
    print("No endpoints found in this project/region.")
else:
    for endpoint in endpoints:
        print(f"\nEndpoint: {endpoint.display_name} ({endpoint.resource_name})")

        deployed_models = endpoint.list_models()
        for dm in deployed_models:
            print(f"  Undeploying model ID: {dm.id}")            
#             TODO Uncomment the statement below to undeploy a model
            endpoint.undeploy(deployed_model_id=dm.id, sync=True)
            print(f"  Undeployed model ID: {dm.id}")

        print("  Deleting endpoint...")
        endpoint.delete(sync=True)
        print(f"  Deleted endpoint: {endpoint.display_name}")

print("\nDeleting models...")
models = aiplatform.Model.list()

for model in models:
    print(f"  Deleting model: {model.display_name}")
    # TODO Uncomment the statement below to delete the model.
    model.delete(sync=True)
    print("  Deleted.")
    
# Do not remove logging section
log_message = "Deleted endpoints"
logging.info(log_message)
