# AgriAutoML Pipeline Execution

This notebook demonstrates how to execute the AgriAutoML pipeline in Vertex AI.

## 1. Setup and Authentication

First, install the required packages, then set your project configuration and initialize the Vertex AI SDK.

In [None]:
# Use the %pip magic rather than !pip so the packages are installed into the
# environment of the running kernel, not whichever pip is first on the shell PATH.
%pip install -r ../requirements.txt

In [None]:
import os
from google.cloud import aiplatform
from google.cloud import storage
from datetime import datetime

# Set your project configuration
PROJECT_ID = "your-project-id"  # Replace with your project ID
REGION = "us-central1"          # Replace with your desired region
BUCKET_NAME = "your-bucket"     # Replace with your GCS bucket name

# Fail fast while the template placeholders are still in place, instead of
# letting a later cell fail on a cloud call made with a non-existent
# project or bucket.
_unconfigured = [
    name
    for name, value, placeholder in (
        ("PROJECT_ID", PROJECT_ID, "your-project-id"),
        ("BUCKET_NAME", BUCKET_NAME, "your-bucket"),
    )
    if not value or value == placeholder
]
if _unconfigured:
    raise ValueError(
        "Replace the placeholder value(s) for "
        + ", ".join(_unconfigured)
        + " in this cell before running the notebook."
    )

# Initialize Vertex AI
aiplatform.init(project=PROJECT_ID, location=REGION)

## 2. Prepare Sample Data

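Upload sample data to Google Cloud Storage (GCS) for the pipeline to read. The cell below uploads placeholder files only; replace them with your real vision and tabular datasets.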

In [None]:
def upload_sample_data():
    """Upload placeholder sample data to the configured GCS bucket.

    Writes one placeholder text object under a vision prefix and one
    placeholder CSV under a tabular prefix. Both prefixes embed a timestamp
    taken at call time.

    Returns:
        tuple[str, str]: ``gs://`` URIs of the vision prefix and the tabular
        prefix (the prefixes, not the individual objects).
    """
    # Bind the client to the notebook's project explicitly rather than relying
    # on whichever default project the ambient credentials resolve to.
    storage_client = storage.Client(project=PROJECT_ID)
    bucket = storage_client.bucket(BUCKET_NAME)

    # Create sample data paths
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    vision_path = f"data/{timestamp}/vision"
    tabular_path = f"data/{timestamp}/tabular"

    # TODO: Replace with your actual data upload logic
    # For demonstration, we'll create placeholder files
    vision_blob = bucket.blob(f"{vision_path}/placeholder.txt")
    vision_blob.upload_from_string("Vision dataset placeholder")

    tabular_blob = bucket.blob(f"{tabular_path}/placeholder.csv")
    # Label the object as CSV; upload_from_string defaults to text/plain.
    tabular_blob.upload_from_string(
        "date,crop,yield\n2025-01-01,corn,150",
        content_type="text/csv",
    )

    return f"gs://{BUCKET_NAME}/{vision_path}", f"gs://{BUCKET_NAME}/{tabular_path}"

# Run the upload and report both returned URIs
vision_uri, tabular_uri = upload_sample_data()
print(
    f"Vision data URI: {vision_uri}",
    f"Tabular data URI: {tabular_uri}",
    sep="\n",
)

## 3. Configure Pipeline Parameters

In [None]:
# Display name of the run and the GCS root path handed to the pipeline job
PIPELINE_NAME = "agri-automl-pipeline"
PIPELINE_ROOT = f"gs://{BUCKET_NAME}/pipeline_root"

# Arguments passed to the pipeline as its parameter values
parameter_values = dict(
    project_id=PROJECT_ID,
    region=REGION,
    bucket_name=BUCKET_NAME,
    vision_dataset_uri=vision_uri,
    tabular_dataset_uri=tabular_uri,
    min_accuracy=0.8,
)

## 4. Compile and Run Pipeline

In [None]:
from pipelines.agri_automl_pipeline import agri_automl_pipeline
from kfp.v2 import compiler

# Single source of truth for the compiled pipeline spec: the compiler writes
# this file and the job reads it, so the two paths must never drift apart.
PIPELINE_PACKAGE_PATH = "agri_automl_pipeline.json"

# Compile the pipeline
compiler.Compiler().compile(
    pipeline_func=agri_automl_pipeline,
    package_path=PIPELINE_PACKAGE_PATH,
)

# Create and run the pipeline job
job = aiplatform.PipelineJob(
    display_name=PIPELINE_NAME,
    template_path=PIPELINE_PACKAGE_PATH,
    pipeline_root=PIPELINE_ROOT,
    parameter_values=parameter_values,
)

# sync=True makes this call block until the run has finished.
job.run(sync=True)

## 5. Monitor Pipeline Execution

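The previous cell runs the job with `sync=True`, so it returns only once the pipeline run has finished. While the run is in progress you can follow it in the Vertex AI Console; the code below then prints the job's final state and, on success, its outputs.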

In [None]:
def monitor_pipeline(job):
    """Print the state of a pipeline job and, once finished, its outputs or error.

    Always prints the job's resource name and current state. When the state is
    SUCCEEDED the job's outputs are listed; when it is FAILED the recorded
    error is printed. Any other state prints nothing further.

    Args:
        job: The pipeline job to inspect.
    """
    # gca_resource.name is the job's resource name, not a browsable URL.
    print(f"Pipeline resource name: {job.gca_resource.name}")
    print(f"State: {job.state}")

    # The pipeline state enum is reached through the SDK's `gapic` namespace.
    pipeline_state = aiplatform.gapic.PipelineState

    if job.state == pipeline_state.PIPELINE_STATE_SUCCEEDED:
        print("\nPipeline completed successfully!")
        print("\nOutputs:")
        for key, value in _pipeline_outputs(job).items():
            print(f"{key}: {value}")
    elif job.state == pipeline_state.PIPELINE_STATE_FAILED:
        print("\nPipeline failed.")
        # The error is read from the underlying API resource.
        print(f"Error: {job.gca_resource.error}")


def _pipeline_outputs(job):
    """Collect a job's outputs as a flat ``{name: value}`` mapping.

    NOTE(review): the SDK's pipeline job object is not known to expose an
    ``outputs`` attribute. If one is present it is returned unchanged;
    otherwise the output artifact URIs are gathered from the per-task details
    of the underlying resource, keyed as ``"<task name>.<output name>"``.
    """
    outputs = getattr(job, "outputs", None)
    if outputs is not None:
        return outputs

    collected = {}
    for task in job.gca_resource.job_detail.task_details:
        for output_name, artifact_list in task.outputs.items():
            collected[f"{task.task_name}.{output_name}"] = [
                artifact.uri for artifact in artifact_list.artifacts
            ]
    return collected

# Monitor the pipeline
monitor_pipeline(job)

## 6. Access Deployed Endpoints

After successful pipeline execution, you can access the deployed model endpoints.

In [None]:
def list_endpoints():
    """Print the name, resource name and description of every endpoint.

    Endpoints are fetched with ``aiplatform.Endpoint.list()``. When none are
    returned, a single "(none found)" line is printed after the header.
    """
    endpoints = aiplatform.Endpoint.list()
    print("Available endpoints:")
    if not endpoints:
        # Make the empty case explicit instead of printing a bare header.
        print("(none found)")
        return

    for endpoint in endpoints:
        print(f"\nName: {endpoint.display_name}")
        print(f"Resource name: {endpoint.resource_name}")
        # NOTE(review): the description is read from the underlying API
        # resource; the SDK wrapper may not expose `description` directly.
        print(f"Description: {endpoint.gca_resource.description}")

list_endpoints()

## 7. Test Predictions

Make test predictions using the deployed endpoints.

In [None]:
def predict_yield(endpoint_name, instance):
    """Send a single instance to a deployed endpoint and return the response.

    Args:
        endpoint_name: Identifier passed to ``aiplatform.Endpoint`` to look up
            the endpoint.
        instance: One prediction instance; it is wrapped in a one-element list
            before being sent.

    Returns:
        Whatever ``Endpoint.predict`` returns for that one-element batch.
    """
    return aiplatform.Endpoint(endpoint_name).predict([instance])

# Input for the example prediction
sample_instance = dict(
    location="Iowa",
    crop_type="corn",
    planting_date="2025-04-15",
    field_size=5,
)

# Look up the tabular endpoint in the pipeline outputs, then query it.
# NOTE(review): both `job.outputs` and the 'deploy-tabular_endpoint' key depend
# on the job object and the pipeline definition, neither of which is visible
# in this notebook — confirm they exist before relying on this cell.
tabular_endpoint = job.outputs['deploy-tabular_endpoint']
prediction = predict_yield(
    tabular_endpoint,
    sample_instance,
)
print(f"Predicted yield: {prediction}")