# AgriAutoML Pipeline Execution in Vertex AI

This notebook demonstrates how to execute the AgriAutoML pipeline directly in Vertex AI Studio.

In [None]:
# Install packages with exact versions known to work together
!pip install "protobuf>=3.20.2,<4.0.0dev"
!pip install "google-cloud-storage>=1.32.0,<3.0.0"
!pip install "google-cloud-datastore==1.15.5"
!pip install "google-cloud-aiplatform==1.104.0"
!pip install "google-cloud-bigquery<3.0.0"
!pip install "pydantic>=2.0.0,<3.0.0"
!pip install "kfp>=2.0.0"
!pip install "google-genai>=1.25.0"

# Install remaining dependencies
!pip install pandas numpy pillow scikit-learn tensorflow \
    google-auth google-auth-httplib2 google-api-python-client

In [26]:
import os
from google.cloud import aiplatform
from google.cloud import storage
from google.auth import default
from datetime import datetime
from kfp import dsl, components, compiler
from google.cloud import aiplatform
from google.cloud.aiplatform import pipeline_jobs
from PIL import Image
import numpy as np 
import io
import pandas as pd

# Get default credentials and project
# `default()` returns the ambient credentials together with the project they
# are associated with; that project is what the later cells pass to Vertex AI.
credentials, project_id = default()


# Configuration
# REGION and BUCKET_NAME can be overridden through the AGRI_REGION and
# AGRI_BUCKET_NAME environment variables, so the notebook does not have to be
# edited for every environment. Unset or empty variables fall back to the
# original hard-coded values.
# NOTE(review): the bucket must be writable by the credentials resolved above;
# confirm the fallback bucket belongs to the environment you are running in.
REGION = os.environ.get("AGRI_REGION") or "us-central1"
BUCKET_NAME = os.environ.get("AGRI_BUCKET_NAME") or "qwiklabs-gcp-03-ef69e9acb66d-bucket"
# Cloud Storage prefix handed to the pipeline job as its pipeline root.
PIPELINE_ROOT = f"gs://{BUCKET_NAME}/pipeline_root"

In [27]:
# Create sample datasets
def create_sample_data(bucket_name=None):
    """Upload one synthetic image and one synthetic CSV to Cloud Storage.

    The image is 224x224 RGB random noise saved as PNG to
    ``sample_vision_data/image1.png``. The table has 100 rows of randomly
    generated values (planting_date, temperature, rainfall, soil_quality,
    yield) saved as CSV to ``sample_tabular_data/farming_data.csv``.

    Args:
        bucket_name: Destination bucket name. When omitted, the notebook-level
            ``BUCKET_NAME`` is used.

    Returns:
        A ``(vision_uri, tabular_uri)`` tuple of ``gs://`` prefixes under which
        the image and the CSV were written.

    Raises:
        Errors raised by the storage client (for example a 403 when the active
        credentials lack ``storage.objects.create``) are not caught here.
    """
    if bucket_name is None:
        bucket_name = BUCKET_NAME

    n_rows = 100

    # Prefixes (not object names) returned to the caller
    vision_uri = f"gs://{bucket_name}/sample_vision_data"
    tabular_uri = f"gs://{bucket_name}/sample_tabular_data"

    # Initialize GCS client
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)

    # Create and upload sample vision data.
    # randint's upper bound is exclusive, so 256 is needed to cover the full
    # 0..255 range of an 8-bit channel.
    pixels = np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8)
    png_buffer = io.BytesIO()
    Image.fromarray(pixels).save(png_buffer, format='PNG')
    png_bytes = png_buffer.getvalue()

    vision_blob = bucket.blob('sample_vision_data/image1.png')
    vision_blob.upload_from_string(png_bytes, content_type='image/png')

    # Create and upload sample tabular data
    df = pd.DataFrame({
        'planting_date': pd.date_range(start='2025-01-01', periods=n_rows),
        'temperature': np.random.normal(25, 5, n_rows),
        'rainfall': np.random.normal(50, 10, n_rows),
        'soil_quality': np.random.choice(['good', 'medium', 'poor'], n_rows),
        'yield': np.random.normal(75, 15, n_rows)
    })

    tabular_blob = bucket.blob('sample_tabular_data/farming_data.csv')
    # Set the content type explicitly so the object is stored as CSV rather
    # than the client's default for string uploads.
    tabular_blob.upload_from_string(df.to_csv(index=False), content_type='text/csv')

    return vision_uri, tabular_uri

# Upload the sample files and keep the returned gs:// prefixes for the
# pipeline submission cell, then report where each dataset was written.
vision_uri, tabular_uri = create_sample_data()
print(
    f"Created vision dataset at: {vision_uri}",
    f"Created tabular dataset at: {tabular_uri}",
    sep="\n",
)

Forbidden: 403 POST https://storage.googleapis.com/upload/storage/v1/b/qwiklabs-gcp-03-ef69e9acb66d-bucket/o?uploadType=multipart: {
  "error": {
    "code": 403,
    "message": "agrifin-service-account@agrifingcpflow-465809.iam.gserviceaccount.com does not have storage.objects.create access to the Google Cloud Storage object. Permission 'storage.objects.create' denied on resource (or it may not exist).",
    "errors": [
      {
        "message": "agrifin-service-account@agrifingcpflow-465809.iam.gserviceaccount.com does not have storage.objects.create access to the Google Cloud Storage object. Permission 'storage.objects.create' denied on resource (or it may not exist).",
        "domain": "global",
        "reason": "forbidden"
      }
    ]
  }
}
: ('Request failed with status code', 403, 'Expected one of', <HTTPStatus.OK: 200>)

In [28]:

# Absolute path to the `components` directory, which is expected to sit next to
# the notebook's working directory, i.e. <parent of cwd>/components.
COMPONENTS_DIR = os.path.abspath(os.path.join(os.path.dirname(os.getcwd()), 'components'))


def get_component_path(component_name):
    """Return the absolute path of a component definition file.

    Args:
        component_name: File name inside ``COMPONENTS_DIR``,
            e.g. ``'preprocess.yaml'``.

    Returns:
        ``COMPONENTS_DIR`` joined with ``component_name``.
    """
    # Build on the shared constant instead of re-deriving the directory, so
    # the two can never disagree.
    return os.path.join(COMPONENTS_DIR, component_name)
    
# Load components
# Each *_op below is whatever kfp's load_component_from_file returns for one
# YAML definition located through get_component_path; the pipeline function
# calls these objects to create its steps.
_load_component = components.load_component_from_file
preprocess_op = _load_component(get_component_path('preprocess.yaml'))
train_vision_op = _load_component(get_component_path('train_vision.yaml'))
train_tabular_op = _load_component(get_component_path('train_tabular.yaml'))
deploy_op = _load_component(get_component_path('deploy.yaml'))

# Define pipeline
# Step wiring, as visible in the body below:
#   preprocess -> train_vision  \
#              -> train_tabular  -> deploy
#
# Parameters:
#   project_id, region   - forwarded to both training steps and to deploy.
#   bucket_name          - forwarded to the preprocess step only.
#   vision_dataset_uri   - passed to preprocess as `vision_data`.
#   tabular_dataset_uri  - passed to preprocess as `tabular_data`.
#   min_accuracy         - forwarded unchanged to both training steps
#                          (presumably a quality threshold; confirm in the
#                          component definitions).
#
# NOTE(review): the input/output names used here ('vision_dataset',
# 'tabular_dataset', 'model_info', ...) must match the component YAML files,
# which are not visible from this notebook — verify against them.
# NOTE(review): the pipeline name contains uppercase letters and a space;
# confirm it is accepted as-is when compiling and submitting.
@dsl.pipeline(
    name='AgriAutoML Pipeline',
    description='End-to-end pipeline for agricultural yield prediction'
)
def agri_automl_pipeline(
    project_id: str,
    region: str,
    bucket_name: str,
    vision_dataset_uri: str,
    tabular_dataset_uri: str,
    min_accuracy: float = 0.8
):
    # Preprocess data: takes both raw dataset URIs plus the bucket name and
    # exposes 'vision_dataset' and 'tabular_dataset' outputs used below.
    preprocess_task = preprocess_op(
        vision_data=vision_dataset_uri,
        tabular_data=tabular_dataset_uri,
        bucket_name=bucket_name
    )

    # Train vision model on the preprocess step's 'vision_dataset' output
    train_vision_task = train_vision_op(
        project_id=project_id,
        region=region,
        dataset=preprocess_task.outputs['vision_dataset'],
        min_accuracy=min_accuracy
    )

    # Train tabular model on the preprocess step's 'tabular_dataset' output
    train_tabular_task = train_tabular_op(
        project_id=project_id,
        region=region,
        dataset=preprocess_task.outputs['tabular_dataset'],
        min_accuracy=min_accuracy
    )

    # Deploy models: consumes the 'model_info' output of each training step,
    # so it depends on both of them.
    deploy_task = deploy_op(
        project_id=project_id,
        region=region,
        vision_model=train_vision_task.outputs['model_info'],
        tabular_model=train_tabular_task.outputs['model_info']
    )

In [29]:
# Point the Vertex AI SDK at the project, region and credentials resolved in
# the configuration cell.
aiplatform.init(project=project_id, location=REGION, credentials=credentials)


# Compile the pipeline function to a YAML spec in the working directory.
compiler.Compiler().compile(pipeline_func=agri_automl_pipeline, package_path='pipeline.yaml')


# Create the pipeline job from the compiled spec. The dataset URIs come from
# the sample-data cell, so that cell must have completed successfully first.
job = pipeline_jobs.PipelineJob(
    display_name='agri-automl-pipeline',
    template_path='pipeline.yaml',
    pipeline_root=PIPELINE_ROOT,
    parameter_values=dict(
        project_id=project_id,
        region=REGION,
        bucket_name=BUCKET_NAME,
        vision_dataset_uri=vision_uri,
        tabular_dataset_uri=tabular_uri,
        min_accuracy=0.8,
    ),
)

# Send the job to Vertex AI.
job.submit()

NameError: name 'vision_uri' is not defined