# AgriAutoML Pipeline Execution in Vertex AI

This notebook demonstrates how to execute the AgriAutoML pipeline directly in Vertex AI Studio.

In [1]:
!pip install google-cloud-aiplatform kfp google-cloud-storage pandas pillow numpy



In [None]:
import os
from google.cloud import aiplatform
from google.cloud import storage
from google.auth import default
from datetime import datetime
from kfp import dsl, components, compiler
from google.cloud import aiplatform
from google.cloud.aiplatform import pipeline_jobs

# Resolve Application Default Credentials and the project bound to them.
# NOTE(review): google.auth.default() can return None for the project when the
# environment does not define one - confirm project_id is set before submitting.
credentials, project_id = default()


# Configuration
# Region and bucket may be overridden through environment variables so the
# notebook is not tied to a single temporary lab project. The previously
# hard-coded values remain the defaults, so existing runs behave the same.
REGION = os.environ.get("AGRI_AUTOML_REGION", "us-central1")
BUCKET_NAME = os.environ.get("AGRI_AUTOML_BUCKET", "qwiklabs-gcp-00-ffe6db11d36b-bucket")
# GCS prefix handed to the pipeline job as its pipeline_root.
PIPELINE_ROOT = f"gs://{BUCKET_NAME}/pipeline_root"

In [3]:
def create_sample_data():
    """Upload a small tabular CSV and a placeholder vision file to GCS.

    Both objects are written to the bucket named by BUCKET_NAME under a
    ``data/<timestamp>/`` prefix, where the timestamp is taken at call time.

    Returns:
        tuple: ``(vision_uri, tabular_uri)`` as ``gs://`` URIs, in that order.
    """
    gcs_bucket = storage.Client().bucket(BUCKET_NAME)

    # Header plus three sample rows, joined without a trailing newline
    csv_payload = "\n".join([
        "date,location,crop_type,field_size,rainfall,temperature,yield",
        "2025-01-01,Iowa,corn,5.0,750,25,150",
        "2025-01-15,Kansas,wheat,3.5,500,22,120",
        "2025-02-01,Nebraska,soybean,4.2,600,24,130",
    ])

    run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    object_prefix = f"data/{run_stamp}"
    gcs_prefix = f"gs://{BUCKET_NAME}/{object_prefix}"

    # Tabular data is uploaded first, then the dummy vision file
    uploads = (
        ("crop_data.csv", csv_payload),
        ("vision_data.txt", "Dummy vision data for testing"),
    )
    for file_name, payload in uploads:
        gcs_bucket.blob(f"{object_prefix}/{file_name}").upload_from_string(payload)

    print(f"Uploaded sample data to: {gcs_prefix}/")
    return (f"{gcs_prefix}/vision_data.txt", f"{gcs_prefix}/crop_data.csv")

# Upload the sample files once and keep both URIs for the later cells
vision_uri, tabular_uri = create_sample_data()
print(
    f"Vision Dataset URI: {vision_uri}",
    f"Tabular Dataset URI: {tabular_uri}",
    sep="\n",
)

Forbidden: 403 POST https://storage.googleapis.com/upload/storage/v1/b/qwiklabs-gcp-00-ffe6db11d36b-bucket/o?uploadType=multipart: {
  "error": {
    "code": 403,
    "message": "agrifin-service-account@agrifingcpflow-465809.iam.gserviceaccount.com does not have storage.objects.create access to the Google Cloud Storage object. Permission 'storage.objects.create' denied on resource (or it may not exist).",
    "errors": [
      {
        "message": "agrifin-service-account@agrifingcpflow-465809.iam.gserviceaccount.com does not have storage.objects.create access to the Google Cloud Storage object. Permission 'storage.objects.create' denied on resource (or it may not exist).",
        "domain": "global",
        "reason": "forbidden"
      }
    ]
  }
}
: ('Request failed with status code', 403, 'Expected one of', <HTTPStatus.OK: 200>)

In [None]:

# Absolute path to the components directory, located at <parent of cwd>/components.
# Resolved once, at the time this cell runs.
COMPONENTS_DIR = os.path.abspath(os.path.join(os.path.dirname(os.getcwd()), 'components'))

# Function to get absolute component path
def get_component_path(component_name):
    """Return the absolute path of a component file inside COMPONENTS_DIR.

    Args:
        component_name: File name of the component definition,
            e.g. 'preprocess.yaml'.

    Returns:
        str: COMPONENTS_DIR joined with ``component_name``.
    """
    # Build on COMPONENTS_DIR instead of re-deriving the directory from
    # os.getcwd() on every call: one source of truth, and the result no longer
    # shifts if the working directory changes after this cell has run.
    return os.path.join(COMPONENTS_DIR, component_name)
    
# Load components
def _load_component(spec_file):
    """Load a KFP component from the file that get_component_path resolves."""
    return components.load_component_from_file(get_component_path(spec_file))

# One operation factory per pipeline stage, loaded in pipeline order
preprocess_op = _load_component('preprocess.yaml')
train_vision_op = _load_component('train_vision.yaml')
train_tabular_op = _load_component('train_tabular.yaml')
deploy_op = _load_component('deploy.yaml')

# Define pipeline
@dsl.pipeline(
    name='AgriAutoML Pipeline',
    description='End-to-end pipeline for agricultural yield prediction'
)
def agri_automl_pipeline(
    project_id: str,
    region: str,
    bucket_name: str,
    vision_dataset_uri: str,
    tabular_dataset_uri: str,
    min_accuracy: float = 0.8
):
    """Chain preprocessing, vision/tabular training and deployment steps."""
    # Project and region are forwarded unchanged to every training/deploy step
    target = dict(project_id=project_id, region=region)

    # Step 1: preprocess both raw datasets
    prepared = preprocess_op(
        vision_data=vision_dataset_uri,
        tabular_data=tabular_dataset_uri,
        bucket_name=bucket_name,
    )

    # Step 2: one training task per modality, each fed by its preprocessed dataset.
    # NOTE(review): the output keys used below ('vision_dataset', 'tabular_dataset',
    # 'model_info') must match the outputs declared in the component YAML files -
    # confirm against those files.
    vision_training = train_vision_op(
        **target,
        dataset=prepared.outputs['vision_dataset'],
        min_accuracy=min_accuracy,
    )
    tabular_training = train_tabular_op(
        **target,
        dataset=prepared.outputs['tabular_dataset'],
        min_accuracy=min_accuracy,
    )

    # Step 3: deployment consumes the model info produced by both training tasks
    deploy_op(
        **target,
        vision_model=vision_training.outputs['model_info'],
        tabular_model=tabular_training.outputs['model_info'],
    )

KeyError: 'model'

In [None]:
# Point the Vertex AI SDK at the project/region resolved in the config cell
aiplatform.init(project=project_id, location=REGION, credentials=credentials)


# Compile the pipeline function into a local template file
compiler.Compiler().compile(
    pipeline_func=agri_automl_pipeline,
    package_path='pipeline.yaml',
)


# Runtime arguments for the pipeline; keys mirror the parameters of
# agri_automl_pipeline, and the dataset URIs come from the sample-data cell
run_parameters = {
    'project_id': project_id,
    'region': REGION,
    'bucket_name': BUCKET_NAME,
    'vision_dataset_uri': vision_uri,
    'tabular_dataset_uri': tabular_uri,
    'min_accuracy': 0.8,
}

# Create the job from the compiled template and submit it
job = pipeline_jobs.PipelineJob(
    display_name='agri-automl-pipeline',
    template_path='pipeline.yaml',
    pipeline_root=PIPELINE_ROOT,
    parameter_values=run_parameters,
)

job.submit()