# AgriAutoML Pipeline Execution in Vertex AI

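This notebook demonstrates how to execute the AgriAutoML pipeline directly in Vertex AI Studio. It uploads a sample crop dataset to Cloud Storage, compiles and runs a Kubeflow pipeline that trains and deploys an AutoML yield-prediction model, and then requests a test prediction from the deployed endpoint.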

In [None]:
# Install required packages
!pip install google-cloud-aiplatform kfp google-cloud-storage pandas pillow numpy

In [None]:
import os
from google.cloud import aiplatform
from google.cloud import storage
from google.auth import default
from datetime import datetime

# Resolve Application Default Credentials and the project they are bound to.
credentials, project_id = default()

# google.auth.default() returns None for the project when it cannot be
# determined from the environment. Fail here with an actionable message instead
# of handing None to Vertex AI and to the pipeline parameters further down.
if not project_id:
    raise RuntimeError(
        "Could not determine a Google Cloud project from the default "
        "credentials. Set the GOOGLE_CLOUD_PROJECT environment variable or "
        "run `gcloud config set project <PROJECT_ID>`, then restart the kernel."
    )

# Configuration
REGION = "us-central1"
BUCKET_NAME = "agrifinstorage"
PIPELINE_ROOT = f"gs://{BUCKET_NAME}/pipeline_root"

# Initialize Vertex AI with default credentials
aiplatform.init(
    project=project_id,
    location=REGION,
    credentials=credentials,
)

In [None]:
def create_sample_data(num_rows: int = 1000, seed: int = 42):
    """Create a sample crop-yield CSV and upload it to GCS.

    The three hand-written example rows are always included; deterministic
    synthetic rows are appended until the file holds ``num_rows`` data rows.
    Vertex AI AutoML tabular training requires at least 1,000 rows, so the
    default produces the smallest dataset that can actually be trained on.

    Args:
        num_rows: Total number of data rows (header excluded) to write. Values
            below 3 still produce the three hand-written rows.
        seed: Seed for the pseudo-random generator, so repeated runs upload
            identical content.

    Returns:
        The ``gs://`` URI of the uploaded ``crop_data.csv`` object.
    """
    # Function-scope imports keep the notebook's import cell untouched.
    import random
    from datetime import date, timedelta

    header = "date,location,crop_type,field_size,rainfall,temperature,yield"
    rows = [
        "2025-01-01,Iowa,corn,5.0,750,25,150",
        "2025-01-15,Kansas,wheat,3.5,500,22,120",
        "2025-02-01,Nebraska,soybean,4.2,600,24,130",
    ]

    # (location, crop_type, typical rainfall, typical temperature, typical yield),
    # taken from the hand-written rows above.
    profiles = [
        ("Iowa", "corn", 750, 25, 150),
        ("Kansas", "wheat", 500, 22, 120),
        ("Nebraska", "soybean", 600, 24, 130),
    ]
    rng = random.Random(seed)
    first_synthetic_day = date(2025, 2, 2)

    for offset in range(max(0, num_rows - len(rows))):
        location, crop_type, base_rainfall, base_temperature, base_yield = (
            profiles[offset % len(profiles)]
        )
        rainfall = base_rainfall + rng.randint(-100, 100)
        temperature = base_temperature + rng.randint(-3, 3)
        field_size = round(rng.uniform(2.0, 6.0), 1)
        # Let the target loosely follow rainfall so there is a signal to learn.
        crop_yield = round(
            base_yield + 0.05 * (rainfall - base_rainfall) + rng.uniform(-5.0, 5.0)
        )
        day = (first_synthetic_day + timedelta(days=offset)).isoformat()
        rows.append(
            f"{day},{location},{crop_type},{field_size},"
            f"{rainfall},{temperature},{crop_yield}"
        )

    sample_csv = "\n".join([header, *rows])

    storage_client = storage.Client()
    bucket = storage_client.bucket(BUCKET_NAME)

    # A per-run timestamp folder keeps earlier uploads from being overwritten.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    object_path = f"data/{timestamp}/crop_data.csv"
    tabular_blob = bucket.blob(object_path)
    # upload_from_string defaults to text/plain; label the object as CSV.
    tabular_blob.upload_from_string(sample_csv, content_type="text/csv")

    print(f"Uploaded sample data to: gs://{BUCKET_NAME}/data/{timestamp}/")
    return f"gs://{BUCKET_NAME}/{object_path}"

# Upload the sample dataset and keep its GCS URI; the pipeline run below
# receives it as the `dataset_uri` parameter.
dataset_uri = create_sample_data()
print(f"Dataset URI: {dataset_uri}")

In [None]:
from kfp import dsl
from kfp.v2 import compiler

# The google-cloud-aiplatform calls live inside components. Code written
# directly in a @dsl.pipeline function body runs in the notebook kernel while
# the pipeline is being compiled and only sees parameter placeholders, not real
# values; components are the steps the pipeline backend actually executes.
# NOTE: `dsl.component` requires the KFP SDK v2 (kfp>=2).


@dsl.component(
    base_image="python:3.10",
    packages_to_install=["google-cloud-aiplatform"],
)
def create_crop_dataset(project_id: str, region: str, dataset_uri: str) -> str:
    """Create the Vertex AI tabular dataset from a CSV in GCS.

    Args:
        project_id: Google Cloud project to create the dataset in.
        region: Vertex AI location, e.g. ``us-central1``.
        dataset_uri: ``gs://`` URI of the training CSV.

    Returns:
        Resource name of the created dataset.
    """
    from google.cloud import aiplatform

    aiplatform.init(project=project_id, location=region)
    dataset = aiplatform.TabularDataset.create(
        display_name="crop_yield_dataset",
        gcs_source=[dataset_uri],
    )
    return dataset.resource_name


@dsl.component(
    base_image="python:3.10",
    packages_to_install=["google-cloud-aiplatform"],
)
def train_crop_yield_model(
    project_id: str,
    region: str,
    dataset_name: str,
    budget_milli_node_hours: int = 1000,
) -> str:
    """Train an AutoML tabular regression model on the ``yield`` column.

    Args:
        project_id: Google Cloud project to train in.
        region: Vertex AI location.
        dataset_name: Resource name of the tabular dataset to train on.
        budget_milli_node_hours: Training budget; 1000 equals one node hour.
            The SDK documents an integer between 1,000 and 72,000, so the
            previous value of 83.33 was outside the accepted range.

    Returns:
        Resource name of the trained model.
    """
    from google.cloud import aiplatform

    aiplatform.init(project=project_id, location=region)
    training_job = aiplatform.AutoMLTabularTrainingJob(
        display_name="crop_yield_training",
        optimization_prediction_type="regression",
    )
    model = training_job.run(
        dataset=aiplatform.TabularDataset(dataset_name),
        target_column="yield",
        budget_milli_node_hours=budget_milli_node_hours,
        model_display_name="crop_yield_model",
    )
    return model.resource_name


@dsl.component(
    base_image="python:3.10",
    packages_to_install=["google-cloud-aiplatform"],
)
def deploy_crop_yield_model(project_id: str, region: str, model_name: str) -> str:
    """Deploy a trained model to a new single-replica endpoint.

    Args:
        project_id: Google Cloud project that owns the model.
        region: Vertex AI location.
        model_name: Resource name of the model to deploy.

    Returns:
        Resource name of the endpoint serving the model.
    """
    from google.cloud import aiplatform

    aiplatform.init(project=project_id, location=region)
    model = aiplatform.Model(model_name)
    endpoint = model.deploy(
        machine_type="n1-standard-4",
        min_replica_count=1,
        max_replica_count=1,
    )
    return endpoint.resource_name


# The name uses only lowercase letters, digits and hyphens, the form accepted by
# KFP's pipeline-name validation.
@dsl.pipeline(
    name='agri-automl-pipeline',
    description='End-to-end pipeline for agricultural yield prediction'
)
def agri_automl_pipeline(
    project_id: str,
    region: str,
    dataset_uri: str,
    min_accuracy: float = 0.7
):
    """Create the dataset, train the AutoML model and deploy it, in that order.

    Args:
        project_id: Google Cloud project every step runs in.
        region: Vertex AI location every step runs in.
        dataset_uri: ``gs://`` URI of the training CSV.
        min_accuracy: Kept so existing ``parameter_values`` stay valid; no step
            reads it.
    """
    dataset_task = create_crop_dataset(
        project_id=project_id,
        region=region,
        dataset_uri=dataset_uri,
    )

    # Passing each step's output to the next one also fixes the execution order.
    training_task = train_crop_yield_model(
        project_id=project_id,
        region=region,
        dataset_name=dataset_task.output,
    )

    deploy_crop_yield_model(
        project_id=project_id,
        region=region,
        model_name=training_task.output,
    )

# Serialize the pipeline definition to a local file; the PipelineJob cell loads
# this file as its template.
pipeline_compiler = compiler.Compiler()
pipeline_compiler.compile(
    pipeline_func=agri_automl_pipeline,
    package_path='agri_automl_pipeline.json',
)

In [None]:
# Runtime values for the pipeline's parameters.
pipeline_parameters = {
    'project_id': project_id,
    'region': REGION,
    'dataset_uri': dataset_uri,
    'min_accuracy': 0.7,
}

# Build the job from the compiled template.
job = aiplatform.PipelineJob(
    display_name="agri-automl-pipeline",
    template_path="agri_automl_pipeline.json",
    pipeline_root=PIPELINE_ROOT,
    parameter_values=pipeline_parameters,
)

# sync=True blocks this cell until the pipeline run has finished.
job.run(sync=True)

In [None]:
def get_prediction(instance):
    """Get a prediction for one instance from the most recently created endpoint.

    Args:
        instance: Mapping of feature name to value for a single row.

    Returns:
        The response object returned by ``Endpoint.predict``.

    Raises:
        IndexError: If the current project and location have no endpoints.
    """
    # Ask the API for newest-first ordering; without order_by the position of
    # the latest endpoint in the returned list is not guaranteed.
    endpoints = aiplatform.Endpoint.list(order_by="create_time desc")
    if not endpoints:
        raise IndexError(
            "No Vertex AI endpoints found in the current project and location; "
            "run the pipeline to deploy a model first."
        )

    endpoint = endpoints[0]
    return endpoint.predict([instance])

# Feature values for one example field; the target column is left out because
# it is what the model predicts.
test_instance = {
    'date': '2025-03-01',
    'location': 'Iowa',
    'crop_type': 'corn',
    'field_size': 4.5,
    'rainfall': 700,
    'temperature': 23,
}

# Query the deployed model and show the raw response.
prediction = get_prediction(test_instance)
print(f"Predicted yield: {prediction}")