In [None]:
from azure.ai.ml import MLClient
from azure.ai.ml.entities import Environment
from azure.identity import DefaultAzureCredential

# Initialize ML Client
# MLClient's keyword parameters are `resource_group_name` and `workspace_name`.
# Any other keyword is absorbed by **kwargs, which would leave the client
# without a workspace scope and make workspace operations fail.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential,
    subscription_id="<your-subscription-id>",
    resource_group_name="<your-resource-group>",
    workspace_name="<your-workspace>"
)

# Describe the pipeline's runtime environment: a base Docker image together
# with the conda specification file kept under ./env.
env = Environment(
    image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
    conda_file="./env/conda.yaml",
    name="personal_agendas_env",
    description="Environment for Personal Agendas pipeline",
)

# Register the environment in the workspace (creates it, or updates an existing one).
ml_client.environments.create_or_update(env)

In [None]:
from azure.ai.ml import MLClient, command, Input, Output
from azure.ai.ml.dsl import pipeline
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes, InputOutputModes
from azure.identity import DefaultAzureCredential
import os

# Initialize ML Client
# Workspace coordinates are read from environment variables so that no
# identifiers are hard-coded in the notebook.
# MLClient's keyword parameters are `resource_group_name` and `workspace_name`;
# any other keyword is absorbed by **kwargs and the workspace scope is lost.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential,
    subscription_id=os.getenv("SUBSCRIPTION_ID"),
    resource_group_name=os.getenv("RESOURCE_GROUP"),
    workspace_name=os.getenv("AZUREML_WORKSPACE_NAME")
)

# Define Step 1: Data Preparation
# `azure.ai.ml.command` is a builder function, not a decorator: the step's
# inputs, outputs and launch command are passed as arguments. The returned
# object is then called with keyword arguments inside a pipeline function
# (input_uri=..., config_file=..., incremental=...) to create a pipeline node
# whose results are available as `node.outputs.<output_name>`.
data_preparation_step = command(
    name="data_preparation",
    display_name="Data Preparation (Registration, Scan, Session)",
    # "<name>@latest" selects the most recent registered version, whereas
    # "<name>:latest" would be read as a version literally named "latest".
    environment="personal_agendas_env@latest",
    compute="cpu-cluster",
    code="./azureml_pipeline",
    # NOTE(review): the entry script name and its CLI flags are assumptions —
    # confirm against the contents of ./azureml_pipeline before running.
    command=(
        "python data_preparation.py"
        " --input_uri ${{inputs.input_uri}}"
        " --config_file ${{inputs.config_file}}"
        " --incremental ${{inputs.incremental}}"
        " --registration_output ${{outputs.registration_output}}"
        " --scan_output ${{outputs.scan_output}}"
        " --session_output ${{outputs.session_output}}"
        " --metadata_output ${{outputs.metadata_output}}"
    ),
    inputs={
        "input_uri": Input(type=AssetTypes.URI_FOLDER),
        "config_file": Input(type=AssetTypes.URI_FILE),
        "incremental": Input(type="boolean", default=False),
    },
    outputs={
        "registration_output": Output(type=AssetTypes.URI_FOLDER),
        "scan_output": Output(type=AssetTypes.URI_FOLDER),
        "session_output": Output(type=AssetTypes.URI_FOLDER),
        "metadata_output": Output(type=AssetTypes.URI_FOLDER),
    },
    # Always re-run the step instead of reusing results from a previous run.
    is_deterministic=False,
)

# Define the pipeline
def personal_agendas_pipeline(
    input_data_uri: str,
    config_type: str = "vet"  # or "ecomm"
):
    """Build the Personal Agendas pipeline job for one data source and config.

    Inside a ``@pipeline``-decorated function the parameters are placeholder
    objects rather than real values, so Python string formatting on them
    (such as building the config path with an f-string) does not produce a
    usable path. The config path is therefore resolved here, in ordinary
    Python, and both data references are handed to the DSL pipeline as typed
    ``Input`` objects matching the step's ``uri_folder`` / ``uri_file`` inputs.

    Args:
        input_data_uri: URI (or path) of the folder holding the input data.
        config_type: Suffix selecting ``./PA/config/config_<config_type>.yaml``.

    Returns:
        The pipeline job object, ready to be submitted with
        ``ml_client.jobs.create_or_update``. It exposes the outputs
        ``registration_data``, ``scan_data`` and ``session_data``.
    """
    # Resolved eagerly while `config_type` is still a real string.
    config_path = f"./PA/config/config_{config_type}.yaml"

    @pipeline(
        name="personal_agendas_pipeline",
        description="Personal Agendas data processing pipeline"
    )
    def _personal_agendas_dsl(input_data, config_file):
        # Step 1: Data Preparation
        step1 = data_preparation_step(
            input_uri=input_data,
            config_file=config_file,
            incremental=False
        )

        # Additional steps would be added here
        # step2 = neo4j_preparation_step(...)
        # step3 = embeddings_step(...)
        # step4 = recommendations_step(...)

        return {
            "registration_data": step1.outputs.registration_output,
            "scan_data": step1.outputs.scan_output,
            "session_data": step1.outputs.session_output
        }

    return _personal_agendas_dsl(
        input_data=Input(type=AssetTypes.URI_FOLDER, path=input_data_uri),
        config_file=Input(type=AssetTypes.URI_FILE, path=config_path),
    )

# Build the pipeline job for the weekly refresh data using the "vet" config
pipeline_job = personal_agendas_pipeline(
    config_type="vet",
    input_data_uri="azureml://datastores/landing/paths/weekly_refresh_data",
)

# Send the job to Azure ML under the project's experiment
submitted_job = ml_client.jobs.create_or_update(
    pipeline_job, experiment_name="personal_agendas_experiment"
)

# Report the job name and the Studio link for monitoring
print(
    f"Pipeline submitted: {submitted_job.name}",
    f"Monitor at: {submitted_job.studio_url}",
    sep="\n",
)