In [1]:
from azure.ai.ml.entities import AmlCompute
from azure.ai.ml.entities import ComputeInstance
from azure.ai.ml.entities import Environment
from azure.ai.ml import command
from azure.ai.ml.entities import PipelineJob

In [2]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from dotenv import load_dotenv
import os

# Load key/value pairs from a local .env file (if one exists) into the
# process environment so workspace identifiers are not hard-coded here.
load_dotenv()

# The three settings that together identify the Azure ML workspace.
subscription_id = os.getenv("SUBSCRIPTION_ID")
resource_group = os.getenv("RESOURCE_GROUP")
workspace_name = os.getenv("WORKSPACE_NAME")

# os.getenv returns None for unset variables; stop here with a clear message
# rather than letting a later workspace call fail with an opaque SDK error.
_workspace_settings = {
    "SUBSCRIPTION_ID": subscription_id,
    "RESOURCE_GROUP": resource_group,
    "WORKSPACE_NAME": workspace_name,
}
_missing_settings = [key for key, value in _workspace_settings.items() if not value]
if _missing_settings:
    raise EnvironmentError(
        "Missing required setting(s): "
        + ", ".join(_missing_settings)
        + ". Define them in the environment or in the .env file."
    )

# Authenticate with DefaultAzureCredential, which tries several credential
# sources in turn (environment variables, managed identity, Azure CLI, ...).
credential = DefaultAzureCredential()

# Client handle used by every later cell to talk to the workspace.
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

In [4]:
from azure.ai.ml.entities import AmlCompute

# Imported in this cell because it is only needed for the lookup below.
from azure.core.exceptions import ResourceNotFoundError

cpu_cluster_name = "cpu-cluster"

try:
    # Reuse the cluster when the workspace already has one with this name.
    compute_target = ml_client.compute.get(cpu_cluster_name)
    print(f"Found existing cluster: {cpu_cluster_name}")
except ResourceNotFoundError as e:
    # Only a genuine "not found" response should trigger creation. Any other
    # failure (authentication, networking, wrong workspace) now propagates
    # instead of being misreported as a missing cluster.
    print(f"Cluster not found, creating a new one: {e}")
    # Define the configuration for the new cluster
    compute_config = AmlCompute(
        name=cpu_cluster_name,
        size="STANDARD_DS11_V2",  # VM size
        min_instances=0,  # allow the cluster to scale down to zero nodes
        max_instances=1,
    )
    # begin_create_or_update returns a poller; .result() blocks until the
    # provisioning operation has finished.
    compute_target = ml_client.compute.begin_create_or_update(compute_config).result()

Cluster not found, creating a new one: Operation returned an invalid status 'Not Found'


In [6]:
from azure.ai.ml.entities import Environment

# Execution environment for the scoring job: a base Docker image with the
# conda dependencies listed in environment.yml layered on top of it.
# NOTE(review): the image is referenced without a tag and is based on
# Ubuntu 18.04 - confirm it is still the intended base image.
myenv = Environment(
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="environment.yml",
    name="myenv",
)

# Optionally save the environment to the workspace
# ml_client.environments.create_or_update(myenv)

In [7]:
from azure.ai.ml.entities import Data
from azure.ai.ml.entities import Model

# Registered data asset to score (name and version as stored in the workspace).
dataset_name, dataset_version = "german_credit_card_hsg", "1"
dataset = ml_client.data.get(version=dataset_version, name=dataset_name)

# Registered model used to produce the predictions.
model_name, model_version = "german-credit-card-hsg", "1"
model = ml_client.models.get(version=model_version, name=model_name)


In [8]:
%%writefile scripts/batch_score.py

import argparse
import pandas as pd
import joblib
import os

def main():
    """Score a fixed sample of rows from a CSV with a saved model and save the result.

    Command-line arguments (all required):
        --input_data:  path to the input CSV file.
        --model_path:  path to a model file that ``joblib.load`` can read.
        --output_data: directory that receives ``predictions.csv``; it is
                       created when it does not exist yet.

    Raises:
        SystemExit: raised by argparse when a required argument is missing.
        KeyError: if the input CSV has no ``Sno`` column.
    """
    parser = argparse.ArgumentParser()
    # Mark every argument as required so a missing one is reported by argparse
    # instead of reaching pandas/joblib as None.
    parser.add_argument("--input_data", type=str, required=True, help="Path to input dataset")
    parser.add_argument("--model_path", type=str, required=True, help="Path to input model")
    parser.add_argument("--output_data", type=str, required=True, help="Path to save output dataset")

    args = parser.parse_args()

    # Load the dataset and discard the serial-number column, which is not a
    # model feature (adjust the column name for other datasets).
    features = pd.read_csv(args.input_data).drop(columns=["Sno"])

    # NOTE(review): joblib.load unpickles the file, so only load models that
    # come from a trusted source.
    model = joblib.load(args.model_path)

    # Score a small fixed sample (rows 9..15 by position). The explicit copy
    # makes the sample an independent frame, so adding the prediction column
    # neither touches `features` nor triggers SettingWithCopyWarning.
    sample = features.iloc[9:16].copy()
    sample["prediction"] = model.predict(sample)

    # Save predictions as a new dataset
    os.makedirs(args.output_data, exist_ok=True)
    output_file = os.path.join(args.output_data, "predictions.csv")
    sample.to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")


if __name__ == "__main__":
    main()


Overwriting scripts/batch_score.py


In [9]:
from azure.ai.ml import Input, Output
from azure.ai.ml.entities import CommandComponent
from azure.ai.ml.dsl import pipeline

# Command line executed inside the job. The ${{inputs.*}} / ${{outputs.*}}
# placeholders refer to the input and output ports declared below.
component_command = "python batch_score.py --input_data ${{inputs.input_data}} --model_path ${{inputs.model_path}} --output_data ${{outputs.output_data}}"

# Ports of the component: two file inputs and one folder output.
component_inputs = dict(
    input_data=Input(type="uri_file"),
    model_path=Input(type="uri_file"),
)
component_outputs = dict(
    output_data=Output(type="uri_folder"),  # folder that stores the results
)

# Reusable component wrapping scripts/batch_score.py.
batch_scoring_component = CommandComponent(
    name="batch_scoring_component",
    display_name="Batch Scoring Component",
    description="Component that runs the batch scoring script",
    code="./scripts",  # folder uploaded with the job; contains batch_score.py
    environment=myenv,
    command=component_command,
    inputs=component_inputs,
    outputs=component_outputs,
)

# Pipeline definition: a single step that runs batch_scoring_component on the
# given dataset and model. Steps run on `cpu_cluster_name` (passed as
# default_compute).
# The step's result is not returned, so the pipeline itself declares no outputs.
# NOTE(review): the dsl decorator inspects this function; the local name
# `batch_job` presumably becomes the step name and a docstring may be picked
# up as the pipeline description - confirm before renaming or adding one.
@pipeline(default_compute=cpu_cluster_name)
def batch_pipeline(input_data, model_path):
    batch_job = batch_scoring_component(input_data=input_data, model_path=model_path)

# Bind the registered dataset and model (fetched in an earlier cell) to the
# pipeline's two inputs.
pipeline_inputs = {
    "input_data": Input(type="uri_file", path=dataset.path),
    "model_path": Input(type="uri_file", path=model.path),
}
pipeline_job = batch_pipeline(**pipeline_inputs)

# Submit the job to the workspace. The call stays the last expression of the
# cell so that the returned job is displayed as the cell output.
ml_client.jobs.create_or_update(pipeline_job)

Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading scripts (0.0 MBs): 100%

Experiment,Name,Type,Status,Details Page
MLOps,boring_kettle_yyx50506nj,pipeline,NotStarted,Link to Azure Machine Learning studio


In [None]:
# Publish the pipeline
#pipeline_job.name = "pred-ops-concept-test"
#ml_client.jobs.create_or_update(pipeline_job)
