In [1]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient

In [2]:
import os
# Workspace coordinates. Each value is read from its environment variable when
# that variable is set; otherwise the literal fallback (second argument) is used.
# Keep the fallbacks filled in for now -- later on these are expected to be
# supplied through the environment instead.
subscription_id = os.getenv('SUBSCRIPTION_ID', 'c2427130-ac1b-4b29-a374-ae5927fe5c99')
resource_group = os.getenv('RESOURCE_GROUP', 'mlops-project')
workspace_name = os.getenv('WORKSPACE', 'sadiksha-sapkota-ml')

In [3]:
# Build the credential object that is handed to MLClient below.
# NOTE(review): presumably DefaultAzureCredential picks up the identity you are
# already signed in with (e.g. via the Azure CLI) -- confirm for your environment.
credential = DefaultAzureCredential()

In [4]:
# Client bound to the workspace described by the configuration cell above.
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

In [None]:
# Compute Instances need to have a unique name across the region.
from azure.ai.ml.entities import ComputeInstance, AmlCompute

# The "auto" suffix marks this instance as one that is created automatically
# by this notebook.
ci_basic_name = "cpu-sadiksha-auto"
ci_basic = ComputeInstance(size="STANDARD_DS3_v2", name=ci_basic_name)

# Start the create/update operation and block until it has finished.
creation_poller = ml_client.begin_create_or_update(ci_basic)
creation_poller.result()

In [None]:
from azure.ai.ml.entities import Environment
import os

# Name under which the image-processing environment is registered.
custom_env_name = "aml-Pillow"

# Describe the environment (base image + dataprep conda file) and register it
# in the workspace in one step; the registered entity is kept for reporting.
pipeline_job_env = ml_client.environments.create_or_update(
    Environment(
        image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
        conda_file=os.path.join("components", "dataprep", "conda.yaml"),
        name=custom_env_name,
        description="Custom environment for Image Processing (with Pillow)",
        tags={"Pillow": "10.0.1"},
    )
)

print(
    f"Environment with name {pipeline_job_env.name} is registered to workspace, "
    f"the environment version is {pipeline_job_env.version}"
)

### Data cleaning component and pipeline

In [None]:
from azure.ai.ml import command, Input, Output, MLClient
from azure.identity import DefaultAzureCredential
from pathlib import Path

# Absolute location of the data-prep scripts, resolved against the current
# working directory.
code_path = Path("components", "dataprep", "code").resolve()

# Command component that runs dataprep.py on an input folder and writes its
# results to an output folder.
convert_resize_component = command(
    name="convert_resize_images",
    display_name="Convert PNG to JPG and Resize",
    description="Convert .png to .jpg and resize to 64x64.",
    code=str(code_path),
    environment="aml-Pillow@latest",
    command="python dataprep.py --data ${{inputs.data}} --output_data ${{outputs.output_data}}",
    inputs={"data": Input(type="uri_folder")},
    outputs={"output_data": Output(type="uri_folder", mode="rw_mount")},
)

# Register the component definition (not the job node) in the workspace.
registered_component = ml_client.create_or_update(convert_resize_component.component)

print("✅ Registered: {} v{}".format(registered_component.name, registered_component.version))


In [None]:
from azure.ai.ml import dsl, Input, Output

# Pipeline that runs the PNG -> JPG conversion component once per digit dataset.
#
# Parameters:
#   input_version, output_version: declared pipeline inputs that the body does
#       not currently read; dataset versions come from the `digits` table below.
# Returns:
#   A dict mapping each internal key ("mnist_2", ...) to that job's
#   `output_data` output.
@dsl.pipeline(
    compute="cpu-sadiksha-auto",
    description="MNIST preprocessing pipeline: PNG → JPG"
)
def mnist_preprocessing_pipeline(
    input_version: str,
    output_version: str,
):
    # (internal key, registered dataset name, dataset version).
    # Internal keys must use underscores.
    digits = [
        ("mnist_2", "mnist-2", "1"),
        ("mnist_7", "mnist-7", "1"),
        ("mnist_8", "mnist-8", "1"),
    ]

    jobs = {}

    for key, dataset_name, version in digits:
        resize_job = convert_resize_component(
            data=Input(
                type="uri_folder",
                path=f"azureml:{dataset_name}:{version}"
            )
        )

        # Short-form datastore URI: it carries no subscription / resource group /
        # workspace, so the output follows whichever workspace the job is
        # submitted to instead of a hard-coded one.
        output_path = (
            "azureml://datastores/workspaceblobstore"
            f"/paths/processed_mnist/{dataset_name}"
        )

        # `name` makes the output a named data asset ("<key>_jpg").
        resize_job.outputs.output_data = Output(
            type="uri_folder",
            path=output_path,
            name=f"{key}_jpg",
            mode="rw_mount"
        )

        jobs[key] = resize_job

    return {k: v.outputs.output_data for k, v in jobs.items()}


In [None]:
# Instantiate the pipeline. `mnist_preprocessing_pipeline` declares
# `input_version` and `output_version` without defaults, so both are supplied
# explicitly here instead of calling it with no arguments.
pipeline = mnist_preprocessing_pipeline(input_version="1", output_version="1")

In [None]:
import webbrowser

# Submit the preprocessing pipeline to the workspace.
pipeline_job = ml_client.jobs.create_or_update(
    pipeline,
    experiment_name="image_preprocessing_pipeline"
)

# webbrowser.open returns False when no browser could be launched (for example
# on a remote or headless kernel); print the link so it is not lost.
if not webbrowser.open(pipeline_job.studio_url):
    print(f"Open the job in Azure ML studio: {pipeline_job.studio_url}")

### Train-test split

In [None]:
# FILE: component definition (e.g. components/dataprep/register_split_component.py)
from azure.ai.ml import command, Input, Output
import os

# Command component wrapping traintestsplit.py: takes the three digit folders
# plus a split factor and produces a training folder and a testing folder.
mnist_data_split_component = command(
    name="mnist_data_split",
    display_name="MNIST Data Splitting to Train and Test",
    description="Splits digit datasets into training and testing sets",
    code=os.path.join("components", "dataprep", "code"),
    environment="aml-Pillow@latest",
    inputs=dict(
        mnist_2_jpg=Input(type="uri_folder"),
        mnist_7_jpg=Input(type="uri_folder"),
        mnist_8_jpg=Input(type="uri_folder"),
        train_test_split_factor=Input(type="number"),
    ),
    outputs=dict(
        training_data=Output(type="uri_folder", mode="rw_mount"),
        testing_data=Output(type="uri_folder", mode="rw_mount"),
    ),
    # The trailing backslashes join the lines below into a single command line.
    command="""python traintestsplit.py \
        --datasets ${{inputs.mnist_2_jpg}} ${{inputs.mnist_7_jpg}} ${{inputs.mnist_8_jpg}} \
        --split_size ${{inputs.train_test_split_factor}} \
        --training_data_output ${{outputs.training_data}} \
        --testing_data_output ${{outputs.testing_data}}""",
)


In [None]:
# FILE: pipeline definition (e.g. pipelines/mnist_split_pipeline.py)
from azure.ai.ml import dsl, Input, Output

# Pipeline with a single step: the train/test split component applied to the
# three JPG digit folders.
#
# Parameters:
#   train_test_split: forwarded to the component as `train_test_split_factor`.
#   mnist_2_jpg, mnist_7_jpg, mnist_8_jpg: input folders, forwarded unchanged.
# Returns:
#   A dict with the step's "training_data" and "testing_data" outputs.
@dsl.pipeline(
    description="MNIST split pipeline using JPG inputs",
    compute="cpu-sadiksha-auto",
)
def mnist_images_traintest_split_pipeline(
    train_test_split: int,
    mnist_2_jpg: Input,
    mnist_7_jpg: Input,
    mnist_8_jpg: Input,
):
    split_job = mnist_data_split_component(
        train_test_split_factor=train_test_split,
        mnist_2_jpg=mnist_2_jpg,
        mnist_7_jpg=mnist_7_jpg,
        mnist_8_jpg=mnist_8_jpg,
    )

    # Both outputs get the same settings and are named after themselves.
    output_names = ("training_data", "testing_data")
    for output_name in output_names:
        setattr(
            split_job.outputs,
            output_name,
            Output(type="uri_folder", name=output_name, mode="rw_mount"),
        )

    return {
        output_name: getattr(split_job.outputs, output_name)
        for output_name in output_names
    }


In [None]:
# Data assets to feed into the split pipeline, all at the same version.
version = "1"
digits = ["mnist_2_jpg", "mnist_7_jpg", "mnist_8_jpg"]

# One uri_folder Input per asset, keyed by the asset name so the dict can be
# unpacked straight into the pipeline's keyword parameters.
digits_datasets = dict(
    (asset_name, Input(type="uri_folder", path=f"azureml:{asset_name}:{version}"))
    for asset_name in digits
)

print(digits_datasets)

# Instantiate the split pipeline with the inputs above.
train_test_pipeline = mnist_images_traintest_split_pipeline(
    train_test_split=20,
    **digits_datasets,
)

In [None]:
import webbrowser

# Submit the split pipeline to Azure ML.
pipeline_job = ml_client.jobs.create_or_update(
    train_test_pipeline,
    experiment_name="mnist_split_pipeline"
)

# Try to open the job in a browser. webbrowser.open returns False when no
# browser could be launched (for example on a remote or headless kernel), so
# print the link in that case instead of losing it.
if not webbrowser.open(pipeline_job.studio_url):
    print(f"Open the job in Azure ML studio: {pipeline_job.studio_url}")

### Training the model

### Creating environment for training 

In [5]:
from azure.ai.ml.entities import Environment
import os

# Name under which the training environment is registered.
custom_env_name = "aml-Tensorflow-Pillow"

# Describe the environment (base image + training conda file) and register it
# in the workspace in one step; the registered entity is kept for reporting.
pipeline_job_env = ml_client.environments.create_or_update(
    Environment(
        image="mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest",
        conda_file=os.path.join("components", "training", "conda.yaml"),
        name=custom_env_name,
        description="Custom environment for AI Training (with Pillow)",
        tags={"Pillow": "10.0.1", "Tensorflow": "2.4.1"},
    )
)

print(
    f"Environment with name {pipeline_job_env.name} is registered to workspace, "
    f"the environment version is {pipeline_job_env.version}"
)

Environment with name aml-Tensorflow-Pillow is registered to workspace, the environment version is 1


In [6]:
from azure.ai.ml import command, Input, Output
import os

# Command component wrapping train.py: consumes a training folder, a testing
# folder and an epoch count, and writes its results to one output folder.
mnist_training_component = command(
    name="mnist_training",
    display_name="MNIST CNN Training",
    description="Trains a CNN on 28x28 grayscale MNIST digit JPGs using 3-digit classification.",
    # This folder must contain train.py & utils.py
    code=os.path.join("components", "training", "code"),
    environment="aml-Tensorflow-Pillow@latest",
    inputs=dict(
        training_folder=Input(type="uri_folder"),
        testing_folder=Input(type="uri_folder"),
        epochs=Input(type="number"),
    ),
    outputs=dict(
        output_folder=Output(type="uri_folder", mode="rw_mount"),
    ),
    # The trailing backslashes join the argument lines into one command line.
    command="""
        python train.py \
        --training_folder ${{inputs.training_folder}} \
        --testing_folder ${{inputs.testing_folder}} \
        --output_folder ${{outputs.output_folder}} \
        --epochs ${{inputs.epochs}}
    """,
)


In [None]:
# Register the training component in the workspace.
# `mnist_training_component` is the object returned by `command(...)`; the
# component definition to register is its `.component` attribute, the same
# pattern used when registering `convert_resize_component`.
registered_training_component = ml_client.components.create_or_update(
    mnist_training_component.component
)
print(
    f"Component {registered_training_component.name} registered (version: {registered_training_component.version})"
)


In [7]:
from azure.ai.ml import dsl

# Pipeline with a single step: the MNIST training component.
#
# Parameters:
#   training_folder, testing_folder: input folders, forwarded unchanged.
#   epochs: forwarded to the component's `epochs` input.
# Returns:
#   A dict whose "output_data" entry is the training step's `output_folder`.
@dsl.pipeline(
    description="MNIST Digit Classification Training Pipeline",
    compute="cpu-sadiksha-auto",  # Replace with your compute name
)
def mnist_training_pipeline(training_folder: Input, testing_folder: Input, epochs: int):
    training_job = mnist_training_component(
        epochs=epochs,
        training_folder=training_folder,
        testing_folder=testing_folder,
    )

    # Give the step's output folder a name ("mnist_trained_model_output").
    model_output = Output(
        mode="rw_mount",
        name="mnist_trained_model_output",
        type="uri_folder",
    )
    training_job.outputs.output_folder = model_output

    return dict(output_data=training_job.outputs.output_folder)


In [8]:
import webbrowser
from azure.ai.ml import Input

# Bind the registered train/test data assets and the epoch count to the
# training pipeline.
mnist_pipeline_instance = mnist_training_pipeline(
    training_folder=Input(type="uri_folder", path="azureml:training_data:1"),
    testing_folder=Input(type="uri_folder", path="azureml:testing_data:1"),
    epochs=20,
)

# Submit the pipeline to the workspace.
mnist_pipeline_job = ml_client.jobs.create_or_update(
    mnist_pipeline_instance,
    experiment_name="mnist_training_pipeline",
)

print(f"✅ Pipeline submitted: {mnist_pipeline_job.name}")

# webbrowser.open returns False when no browser could be launched (for example
# on a remote or headless kernel); print the link so it is not lost.
if not webbrowser.open(mnist_pipeline_job.studio_url):
    print(f"Open the job in Azure ML studio: {mnist_pipeline_job.studio_url}")


Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
[32mUploading code (0.01 MBs): 100%|█

✅ Pipeline submitted: silver_ant_y8b8l74jz0


False