Linen usage prediction for the next 30 days / 7 days
> Using Azure ML SDK v2 (MLClient).

In [None]:

# Install SDK v2 (run once in the notebook if needed)
!pip install --quiet azure-ai-ml azure-identity

In [None]:

!pip show azure-ai-ml

In [None]:
# Connect using MLClient (SDK v2)
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient, Input
from azure.ai.ml.entities import AmlCompute

# Try the default credential chain first and probe it by requesting a
# management-plane token; if that fails, fall back to an interactive login.
try:
    credential = DefaultAzureCredential()
    credential.get_token("https://management.azure.com/.default")
except Exception as auth_error:
    # Deliberately broad: any failure of the default chain should lead to the
    # interactive fallback, but report why so the fallback is not silent.
    print(
        'DefaultAzureCredential failed ({}); falling back to interactive browser login'.format(
            type(auth_error).__name__
        )
    )
    credential = InteractiveBrowserCredential()

# MLClient will read configuration from ./config.json or env vars if present
ml_client = MLClient.from_config(credential=credential)
print('MLClient initialized for workspace:', ml_client.workspace_name)

In [None]:

import pandas as pd

# Read the usage history from the local CSV, then print its column names
# followed by the first rows as a quick sanity check.
usage_csv_path = './src/usage.csv'
df = pd.read_csv(usage_csv_path)
column_names = df.columns.tolist()
print(column_names)
print(df.head())

In [None]:

# Forecasting configuration shared by the cells below.
target_column_name = 'LinenUsage'
time_column_name = 'Date'
forecast_horizon = 30

# Parse the time column as datetimes, then order the rows chronologically
# with a fresh 0..n-1 index.
df[time_column_name] = pd.to_datetime(df[time_column_name])
df = df.sort_values(by=time_column_name, ignore_index=True)

print(df.dtypes)
print('Rows:', df.shape[0])

In [None]:
# Make sure the AmlCompute cluster named below exists, creating it when absent.
compute_name = 'cpu-cluster'  # replace with the name of your cluster in Azure
existing = {target.name: target for target in ml_client.compute.list()}
if compute_name not in existing:
    print('Creating compute:', compute_name)
    compute = AmlCompute(
        name=compute_name,
        size='STANDARD_D2_V2',
        min_instances=0,
        max_instances=4,
    )
    # Wait on the returned operation before reporting success.
    creation_operation = ml_client.compute.begin_create_or_update(compute)
    creation_operation.result()
    print('Compute created')
else:
    print('Found compute:', compute_name)

In [None]:
# Install MLflow (run once if needed)
!pip install --quiet mlflow

In [None]:
# Point MLflow at a local file store and select the experiment for this notebook
# (swap the URI for a tracking server address if you have one).
import mlflow

mlflow_tracking_uri = 'file:./mlruns'
mlflow_experiment_name = 'linen-forecast-experiment'

mlflow.set_tracking_uri(mlflow_tracking_uri)
mlflow.set_experiment(mlflow_experiment_name)
print('MLflow tracking URI:', mlflow.get_tracking_uri())

In [None]:
# Define the AutoML forecasting job and submit it while logging to MLflow.
from azure.ai.ml.automl import forecasting
import json
import os

# Use Input to point to the local CSV - MLClient will handle upload as needed.
# NOTE(review): AutoML tabular jobs are documented to take MLTable inputs
# (type='mltable', path=<folder containing an MLTable file>). Confirm the service
# accepts this 'uri_file' input, or add an MLTable definition next to the CSV.
training_data = Input(type='uri_file', path='./src/usage.csv')

# Algorithms excluded from the sweep (non-interpretable models).
# NOTE(review): the original list named 'DeepAR', which does not appear among the
# AutoML forecasting model names; 'TCNForecaster' is the deep-learning forecaster
# AutoML exposes. Confirm this matches the intent.
blocked_algorithms = ['LightGBM', 'TCNForecaster', 'ExtremeRandomTrees']

# Create the forecasting job. The factory only takes the data/metric/job-level
# arguments; forecasting, featurization, limit and training options are applied
# through the dedicated setters below.
job = forecasting(
    training_data=training_data,
    target_column_name=target_column_name,
    primary_metric='normalized_root_mean_squared_error',
    compute=compute_name,
    experiment_name='linen-forecast-automl-v2',
)

# Time axis and how far ahead to forecast.
job.set_forecast_settings(
    time_column_name=time_column_name,
    forecast_horizon=forecast_horizon,
)

# Automatic featurization.
job.set_featurization(mode='auto')

# Sweep limits.
job.set_limits(
    max_concurrent_trials=4,
    timeout_minutes=60,
)

# Disable both ensemble types and block the algorithms listed above.
job.set_training(
    blocked_training_algorithms=blocked_algorithms,
    enable_vote_ensemble=False,
    enable_stack_ensemble=False,
)

# Submit the job within an MLflow run so we capture params/metadata/artifacts
with mlflow.start_run() as run:
    mlflow.log_param('forecast_horizon', forecast_horizon)
    mlflow.log_param('target_column', target_column_name)
    mlflow.log_param('time_column', time_column_name)
    mlflow.log_param('compute', compute_name)
    # Log from the local list: the job object does not expose the blocked
    # algorithms as a top-level attribute.
    mlflow.log_param('blocked_algos', ','.join(blocked_algorithms))

    returned_job = ml_client.jobs.create_or_update(job)
    mlflow.log_param('azureml_job_name', returned_job.name)
    print('Submitted job:', returned_job.name)

    # Stream job status to notebook
    ml_client.jobs.stream(returned_job.name)

    # After completion gather details and artifacts
    job_details = ml_client.jobs.get(returned_job.name)
    mlflow.log_param('job_status', job_details.status)

    # Save job JSON for traceability and log as artifact.
    # Relies on the private _to_rest_object() helper to obtain a serializable payload.
    job_json_path = 'job_details_{}.json'.format(returned_job.name)
    with open(job_json_path, 'w') as f:
        json.dump(job_details._to_rest_object().serialize(), f)  # serialize REST payload
    mlflow.log_artifact(job_json_path)

    # Download job artifacts (if any) and log them; best effort.
    artifacts_dir = './job_artifacts_{}'.format(returned_job.name)
    os.makedirs(artifacts_dir, exist_ok=True)
    try:
        ml_client.jobs.download(returned_job.name, download_path=artifacts_dir)
        mlflow.log_artifacts(artifacts_dir)
    except Exception as ex:
        print('No artifacts to download or download failed:', ex)

    # Store the job's properties (if any) as an artifact for later inspection,
    # e.g. to look for information about the best child trial.
    job_properties = getattr(job_details, 'properties', None)
    if job_properties:
        props_path = 'job_properties_{}.json'.format(returned_job.name)
        try:
            with open(props_path, 'w') as pf:
                # default=str keeps the dump from failing on non-JSON-native values.
                json.dump(job_properties, pf, default=str)
            mlflow.log_artifact(props_path)
        except Exception as ex:
            # Best effort, but report the failure instead of hiding it.
            print('Could not save or log job properties:', ex)

    print('MLflow run id:', run.info.run_id)

In [None]:
# Fetch the submitted job again by name and report its current status.
job_details = ml_client.jobs.get(returned_job.name)
print('Job status:', job_details.status)

# Print the attribute names stored on the job object, as a starting point for
# locating child-trial / best-trial information.
job_attributes = vars(job_details)
print(job_attributes.keys())

# You can fetch returned_job or child runs from ml_client.jobs.list() and inspect metrics to find best trial

In [None]:
# Retrieve best child run and model details
# Placeholder: no lookup is implemented in this cell, so best_run stays None.
# TODO: populate best_run from the completed AutoML job's child trials.
best_run = None
best_run  # bare last expression; renders no output while best_run is None