# Create Azure ML Environment for experimenting, training and inferencing.

The following snippet shows how to create a reproducible Azure ML environment. It uses a requirements.txt file that is also used when creating the Compute Instance, where a startup script creates a virtual environment for us.

This keeps the virtual environment in sync with the Azure ML Environment, saving headaches later.

In [1]:
from azureml.core import Environment
from azureml.core import Workspace
import os

# Connect to the workspace described by the saved config file
workspace_config = 'Users/levm38/config.json'
ws = Workspace.from_config(path=workspace_config)

# Build the environment definition from the pip requirements file
env = Environment.from_pip_requirements(
    name="my-environment",
    file_path="requirements.txt",
)

# Register the environment in the workspace, then report success
env.register(workspace=ws)
print("Environment registered successfully.")

Environment registered successfully.


# Trigger train.py

train.py is where you put your training code; the snippet below submits it as a run using the Azure ML SDK.

In [None]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.runconfig import RunConfiguration
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config(path='Users/levm38/config.json')

# Define the experiment that will own the run
experiment = Experiment(workspace=ws, name='my-experiment')

# Load the registered environment
env = Environment.get(workspace=ws, name="my-environment")

# Define the run configuration and attach the environment to it
run_config = RunConfiguration()
run_config.environment = env

# Define script parameters
script_params = {
    '--data': 'data.csv'  # Path to the sample data file
}

# Flatten the parameters into separate command-line tokens
# (['--data', 'data.csv']). Joining a flag and its value into one string
# ('--data data.csv') hands the script a single token, which an
# argparse-based script does not recognise as the --data option.
script_arguments = []
for flag, value in script_params.items():
    script_arguments.extend([flag, str(value)])

# Configure the script run
src = ScriptRunConfig(source_directory='',
                      script='train.py',
                      arguments=script_arguments,
                      run_config=run_config)

# Submit the experiment and wait for the run to complete, showing its output
run = experiment.submit(src)
run.wait_for_completion(show_output=True)


# Train.py Contents

Before you write train.py, feel free to test the code inside a Jupyter cell, then save that same code as the train.py file.

In [3]:
import argparse
import joblib
import pandas as pd
import logging
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import sys
import os

# Configure logging: timestamp, level and message, at DEBUG level
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=LOG_FORMAT, level=logging.DEBUG)

# The script logs through the root logger
logger = logging.getLogger()

# Script arguments.
# `Args` holds the default values, so running this code in a Jupyter cell
# (where no --data flag is given) yields the same `args.data` as before,
# while a real command line such as `train.py --data other.csv` is honoured.
class Args:
    """Container for the script arguments and their default values."""

    data = 'data.csv'  # default path of the training data CSV


def parse_args(argv=None):
    """Parse the command-line arguments into an ``Args`` instance.

    Args:
        argv: List of argument strings to parse; ``None`` means
            ``sys.argv[1:]``.

    Returns:
        An ``Args`` instance whose ``data`` attribute is the value given
        with ``--data``, or the class default when the flag is absent.
    """
    parser = argparse.ArgumentParser(
        description="Train a model on a CSV file.",
        allow_abbrev=False,  # only the exact --data flag is accepted
    )
    parser.add_argument('--data', type=str,
                        help="Path to the training data CSV file.")
    # parse_known_args ignores arguments this script does not define (for
    # example the ones a Jupyter kernel is launched with) instead of exiting.
    # Passing an Args instance as the namespace keeps its class-level
    # defaults for any flag that was not supplied.
    parsed, _unknown = parser.parse_known_args(argv, namespace=Args())
    return parsed


args = parse_args()

def abort(message):
    """Log *message* as an error and terminate the script with exit code 1."""
    logger.error(message)
    sys.exit(1)


# Validate arguments: a data path must be given and must point to a file
if not args.data:
    abort("No data path provided. Please specify the --data argument.")

if not os.path.isfile(args.data):
    abort(f"Data file not found: {args.data}")

# Load data
try:
    logger.info(f"Loading data from: {args.data}")
    data = pd.read_csv(args.data)
    logger.info("Data loaded successfully.")
except Exception as load_error:
    abort(f"Failed to load data: {load_error}")

# Split the frame into features (every column except 'target') and labels
try:
    X = data.drop(columns='target')
    y = data['target']
except KeyError as missing_column:
    abort(f"KeyError during data processing: {missing_column}")

# Train model
try:
    logger.info("Training model...")
    model = RandomForestClassifier()
    model.fit(X, y)
    logger.info("Model trained successfully.")
except Exception as training_error:
    abort(f"Failed to train model: {training_error}")

# Save the model under outputs/, creating the directory if needed
try:
    os.makedirs('outputs', exist_ok=True)
    joblib.dump(model, 'outputs/model.pkl')
    logger.info("Model saved successfully.")
except Exception as save_error:
    abort(f"Failed to save model: {save_error}")

# Log accuracy (measured on the same data the model was fitted on)
try:
    y_pred = model.predict(X)
    accuracy = accuracy_score(y, y_pred)
    logger.info(f"Model accuracy: {accuracy}")
except Exception as scoring_error:
    abort(f"Failed to calculate accuracy: {scoring_error}")

2024-08-05 11:16:55,048 - INFO - Loading data from: data.csv
2024-08-05 11:16:55,058 - INFO - Data loaded successfully.
2024-08-05 11:16:55,060 - INFO - Training model...
2024-08-05 11:16:55,156 - INFO - Model trained successfully.
2024-08-05 11:16:55,235 - INFO - Model saved successfully.
2024-08-05 11:16:55,242 - INFO - Model accuracy: 1.0


# Register the model

The scripts above produce a model in pickle format; be sure to register it.

In [None]:
from azureml.core import Workspace, Model

# Connect to the workspace described by the saved config file
ws = Workspace.from_config(path='Users/levm38/config.json')

# Register the model file written by the training step
registration = {
    'workspace': ws,
    'model_path': 'outputs/model.pkl',
    'model_name': 'random_forest_model',
}
model = Model.register(**registration)

# Create deployment configuration

The snippet below creates the deployment configuration and the inference endpoint.

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    Model,
    Environment,
    CodeConfiguration,
)

# Initialize MLClient with the default Azure credential
credential = DefaultAzureCredential()
ml_client = MLClient.from_config(credential)

# Names of the managed online endpoint and of its deployment
endpoint_name = 'my-managed-online-endpoint'
deployment_name = 'my-deployment'

# Describe the managed online endpoint (key-based authentication)
endpoint = ManagedOnlineEndpoint(
    name=endpoint_name,
    description="My managed online endpoint for model deployment",
    auth_mode="key",
)

# Create the endpoint and wait for the operation's result
endpoint_poller = ml_client.begin_create_or_update(endpoint)
endpoint_poller.result()

# Use the registered model
model_id = 'random_forest_model:7'  # Ensure you use the correct model ID and version

# Retrieve the registered environment
environment = ml_client.environments.get(name="my-environment", version="8")  # Adjust the version as needed

# Scoring code: score.py taken from the current directory
scoring_code = CodeConfiguration(
    code='./',  # Assuming 'score.py' is in the current directory
    scoring_script='score.py',
)

# Describe the deployment that serves the model behind the endpoint
deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=endpoint_name,
    model=model_id,  # Reference the registered model ID
    environment=environment.id,  # Use the registered environment ID
    code_configuration=scoring_code,
    instance_type='Standard_F2s_v2',
    instance_count=1,
)

# Create the deployment and wait for the operation's result
# NOTE(review): no traffic is assigned to this deployment here; callers
# presumably have to target it explicitly - confirm against the endpoint's
# traffic settings.
deployment_poller = ml_client.begin_create_or_update(deployment)
deployment_poller.result()

# Get the endpoint details and show its state and scoring URI
endpoint = ml_client.online_endpoints.get(name=endpoint_name)

print(f"Managed online endpoint state: {endpoint.provisioning_state}")
print(f"Managed online endpoint scoring URI: {endpoint.scoring_uri}")


# score.py

In [None]:
import json
import joblib
import pandas as pd
import os

# Define the init function to load the model
def init():
    """Load the pickled model into the module-level ``model`` variable.

    The model is read from ``outputs/model.pkl`` under the current working
    directory.

    Raises:
        FileNotFoundError: If no file exists at that path.
    """
    global model
    location = os.path.join(os.getcwd(), 'outputs/model.pkl')
    print(f"Model path: {location}")

    # Only attempt to load when the file is really there; otherwise fail
    # with a message that names the path that was tried.
    if os.path.isfile(location):
        model = joblib.load(location)
        print(f"Model loaded successfully from: {location}")
    else:
        raise FileNotFoundError(f"Model file not found: {location}")

# Define the run function to handle requests
def run(raw_data):
    """Score one request with the loaded model.

    Args:
        raw_data: JSON text; once decoded it is passed to ``pd.DataFrame``
            and must provide the columns feature1, feature2 and feature3.

    Returns:
        A JSON string: the list of predictions on success, or an object
        with a single ``error`` key holding the exception message on failure.
    """
    required_columns = ('feature1', 'feature2', 'feature3')
    try:
        frame = pd.DataFrame(json.loads(raw_data))

        # Report the first required column that is absent from the request
        absent = [name for name in required_columns if name not in frame.columns]
        if absent:
            raise ValueError(f"Missing required column: {absent[0]}")

        # Make predictions
        predictions = model.predict(frame)
        return json.dumps(predictions.tolist())
    except Exception as exc:
        # Failures are reported to the caller as JSON instead of being raised
        return json.dumps({'error': str(exc)})

# Local test
# Guarded so that it runs when this code is executed directly (a notebook
# cell or `python score.py`) but not when score.py is imported as a module;
# without the guard, every import would load the model and score the sample
# data as a side effect.
if __name__ == "__main__":
    init()

    # Create some test data
    test_data = {
        "feature1": [1, 5],
        "feature2": [2, 6],
        "feature3": [3, 7]
    }
    raw_data = json.dumps(test_data)

    # Run the scoring function
    result = run(raw_data)
    print("Inference Result:", result)

# Testing the inference endpoint

In [15]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally disable server certificate verification on the client side.

    When ``allowed`` is truthy and the ``PYTHONHTTPSVERIFY`` environment
    variable is unset or empty, the ``ssl`` module's default HTTPS context
    factory is replaced with the unverified one (if the module provides it).
    Otherwise nothing is changed.

    Args:
        allowed: Whether bypassing certificate verification is permitted.
    """
    if not allowed:
        return
    # An explicit PYTHONHTTPSVERIFY setting takes precedence
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    if getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

# NOTE: this switches off server certificate verification for HTTPS requests
# made through the ssl module's default context in this process.
allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = {
    "feature1": [1, 5],
    "feature2": [2, 6],
    "feature3": [3, 7]
}

body = json.dumps(data).encode('utf-8')

url = 'https://my-managed-online-endpoint.swedencentral.inference.ml.azure.com/score'

# The primary/secondary key, AMLToken, or Microsoft Entra ID token for the
# endpoint is read from an environment variable so that the secret is never
# stored in the source.
# NOTE(review): a key was previously hard-coded on this line; treat it as
# exposed and regenerate it.
API_KEY_ENV_VAR = 'AZUREML_ENDPOINT_KEY'
api_key = os.environ.get(API_KEY_ENV_VAR, '')
if not api_key:
    raise Exception(
        "A key should be provided to invoke the endpoint "
        f"(set the {API_KEY_ENV_VAR} environment variable)"
    )

# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {
    'Content-Type': 'application/json',
    'Authorization': 'Bearer ' + api_key,
    'azureml-model-deployment': 'my-deployment',
}

req = urllib.request.Request(url, body, headers)

try:
    # The context manager closes the HTTP response once the body is read
    with urllib.request.urlopen(req) as response:
        result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))


b'"[0, 1]"'
