# Automated ML

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [1]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.pipeline.core import Pipeline

from azureml.pipeline.steps import AutoMLStep

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.51.0


In [2]:
ws = Workspace.from_config()

# choose a name for experiment
experiment_name = 'automl-experiment'

experiment=Experiment(ws, experiment_name)

In [3]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster
# Choose a name for your CPU cluster
amlcompute_cluster_name = "my-cluster"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',# for GPU, use "STANDARD_NC6"
                                                           #vm_priority = 'lowpriority', # optional
                                                           max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)
# For a more detailed view of current AmlCompute status, use get_status().

InProgress..
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Dataset

### Overview
TODO: In this markdown cell, give an overview of the dataset you are using. Also mention the task you will be performing.


TODO: Get data. In the cell below, write code to access the data you will be using in this project. Remember that the dataset needs to be external.

In [4]:
project_name = 'azureml-spaceship-titanic'
project_folder = os.getcwd().split(project_name)[0] + project_name

In [5]:
import sys
sys.path.append(project_folder)

from src.pipelines.preprocess import preprocess_data

df = pd.read_csv('../data/01_raw/train.csv')
processed_df = preprocess_data(df).drop(columns=["PassengerId"])


In [6]:
dataset_name = "Spaceship_Dataset"
description_text = "Data to predict which passengers are transported to an alternate dimension"

# Create AML Dataset and register it into Workspace
dataset = Dataset.Tabular.register_pandas_dataframe(processed_df, ws.get_default_datastore(), dataset_name)   
#Register Dataset in Workspace
dataset = dataset.register(workspace=ws,
                                name=dataset_name,
                                description=description_text,
                                create_new_version=True
                        )


df = dataset.to_pandas_dataframe()
df.describe()

Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/56b45e6b-0788-48fe-ad31-696c88cab3b3/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


Unnamed: 0,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,People_in_Cabin_Num,People_in_Cabin_Deck,Family_Size,Group_Size
count,8512.0,8510.0,8485.0,8510.0,8505.0,8494.0,8494.0,8493.0,8693.0
mean,224.687617,458.077203,173.729169,311.138778,304.854791,8.037203,1952.105957,5.428117,2.035546
std,666.717663,1611.48924,604.696458,1136.705535,1145.717189,5.214669,963.741893,2.891901,1.596347
min,0.0,0.0,0.0,0.0,0.0,1.0,5.0,1.0,1.0
25%,0.0,0.0,0.0,0.0,0.0,4.0,779.0,3.0,1.0
50%,0.0,0.0,0.0,0.0,0.0,7.0,2559.0,5.0,1.0
75%,47.0,76.0,27.0,59.0,46.0,12.0,2794.0,7.0,3.0
max,14327.0,29813.0,23492.0,22408.0,24133.0,28.0,2794.0,18.0,8.0


## AutoML Configuration

TODO: Explain why you chose the automl settings and cofiguration you used below.

In [9]:
automl_settings = {
    "experiment_timeout_minutes": 20,
    "max_concurrent_iterations": 4,
    "primary_metric" : 'accuracy',
    "n_cross_validations": 5,
}
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = "classification",
                             training_data=dataset,
                             label_column_name="Transported",   
                             path = project_folder + '/automl-pipeline',
                             enable_early_stopping= True,
                             featurization= 'auto',
                             debug_log = "automl_errors.log",
                             **automl_settings
                            )
automl_run = experiment.submit(automl_config)

## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [12]:
from azureml.widgets import RunDetails

RunDetails(automl_run).show()
automl_run.wait_for_completion(show_output=True)

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

Experiment,Id,Type,Status,Details Page,Docs Page
automl-experiment,AutoML_4c7ea1c1-7479-4a9f-b8fc-77866872cc89,automl,Completed,Link to Azure Machine Learning studio,Link to Documentation




********************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are balanced in your training data.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData

********************************************************************************************

TYPE:         Missing feature values imputation
STATUS:       DONE
DESCRIPTION:  If the missing values are expected, let the run complete. Otherwise cancel the current run and use a script to customize the handling of missing feature values that may be more appropriate based on the data type and business requirement.
              Learn more about missing value imputation: https://aka.ms/AutomatedMLFeaturization
DETAILS:      
+------------------------------+------------------------------+------------------------------+
|Column name          

{'runId': 'AutoML_4c7ea1c1-7479-4a9f-b8fc-77866872cc89',
 'target': 'my-cluster',
 'status': 'Completed',
 'startTimeUtc': '2024-02-01T14:22:28.435064Z',
 'endTimeUtc': '2024-02-01T15:03:29.34377Z',
 'services': {},
   'message': 'Experiment timeout reached, hence experiment stopped. Current experiment timeout: 0 hour(s) 20 minute(s)'}],
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'my-cluster',
  'DataPrepJsonString': '{\\"training_data\\": {\\"datasetId\\": \\"6a9798a1-0076-4fa0-b4e5-1c794db716c7\\"}, \\"datasets\\": 0}',
  'EnableSubsampling': None,
  'runTemplate': 'AutoML',
  'azureml.runsource': 'automl',
  'display_task_type': 'classification',
  'dependencies_versions': '{"azureml-dataprep-native": "38.0.0", "azureml-dataprep": "4.10.8", "azureml-dataprep-rslex": "2.17.12", "azureml-train-au

## Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.



In [19]:
best_run, best_model = automl_run.get_output()
best_metrics = best_run.get_metrics()
print(best_run)
print(best_metrics)

Package:azureml-automl-runtime, training version:1.52.0.post1, current version:1.51.0.post1
Package:azureml-core, training version:1.52.0, current version:1.51.0
Package:azureml-dataprep, training version:4.11.4, current version:4.10.8
Package:azureml-dataprep-rslex, training version:2.18.4, current version:2.17.12
Package:azureml-dataset-runtime, training version:1.52.0, current version:1.51.0
Package:azureml-defaults, training version:1.52.0, current version:1.51.0
Package:azureml-interpret, training version:1.52.0, current version:1.51.0
Package:azureml-mlflow, training version:1.52.0, current version:1.51.0
Package:azureml-pipeline-core, training version:1.52.0, current version:1.51.0
Package:azureml-responsibleai, training version:1.52.0, current version:1.51.0
Package:azureml-telemetry, training version:1.52.0, current version:1.51.0
Package:azureml-train-automl-client, training version:1.52.0, current version:1.51.0.post1
Package:azureml-train-automl-runtime, training version:1.

Run(Experiment: automl-experiment,
Id: AutoML_8bab294e-51b5-4cc6-88b4-d6c7c64d8400_36,
Type: azureml.scriptrun,
Status: Completed)
{'log_loss': 0.4652249570658661, 'f1_score_weighted': 0.8115565663088772, 'recall_score_micro': 0.8115716014719517, 'precision_score_weighted': 0.8116651090946579, 'average_precision_score_macro': 0.9010612370473703, 'f1_score_micro': 0.8115716014719515, 'precision_score_macro': 0.8116430852921802, 'average_precision_score_micro': 0.9047032724388469, 'precision_score_micro': 0.8115716014719517, 'matthews_correlation': 0.6231820430023602, 'accuracy': 0.8115716014719517, 'f1_score_macro': 0.8115292290992873, 'weighted_accuracy': 0.8116041725138953, 'AUC_macro': 0.8999161008789696, 'average_precision_score_weighted': 0.9011573374563951, 'AUC_weighted': 0.8999161008789696, 'balanced_accuracy': 0.8115389754598159, 'norm_macro_recall': 0.6230779509196316, 'recall_score_macro': 0.8115389754598159, 'AUC_micro': 0.9020747095634671, 'recall_score_weighted': 0.8115716

In [24]:
best_model

PipelineWithYTransformations(Pipeline={'memory': None,
                                       'steps': [('datatransformer',
                                                  DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mn...
                                                  PreFittedSoftVotingClassifier(classification_labels=array([0, 1]), estimators=[('18', Pipeline(memory=None, steps=[('standardscalerwrapper', StandardScalerWrapper(copy=True, with_mean=False, with_std=False)), ('xgboostclassifier', XGBoostClassifier(booster='gbtree', colsample_bytree=0.7, eta=0.1, gamma=0.1, max_depth=9, max_leaves=511, n_estimators=25, n_jobs=1, objective='reg:logistic', problem_info=ProblemInfo(gpu_training_param_dict={'processing_unit_type': 'cpu'}), random_state=0, reg

## Model Deployment

Remember you have to deploy only one of the two models you trained but you still need to register both the models. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [20]:
registered_model = automl_run.register_model(model_name='BestAutoMLmodel')

In [22]:
script = './score.py'
best_run.download_file('./outputs/scoring_file_v_1_0_0.py', script)

In [23]:
from azureml.core.model import InferenceConfig
from  azureml.core.environment import Environment
from azureml.core import Model
from azureml.core.webservice import AciWebservice

env = best_run.get_environment()    

inference_config = InferenceConfig(entry_script=script,
                                   environment=env)

deployment_config =AciWebservice.deploy_configuration(cpu_cores = 1, 
                                               memory_gb = 1,
                                               enable_app_insights=True,
                                               auth_enabled=True,
                                              )

service = Model.deploy(ws, "web-service-automl", [registered_model], inference_config, deployment_config)
service.wait_for_deployment(show_output = True)
print(service.state)

scoring_uri = service.scoring_uri
print(scoring_uri)

key, sec_key = service.get_keys()
print(key)


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2024-02-01 02:19:03+00:00 Creating Container Registry if not exists..
2024-02-01 02:29:03+00:00 Registering the environment.
2024-02-01 02:29:03+00:00 Use the existing image.
2024-02-01 02:29:04+00:00 Submitting deployment to compute..
2024-02-01 02:29:09+00:00 Checking the status of deployment web-service-automl..
2024-02-01 02:33:06+00:00 Checking the status of inference endpoint web-service-automl.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy
http://d1ddb3c1-cf85-4869-8ac2-954df8eb7cdc.westus2.azurecontainer.io/score
VInpAwDABTtjDPRuYfl8nCoXr916JbvZ


Deployed from Studio



TODO: In the cell below, send a request to the web service you deployed to test it.

In [26]:
import requests
import json

# URL for the web service, should be similar to:
# 'http://8530a665-66f3-49c8-a953-b82a2d312917.eastus.azurecontainer.io/score'
# scoring_uri = 'http://091674f3-7d52-4713-a563-29fdf9673dcd.westeurope.azurecontainer.io/score'
# If the service is authenticated, set the key or token
# key = '2cloAPK95e4LlPLSINN0HXiMNWGOcJb5'

# Two sets of data to score, so we get two results back
data = {
    "data":
            [
              {
        "HomePlanet": "Europa",
        "CryoSleep": "False",
        "Destination": "TRAPPIST-1e",
        "VIP": "False",
        "RoomService": 109.00,
        "FoodCourt": 1000,
        "ShoppingMall": 25.0,
        "Spa": 200.0,
        "VRDeck": 2.0,
        "Cabin_Deck": "B",
        "Cabin_Side": "P",
        "Cabin_Region": "A",
        "People_in_Cabin_Num": 14,
        "People_in_Cabin_Deck": 700,
        "Family_Size": 4,
        "Group_Size": 2,
        "Age_Cat": "Pre_Adult"
        }
      ],
    "method": "predict"
}
# Convert to JSON string
input_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(input_data)

# Set the content type
headers = {'Content-Type': 'application/json'}
# If authentication is enabled, set the authorization header
headers['Authorization'] = f'Bearer {key}'

# Make the request and display the response
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.json())


{"result": [false]}


TODO: In the cell below, print the logs of the web service and delete the service

In [27]:
%%writefile logs.py
from azureml.core import Workspace, Webservice
from azureml.exceptions import WebserviceException
import time

# Load the Azure ML workspace
ws = Workspace.from_config()

# Name of the web service
service_name = 'web-service-automl'

# Function to attempt to delete the service
def try_delete_service(service):
    try:
        service.delete()
        print("Web service deleted successfully.")
        return True
    except WebserviceException as e:
        print(f"Error deleting web service: {str(e)}")
        return False

# Get a reference to the web service
service = Webservice(workspace=ws, name=service_name)

# Print logs
print("Web service logs:")
print(service.get_logs())

# Wait for deployment operation to complete
try:
    service.wait_for_deployment()
except WebserviceException as e:
    # Ignore the exception related to the operation status
    if "No operation endpoint" not in str(e) and "Long running operation information not known" not in str(e):
        raise

# Attempt to delete the deployed web service with retries
max_retries = 3
retry_count = 0

while retry_count < max_retries:
    if try_delete_service(service):
        break
    else:
        retry_count += 1
        print(f"Retrying deletion (attempt {retry_count}/{max_retries})...")
        time.sleep(10)  # Wait for 10 seconds before retrying

if retry_count == max_retries:
    print("Max retries reached. Unable to delete the web service.")


Overwriting logs.py


In [28]:
!python logs.py

Web service logs:
None
No operation endpoint

Long running operation information not known, unable to poll. Current state is Failed

Web service deleted successfully.


**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
