# Automated ML

TODO: Import Dependencies. In the cell below, import all the dependencies that you will need to complete the project.

In [31]:
# !pip install opendatasets
import opendatasets

import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources
import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.core.datastore import Datastore

from azureml.pipeline.steps import AutoMLStep

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.42.0


## Dataset

### Overview
The dataset that will be used for this project is the Credit Card Churn Prediction dataset from Kaggle. According to Kaggle, the goal of the dataset is to identify the cause of customer attrition from a consumer credit card bank. The dataset and additional information can be found here: https://www.kaggle.com/datasets/anwarsan/credit-card-bank-churn.

To create the Dataset for this project, the data was first downloaded from Kaggle using the opendatasets package and entering my username and API KEy when prompted. The dataset contains two Naive Bayes Classifier fields which the uploader suggested to be deleted and not be considered for analysis; as such, I read the csv to a pandas Dataframe and dropped the two columns before proceeding. Once the dataset was cleaned, I proceeded to register the Dataframe as a TabularDataset in the Workspace's Datastore for use in this experiment.

In [61]:
# Download the dataset from Kaggle. This will prompt user input for a username and API Key.
# opendatasets.download('https://www.kaggle.com/datasets/anwarsan/credit-card-bank-churn')

# Clean the dataset. Need to drop last two columns that are from someone else's analysis and should not be included as predictors.
df = pd.read_csv('credit-card-bank-churn/credit_card_churn.csv')
df = df.drop(['CLIENTNUM', 'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_1', 'Naive_Bayes_Classifier_Attrition_Flag_Card_Category_Contacts_Count_12_mon_Dependent_count_Education_Level_Months_Inactive_12_mon_2'], axis=1)

In [62]:
ws = Workspace.from_config()

# choose a name for experiment
experiment_name = 'capstone-experiment'
experiment=Experiment(ws, experiment_name)

found = False
key = "credit_card_churn"
description_text = "Credit Card Churn Prediction dataset for Machine Learning Engineer with Microsoft Azure"

if key in ws.datasets.keys():
    print(f'Dataset with key: {key} found in Workspace.')
    found = True
    dataset = ws.datasets[key]

if not found:
    print(f'Dataset with key: {key} not found in Workspace.')
    # Get the Datastore where the Dataset will be registered
    datastore = Datastore.get(ws, 'workspaceblobstore')
    dataset = Dataset.Tabular.register_pandas_dataframe(dataframe=df, target=datastore, name=key, description=description_text)

df = dataset.to_pandas_dataframe()
df.describe()

Dataset with key: credit_card_churn not found in Workspace.
Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/a78a4549-7e05-47d3-9f86-17498c328ea8/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


Unnamed: 0,Customer_Age,Dependent_count,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,Contacts_Count_12_mon,Credit_Limit,Total_Revolving_Bal,Avg_Open_To_Buy,Total_Amt_Chng_Q4_Q1,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio
count,10127.0,10127.0,10127.0,10127.0,10127.0,10127.0,10127.0,10127.0,10127.0,10127.0,10127.0,10127.0,10127.0,10127.0
mean,46.33,2.35,35.93,3.81,2.34,2.46,8631.95,1162.81,7469.14,0.76,4404.09,64.86,0.71,0.27
std,8.02,1.3,7.99,1.55,1.01,1.11,9088.78,814.99,9090.69,0.22,3397.13,23.47,0.24,0.28
min,26.0,0.0,13.0,1.0,0.0,0.0,1438.3,0.0,3.0,0.0,510.0,10.0,0.0,0.0
25%,41.0,1.0,31.0,3.0,2.0,2.0,2555.0,359.0,1324.5,0.63,2155.5,45.0,0.58,0.02
50%,46.0,2.0,36.0,4.0,2.0,2.0,4549.0,1276.0,3474.0,0.74,3899.0,67.0,0.7,0.18
75%,52.0,3.0,40.0,5.0,3.0,3.0,11067.5,1784.0,9859.0,0.86,4741.0,81.0,0.82,0.5
max,73.0,5.0,56.0,6.0,6.0,6.0,34516.0,2517.0,34516.0,3.4,18484.0,139.0,3.71,1.0


In [63]:
dataset.take(5).to_pandas_dataframe()

Unnamed: 0,Attrition_Flag,Customer_Age,Gender,Dependent_count,Education_Level,Marital_Status,Income_Category,Card_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,Contacts_Count_12_mon,Credit_Limit,Total_Revolving_Bal,Avg_Open_To_Buy,Total_Amt_Chng_Q4_Q1,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio
0,Existing Customer,45,M,3,High School,Married,$60K - $80K,Blue,39,5,1,3,12691.0,777,11914.0,1.33,1144,42,1.62,0.06
1,Existing Customer,49,F,5,Graduate,Single,Less than $40K,Blue,44,6,1,2,8256.0,864,7392.0,1.54,1291,33,3.71,0.1
2,Existing Customer,51,M,3,Graduate,Married,$80K - $120K,Blue,36,4,1,0,3418.0,0,3418.0,2.59,1887,20,2.33,0.0
3,Existing Customer,40,F,4,High School,Unknown,Less than $40K,Blue,34,3,4,1,3313.0,2517,796.0,1.41,1171,20,2.33,0.76
4,Existing Customer,40,M,3,Uneducated,Married,$60K - $80K,Blue,21,5,1,0,4716.0,0,4716.0,2.17,816,28,2.5,0.0


## AutoML Configuration

TODO: Explain why you chose the automl settings and cofiguration you used below.

In [64]:
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# The name of the CPU cluster to use
amlcompute_cluster_name = "compute-cluster"

# Verify that the cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print(f'Found existing cluster with name: {amlcompute_cluster_name}, will use it')
except ComputeTargetException:
    print(f'Compute cluster with name: {amlcompute_cluster_name} not found, will create it')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS3_V2', max_nodes=4)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True, min_node_count=1, timeout_in_minutes=10)

Found existing cluster with name: compute-cluster, will use it
Succeeded.....................................................................................................................
AmlCompute wait for completion finished

Wait timeout has been reached
Current provisioning state of AmlCompute is "Succeeded" and current node count is "0"


In [65]:
automl_settings = {
    "experiment_timeout_minutes": 30,
    "max_concurrent_iterations": 5,
    "primary_metric" : 'AUC_weighted'
}

automl_config = AutoMLConfig(
    compute_target=compute_target,
    task='classification',
    training_data=dataset,
    enable_early_stopping=True,
    featurization='auto',
    debug_log='automl_errors.log',
    label_column_name='Attrition_Flag',
    n_cross_validations=5,
    **automl_settings
)

In [66]:
# TODO: Submit your experiment
remote_run = experiment.submit(automl_config)

Submitting remote run.


Experiment,Id,Type,Status,Details Page,Docs Page
capstone-experiment,AutoML_e6d0077b-24a2-498d-8d1d-b5f1f7de0369,automl,NotStarted,Link to Azure Machine Learning studio,Link to Documentation


## Run Details

OPTIONAL: Write about the different models trained and their performance. Why do you think some models did better than others?

TODO: In the cell below, use the `RunDetails` widget to show the different experiments.

In [67]:
from azureml.widgets import RunDetails
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.



In [68]:
automl_run_job, best_automl_model = remote_run.get_output()
print(best_automl_model)

Package:azureml-automl-runtime, training version:1.43.0, current version:1.42.0
Package:azureml-core, training version:1.43.0, current version:1.42.0
Package:azureml-dataset-runtime, training version:1.43.0, current version:1.42.0
Package:azureml-defaults, training version:1.43.0, current version:1.42.0
Package:azureml-interpret, training version:1.43.0, current version:1.42.0
Package:azureml-mlflow, training version:1.43.0.post1, current version:1.42.0
Package:azureml-pipeline-core, training version:1.43.0, current version:1.42.0
Package:azureml-responsibleai, training version:1.43.0, current version:1.42.0
Package:azureml-telemetry, training version:1.43.0, current version:1.42.0
Package:azureml-train-automl-client, training version:1.43.0, current version:1.42.0
Package:azureml-train-automl-runtime, training version:1.43.0.post1, current version:1.42.0
Package:azureml-train-core, training version:1.43.0, current version:1.42.0
Package:azureml-train-restclients-hyperdrive, training v

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=False, enable_feature_sweeping=True, feature_sweeping_config={}, feature_sweeping_timeout=86400, featurization_config=None, force_text_dnn=False, is_cross_validation=True, is_onnx_compatible=False, observer=None, task='classification', working_dir='/mnt/batch/tasks/shared/LS_root/mount...
                 PreFittedSoftVotingClassifier(classification_labels=array([0, 1]), estimators=[('0', Pipeline(memory=None, steps=[('maxabsscaler', MaxAbsScaler(copy=True)), ('lightgbmclassifier', LightGBMClassifier(min_data_in_leaf=20, n_jobs=1, problem_info=ProblemInfo(gpu_training_param_dict={'processing_unit_type': 'cpu'}), random_state=None))], verbose=False)), ('1', Pipeline(memory=None, steps=[('maxabsscaler', MaxAbsScaler(copy=True)), ('xgboostclassifier', XGBoostClassifier(n_jobs=1, problem_info=ProblemInfo(gpu_training_param_dict={'processing_unit_type': 'cpu'}), random_state=0, tree_method=

In [69]:
import joblib

# Save the best model
joblib.dump(value=best_automl_model, filename='automl_model.joblib')

['automl_model.joblib']

## Model Deployment

Remember you have to deploy only one of the two models you trained but you still need to register both the models. Perform the steps in the rest of this notebook only if you wish to deploy this model.

TODO: In the cell below, register the model, create an inference config and deploy the model as a web service.

In [87]:
from azureml.core.model import Model, InferenceConfig
from azureml.core.resource_configuration import ResourceConfiguration

model = Model.register(workspace=ws,
                      model_name='best-automl-model',
                      model_path='./automl_model.joblib',
                      resource_configuration=ResourceConfiguration(cpu=1, memory_in_gb=1),
                      description='Best model from the output of AutoML for Credit Card Churn Prediction')

Registering model best-automl-model


TODO: In the cell below, send a request to the web service you deployed to test it.

In [92]:
from azureml.core import Environment
from azureml.core.webservice import AciWebservice

# Get the output files from the AutoML run job for the best model to use for deployment
automl_run_job.download_files(output_directory='automl_output')

env = Environment.from_conda_specification('automl-env', './automl_output/outputs/conda_env_v_1_0_0.yml')

inference_config = InferenceConfig(environment=env,
                                  source_directory='./automl_output/outputs',
                                  entry_script='./scoring_file_v_2_0_0.py')

deployment_config = AciWebservice.deploy_configuration(cpu_cores=1,
                                                       memory_gb=1,
                                                       enable_app_insights=True,
                                                       auth_enabled=True)

deployment_name = 'automl-model-deployment'
service = Model.deploy(workspace=ws,
                       name=deployment_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=deployment_config,
                       overwrite=True)
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-08-09 05:56:32+00:00 Creating Container Registry if not exists.
2022-08-09 05:56:32+00:00 Registering the environment.
2022-08-09 05:56:34+00:00 Use the existing image.
2022-08-09 05:56:34+00:00 Generating deployment configuration.
2022-08-09 05:56:35+00:00 Submitting deployment to compute.
2022-08-09 05:56:36+00:00 Checking the status of deployment automl-model-deployment..
2022-08-09 05:59:38+00:00 Checking the status of inference endpoint automl-model-deployment.
Failed


ERROR:azureml.core.webservice.webservice:Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 9ab9faf6-db24-4823-a266-ef3b4cbf5826
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "statusCode": 400,
  "message": "Aci Deployment failed with exception: Error in entry script, FileNotFoundError: [Errno 2] No such file or directory: '/var/azureml-app/azureml-models/best-automl-model/3/model.pkl', please run print(service.get_logs()) to get details.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Error in entry script, FileNotFoundError: [Errno 2] No such file or directory: '/var/azureml-app/azureml-models/best-automl-model/3/model.pkl', please run print(service.get_logs()) to get details."
    }
  ]
}



WebserviceException: WebserviceException:
	Message: Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 9ab9faf6-db24-4823-a266-ef3b4cbf5826
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "statusCode": 400,
  "message": "Aci Deployment failed with exception: Error in entry script, FileNotFoundError: [Errno 2] No such file or directory: '/var/azureml-app/azureml-models/best-automl-model/3/model.pkl', please run print(service.get_logs()) to get details.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Error in entry script, FileNotFoundError: [Errno 2] No such file or directory: '/var/azureml-app/azureml-models/best-automl-model/3/model.pkl', please run print(service.get_logs()) to get details."
    }
  ]
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Service deployment polling reached non-successful terminal state, current service state: Failed\nOperation ID: 9ab9faf6-db24-4823-a266-ef3b4cbf5826\nMore information can be found using '.get_logs()'\nError:\n{\n  \"code\": \"AciDeploymentFailed\",\n  \"statusCode\": 400,\n  \"message\": \"Aci Deployment failed with exception: Error in entry script, FileNotFoundError: [Errno 2] No such file or directory: '/var/azureml-app/azureml-models/best-automl-model/3/model.pkl', please run print(service.get_logs()) to get details.\",\n  \"details\": [\n    {\n      \"code\": \"CrashLoopBackOff\",\n      \"message\": \"Error in entry script, FileNotFoundError: [Errno 2] No such file or directory: '/var/azureml-app/azureml-models/best-automl-model/3/model.pkl', please run print(service.get_logs()) to get details.\"\n    }\n  ]\n}"
    }
}

TODO: In the cell below, print the logs of the web service and delete the service

In [None]:
# Print logs for the Webservice
logs = service.get_logs()
for line in logs.split('\n'):
    print(line)

In [None]:
# Delete the Webservice
service.delete()

**Submission Checklist**
- I have registered the model.
- I have deployed the model with the best accuracy as a webservice.
- I have tested the webservice by sending a request to the model endpoint.
- I have deleted the webservice and shutdown all the computes that I have used.
- I have taken a screenshot showing the model endpoint as active.
- The project includes a file containing the environment details.
