# Automated ML

Import Dependencies.

In [1]:
from azureml.core import Workspace, Experiment
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.train.automl import AutoMLConfig

## Dataset

### Overview


In [2]:
# Name under which every run submitted from this notebook is grouped.
experiment_name = 'capstone-automl'

# Resolve the workspace from its saved configuration (may trigger interactive login),
# then bind the experiment to it.
ws = Workspace.from_config()
experiment = Experiment(workspace=ws, name=experiment_name)

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code E95V52TEJ to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.


## Creating Compute

In [3]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

amlcompute_cluster_name = "capstone-compute"

# Reuse the cluster when one with this name already exists in the workspace;
# otherwise provision a new one (ComputeTargetException signals "not found").
try:
    aml_compute = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2',
                                                           max_nodes=4)
    aml_compute = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)
    # create() only starts provisioning; block until it finishes so that a
    # provisioning failure surfaces here instead of in a later cell.
    aml_compute.wait_for_completion(show_output=True)

Found existing cluster, use it.


## AutoML Configuration

In [4]:
# Remote CSV that is handed to AutoML as the training data.
url = 'https://raw.githubusercontent.com/krishula/AzureMLCapstone/main/heart.csv'

# Build a tabular dataset directly from the delimited file at that URL.
factory = TabularDatasetFactory()
train_data = factory.from_delimited_files(path=url)

In [6]:
# Run-level AutoML options, kept in their own dict so they can be tuned
# separately from the task definition below.
automl_settings = dict(
    experiment_timeout_minutes=20,
    primary_metric='accuracy',
    featurization='auto',
    enable_early_stopping=True,
)

# Classification on the 'target' column of train_data, executed on the
# aml_compute cluster with up to 4 iterations in parallel.
automl_config = AutoMLConfig(
    task='classification',
    training_data=train_data,
    label_column_name='target',
    compute_target=aml_compute,
    max_concurrent_iterations=4,
    blocked_models=['KNN', 'LinearSVM'],
    enable_onnx_compatible_models=True,
    debug_log='automl_errors.log',
    **automl_settings
)

In [7]:
# Submit the AutoML configuration to the experiment and stream progress into the notebook.
remote_run = experiment.submit(config=automl_config, show_output=True)

Running on remote.
No run_configuration provided, running on capstone-compute with default configuration
Running on remote compute: capstone-compute
Parent Run ID: AutoML_2bb460c2-e191-436f-8b1f-fd82c3986e13

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Cross validation
STATUS:       DONE
DESCRIPTION:  Each iteration of the trained model was validated through cross-validation.
              
DETAILS:      
+---------------------------------+
|Number of folds                  |
|10                               |
+---------------------------------+

****************************************************************************************************

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are

## Run Details


In [8]:
from azureml.widgets import RunDetails

# Render the run-monitoring widget for the AutoML parent run.
run_details_widget = RunDetails(remote_run)
run_details_widget.show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

## Best Model



In [9]:
# get_output() is unpacked into the best run and the fitted model from that run.
best_run_aml, fitted_model_aml = remote_run.get_output()
# Name recorded in the best run's properties (kept for reference; not used by later cells).
model_name = best_run_aml.properties['model_name']
print(best_run_aml)

Run(Experiment: capstone-automl,
Id: AutoML_2bb460c2-e191-436f-8b1f-fd82c3986e13_18,
Type: azureml.scriptrun,
Status: Completed)


In [10]:
# Show the last step of the fitted pipeline, i.e. the final estimator
# (assumes an sklearn-style Pipeline exposing `.steps` -- TODO confirm).
final_estimator = fitted_model_aml.steps[-1][1]
print(final_estimator)

PreFittedSoftVotingClassifier(classification_labels=None,
                              estimators=[('8',
                                           Pipeline(memory=None,
                                                    steps=[('standardscalerwrapper',
                                                            <azureml.automl.runtime.shared.model_wrappers.StandardScalerWrapper object at 0x7fc1482baf28>),
                                                           ('randomforestclassifier',
                                                            RandomForestClassifier(bootstrap=False,
                                                                                   ccp_alpha=0.0,
                                                                                   class_weight=None,
                                                                                   criterion='entropy',
                                                                                   max_depth=None.

In [12]:
# Artifacts to copy from the best run: (name inside the run, local destination).
artifact_downloads = [
    ('./outputs/model.pkl', './outputs/heart-diesease-auto-model.pkl'),
    ('./outputs/scoring_file_v_1_0_0.py', './outputs/score_aml.py'),
    ('./automl_driver.py', './outputs/automl_driver.py'),
]

for artifact_name, local_path in artifact_downloads:
    best_run_aml.download_file(artifact_name, local_path)

## Auto ML Evaluation

In [14]:
import pickle

# SECURITY NOTE: pickle.load can execute arbitrary code, so only unpickle files
# from a trusted source (here: the model file written to ./outputs by this notebook).
# The context manager closes the handle even when unpickling raises.
with open("./outputs/heart-diesease-auto-model.pkl", 'rb') as model_file:
    aml_model = pickle.load(model_file)

In [15]:
from sklearn.metrics import accuracy_score
import pandas as pd


# NOTE(review): this is the same CSV the AutoML run was trained on, so the score
# printed below is a training-set accuracy, not a held-out estimate.
test_data_path = "https://raw.githubusercontent.com/krishula/AzureMLCapstone/main/heart.csv"
test_ds = pd.read_csv(test_data_path)
y_test = test_ds[['target']]
# accuracy_score takes (y_true, y_pred); the label column is dropped before predicting.
print(accuracy_score(y_test, aml_model.predict(test_ds.drop(columns=['target']))))

0.9174917491749175


In [16]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the model's predictions against y_test.
features_only = test_ds.drop(columns=['target'])
predicted_labels = aml_model.predict(features_only)
print(classification_report(y_test, predicted_labels))

              precision    recall  f1-score   support

           0       0.93      0.88      0.91       138
           1       0.91      0.95      0.93       165

    accuracy                           0.92       303
   macro avg       0.92      0.91      0.92       303
weighted avg       0.92      0.92      0.92       303



## Model Deployment

In [24]:
from azureml.core.model import Model

# Register the local model pickle in the workspace under the name "automl_cap".
model = Model.register(
    model_name="automl_cap",
    model_path="./outputs/heart-diesease-auto-model.pkl",
    workspace=ws,
)

Registering model automl_cap


In [25]:
# Confirm the name the model was registered under.
registered_model_name = model.name
print(registered_model_name)

automl_cap


### Inference configuration

In [31]:
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig


# Build the scoring environment from the conda specification in env.yml.
env = Environment.from_conda_specification(name='myenv', file_path='env.yml')

# Pair the scoring entry script with that environment.
inference_config = InferenceConfig(environment=env, entry_script='./score.py')

In [32]:
# Serialize the environment's conda dependencies once and reuse the text
# for both the file on disk and the printout.
conda_spec = env.python.conda_dependencies.serialize_to_string()

# The context manager flushes and closes the file even if write() raises.
with open("envnew.yml", "w") as env_file:
    env_file.write(conda_spec)

print("packages", conda_spec)

packages channels:
- anaconda
- conda-forge
dependencies:
- pip
- python=3.6.2
- pip:
  - azureml-core==1.19.0
  - azureml-defaults==1.19.0
  - azureml-telemetry==1.19.0
  - azureml-train-core==1.19.0
  - azureml-widgets==1.19.0
  - azureml-sdk==1.19.0
  - azureml-train-automl-runtime==1.19.0
  - azureml-automl-runtime==1.19.0
  - mlflow
  - sklearn-pandas
- pandas
- numpy
- tqdm
- py-xgboost
- joblib
- xgboost
- scikit-learn==0.22.1
- matplotlib
name: azureml_a1736710baabf05d37337a101fa8d6d1



In [43]:
from azureml.core.webservice import AciWebservice,Webservice
from azureml.core.model import Model

# ACI sizing: 1 CPU core and 1 GB of memory, with Application Insights enabled.
deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    enable_app_insights=True,
)

# Deploy the registered model as the web service "deploy3" and wait until it is up.
service = Model.deploy(
    workspace=ws,
    name="deploy3",
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)
service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running.........................................
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [44]:
# Fetch the service logs and print them one line at a time.
logs = service.get_logs()
print(*logs.split('\n'), sep='\n')

2021-01-11T14:46:00,668571066+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_78b912a296061c9d6178810a04b724e8/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_78b912a296061c9d6178810a04b724e8/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_78b912a296061c9d6178810a04b724e8/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_78b912a296061c9d6178810a04b724e8/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_78b912a296061c9d6178810a04b724e8/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
2021-01-11T14:46:00,686149926+00:00 - iot-server/run 
2021-01-11T14:46:00,686867629+00:00 - gunicorn/run 
2021-01-11T14:46:00,687186430+00:00 - rsyslog/run 
rsyslogd

## Sending a request to the web service we deployed to test it.

In [49]:
import json

# Drop the label so that only feature columns are sent to the service.
test_ds1 = test_ds.drop(columns=['target'])

# First three rows as a list of {column: value} records.
sample_records = test_ds1.head(3).to_dict(orient='records')
data = {'data': sample_records, 'method': 'predict'}
test_sample = json.dumps(data)

output = service.run(test_sample)
print(output)

[1, 1, 1]


In [51]:
# One hand-written record with the 13 feature columns (no 'target').
single_record = {
    "age": 63,
    "sex": 1,
    "cp": 3,
    "trestbps": 145,
    "chol": 233,
    "fbs": 1,
    "restecg": 0,
    "thalach": 150,
    "exang": 0,
    "oldpeak": 2.3,
    "slope": 0,
    "ca": 0,
    "thal": 1,
}

# The service receives a JSON document with the records under the "data" key.
data = {"data": [single_record]}
test_sample = json.dumps(data)

output = service.run(test_sample)
print(output)

[1]


In [53]:
import requests

# Take the endpoint from the deployed service object instead of a pasted URL:
# a hard-coded ACI hostname stops working as soon as the service is redeployed.
scoring_uri = service.scoring_uri
headers = {'Content-Type': 'application/json'}
# If authentication is enabled on the service, also set:
#   headers['Authorization'] = 'Bearer <service key>'

# Single record with the 13 feature columns, wrapped under the "data" key.
data = {"data":
        [
          {
            "age": 63,
            "sex": 1,
            "cp": 3,
            "trestbps": 145,
            "chol": 233,
            "fbs": 1,
            "restecg": 0,
            "thalach": 150,
            "exang": 0,
            "oldpeak": 2.3,
            "slope": 0,
            "ca": 0,
            "thal": 1,
          }
      ]
    }
test_sample = json.dumps(data)

# A timeout keeps the cell from hanging forever on an unreachable endpoint, and
# raise_for_status() turns HTTP errors into an exception instead of letting
# resp.json() fail (or succeed) on an error body.
resp = requests.post(scoring_uri, data=test_sample, headers=headers, timeout=30)
resp.raise_for_status()
print(resp.json())

[1]


## Deleting the web service and the compute cluster

In [47]:
# Remove the deployed web service now that the test requests are done.
service.delete()
print("service deleted")

service deleted


In [48]:
# Remove the compute cluster that was used for the AutoML run.
aml_compute.delete()
print("compute deleted")

compute deleted
