In [12]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.metrics import accuracy_score
import pkg_resources
import joblib

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset
from azureml.core.model import Model
from azureml.core import Environment
from azureml.core.model import InferenceConfig

from azureml.pipeline.steps import AutoMLStep

from preprocessing import get_hyperd_data, main

# Report the installed Azure ML core SDK version for reproducibility.
sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

SDK version: 1.42.0


In [2]:
# Load the workspace via Workspace.from_config() and list its identifying
# fields, one per line.
ws = Workspace.from_config()
workspace_summary = (ws.name, ws.resource_group, ws.location, ws.subscription_id)
print(*workspace_summary, sep='\n')

quick-starts-ws-199272
aml-quickstarts-199272
westeurope
6971f5ac-8af1-446e-8034-05acea24681f


In [None]:
# Bind to the "udacity-project" experiment in the workspace and open an
# interactive run on it.
experiment_name = "udacity-project"
exp = Experiment(workspace=ws, name=experiment_name)
# NOTE(review): no visible cell completes this interactive run — confirm
# whether run.complete() should be called once the notebook is done.
run = exp.start_logging()

In [None]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# NOTE: update the cluster name to match the existing cluster.
compute_cluster_name = "ndeg-prj2-clust"

# Reuse the compute target registered under this name when the lookup
# succeeds; otherwise provision a new CPU cluster.
try:
    compute_cluster = ComputeTarget(workspace=ws, name=compute_cluster_name)
except ComputeTargetException:
    # For GPU use vm_size "STANDARD_NC6"; vm_priority='lowpriority' is optional.
    provisioning_kwargs = {'vm_size': 'STANDARD_D2_V2', 'max_nodes': 4}
    compute_config = AmlCompute.provisioning_configuration(**provisioning_kwargs)
    compute_cluster = ComputeTarget.create(ws, compute_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster is ready, streaming progress output.
# For a more detailed view of current AmlCompute status, use get_status().
compute_cluster.wait_for_completion(show_output=True)

In [6]:
train_ds, test_ds = get_hyperd_data(ws)

Loading datasets from workspace ...


In [None]:
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import choice, uniform
from azureml.core import Environment, ScriptRunConfig
import os

# Random search space. Each key is passed to the training script as a
# command-line argument with the sampled value.
ps = RandomParameterSampling({
    '--n_estimators': choice(range(2, 100)),      # integers 2..99
    '--max_depth': choice(range(2, 10)),          # integers 2..9
    '--max_features': choice(range(1, 14)),       # integers 1..13
    '--min_samples_leaf': uniform(0.01, 0.5)      # continuous value in [0.01, 0.5]
})

# Early-termination policy. The policy is applied every 2 evaluation intervals
# (an interval is one logging of the primary metric by a child run, not one
# run). slack_factor is a ratio: with a MAXIMIZE goal, a run is cancelled when
# its best metric is below best_metric / (1 + 0.1), i.e. roughly 10% slack
# relative to the best-performing run.
policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1)

# Make sure the local "training" folder exists. exist_ok avoids the
# check-then-create race of listing the directory first.
os.makedirs("training", exist_ok=True)

# Setup environment for your training run from the conda specification file.
sklearn_env = Environment.from_conda_specification(name='sklearn-env', file_path='conda_dependencies.yml')

# ScriptRunConfig describes one training job: which script to run, from which
# source directory, on which compute target and in which environment.
src = ScriptRunConfig(
    source_directory=".",
    script="preprocessing.py",
    #arguments=['--input-data', train_ds.as_named_input('train')],
    compute_target=compute_cluster,  # use the previously created compute cluster
    environment=sklearn_env
)

# HyperDriveConfig combines the job definition, the sampler and the policy.
# The sweep maximises the logged 'accuracy' metric over at most 20 child runs,
# with at most 3 running concurrently.
hyperdrive_config = HyperDriveConfig(run_config=src,
    hyperparameter_sampling=ps,
    policy=policy,
    primary_metric_name='accuracy',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=20,
    max_concurrent_runs=3)

In [None]:
# Launch the HyperDrive sweep on the experiment, attach the run-details
# widget, then block until the sweep has finished.
hyperdrive_run = exp.submit(config=hyperdrive_config)
run_widget = RunDetails(hyperdrive_run)
run_widget.show()
hyperdrive_run.wait_for_completion(show_output=True)

In [None]:
# Pick the best child run of the sweep, report its primary metric and the
# arguments it was launched with, then download its model file.

best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # No child run logged the primary metric (for example, every child failed).
    # Fail with a clear message instead of an AttributeError on None below.
    raise RuntimeError(
        "HyperDrive found no child run that logged the primary metric 'accuracy'; "
        "check the child run logs before continuing."
    )

best_run_metrics = best_run.get_metrics()
# The run definition's argument list holds the sampled hyperparameter values.
parameter_values = best_run.get_details()['runDefinition']['arguments']

print('Best run ID:', best_run.id)
print('Accuracy:', best_run_metrics['accuracy'])
print('Parameters:', parameter_values)

# Copy the run's outputs/model.pkl to a local file used by the later cells.
best_run.download_file('outputs/model.pkl', './outputs/best_model_hyperdrive.pkl', _validate_checksum=True)

In [7]:
# Load the downloaded model and measure its accuracy on the test dataset.
best_model = joblib.load('./outputs/best_model_hyperdrive.pkl')
test_df = test_ds.to_pandas_dataframe()

# 'income' is used as the label column; all remaining columns are the inputs.
y_test = test_df['income'].to_numpy()
X_test = test_df.drop(columns=['income']).to_numpy()

y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

0.8321356182052699


In [10]:
# Register the downloaded HyperDrive model file in the workspace under the
# name "adult-model".
model = Model.register(
    workspace=ws,
    model_path="./outputs/best_model_hyperdrive.pkl",
    model_name="adult-model",
)

Registering model adult-model


In [13]:
# Placeholder inference setup: a named environment with no further
# customisation here, plus the echo_score.py entry script in ./source_dir.
env = Environment(name="project_environment")
dummy_inference_config = InferenceConfig(
    entry_script="./echo_score.py",
    source_directory="./source_dir",
    environment=env,
)

In [14]:
from azureml.core.webservice import LocalWebservice

# Configure the local web service to listen on port 6789.
local_port = 6789
deployment_config = LocalWebservice.deploy_configuration(port=local_port)

In [15]:
# Deploy the registered model as "myservice" using the inference and
# deployment configurations defined above, replacing any existing service
# with that name, then wait for the deployment to finish.
service = Model.deploy(
    workspace=ws,
    name="myservice",
    models=[model],
    inference_config=dummy_inference_config,
    deployment_config=deployment_config,
    overwrite=True,
)
service.wait_for_deployment(show_output=True)

Downloading model adult-model:1 to /tmp/azureml_b6m41pe7/adult-model/1
Generating Docker build context.
2022/06/24 09:16:32 Downloading source code...
2022/06/24 09:16:33 Finished downloading source code
2022/06/24 09:16:34 Creating Docker network: acb_default_network, driver: 'bridge'
2022/06/24 09:16:34 Successfully set up Docker network: acb_default_network
2022/06/24 09:16:34 Setting up Docker configuration...
2022/06/24 09:16:34 Successfully set up Docker configuration
2022/06/24 09:16:34 Logging in to registry: 22ae24065bc443628893486d17f8a80c.azurecr.io
2022/06/24 09:16:35 Successfully logged into 22ae24065bc443628893486d17f8a80c.azurecr.io
2022/06/24 09:16:35 Executing step ID: acb_step_0. Timeout(sec): 5400, Working directory: '', Network: 'acb_default_network'
2022/06/24 09:16:35 Scanning for dependencies...
2022/06/24 09:16:36 Successfully scanned dependencies
2022/06/24 09:16:36 Launching container with name: acb_step_0
Sending build context to Docker daemon  66.56kB
Step 1


done
#
# To activate this environment, use
#
#     $ conda activate /azureml-envs/azureml_ee84bd7cfbdbc3e7def54164ac0068c1
#
# To deactivate an active environment, use
#
#     $ conda deactivate

[91m

  current version: 4.9.2
  latest version: 4.13.0

Please update conda by running

    $ conda update -n base -c defaults conda


Removing intermediate container c2e7d895632c
 ---> a2443466cfdb
Step 9/21 : ENV PATH /azureml-envs/azureml_ee84bd7cfbdbc3e7def54164ac0068c1/bin:$PATH
 ---> Running in a71f301637e9
Removing intermediate container a71f301637e9
 ---> db1014eef1e7
Step 10/21 : COPY azureml-environment-setup/send_conda_dependencies.py azureml-environment-setup/send_conda_dependencies.py
 ---> 7689a83f6aa7
Step 11/21 : RUN echo "Copying environment context"
 ---> Running in 83b47dfbb079
Copying environment context
Removing intermediate container 83b47dfbb079
 ---> fdb5a3fbadcb
Step 12/21 : COPY azureml-environment-setup/environment_context.json azureml-environment-setup/environmen

Package creation Succeeded
Logging into Docker registry 22ae24065bc443628893486d17f8a80c.azurecr.io
Logging into Docker registry 22ae24065bc443628893486d17f8a80c.azurecr.io
Building Docker image from Dockerfile...
Step 1/5 : FROM 22ae24065bc443628893486d17f8a80c.azurecr.io/azureml/azureml_a5d20ba36fc1a170b4082f3f2d323347
 ---> ba0d2f8d95dd
Step 2/5 : COPY azureml-app /var/azureml-app
 ---> eba33da3db1c
Step 3/5 : RUN mkdir -p '/var/azureml-app' && echo eyJhY2NvdW50Q29udGV4dCI6eyJzdWJzY3JpcHRpb25JZCI6IjY5NzFmNWFjLThhZjEtNDQ2ZS04MDM0LTA1YWNlYTI0NjgxZiIsInJlc291cmNlR3JvdXBOYW1lIjoiYW1sLXF1aWNrc3RhcnRzLTE5OTI3MiIsImFjY291bnROYW1lIjoicXVpY2stc3RhcnRzLXdzLTE5OTI3MiIsIndvcmtzcGFjZUlkIjoiMjJhZTI0MDYtNWJjNC00MzYyLTg4OTMtNDg2ZDE3ZjhhODBjIn0sIm1vZGVscyI6e30sIm1vZGVsc0luZm8iOnt9fQ== | base64 --decode > /var/azureml-app/model_config_map.json
 ---> Running in 7972b1880bc9
 ---> cb038c1ff4be
Step 4/5 : RUN mv '/var/azureml-app/tmpr6j0rkis.py' /var/azureml-app/main.py
 ---> Running in b2afbb391f05
 --

In [16]:
# Show the service's logs to inspect how it started.
container_logs = service.get_logs()
print(container_logs)

/bin/bash: /azureml-envs/azureml_ee84bd7cfbdbc3e7def54164ac0068c1/lib/libtinfo.so.6: no version information available (required by /bin/bash)
2022-06-24T09:27:44,303393803+00:00 - iot-server/run 
/bin/bash: /azureml-envs/azureml_ee84bd7cfbdbc3e7def54164ac0068c1/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /azureml-envs/azureml_ee84bd7cfbdbc3e7def54164ac0068c1/lib/libtinfo.so.6: no version information available (required by /bin/bash)
2022-06-24T09:27:44,305936145+00:00 - rsyslog/run 
2022-06-24T09:27:44,307419569+00:00 - gunicorn/run 
2022-06-24T09:27:44,308899094+00:00 | gunicorn/run | 
2022-06-24T09:27:44,310883427+00:00 | gunicorn/run | ###############################################
/bin/bash: /azureml-envs/azureml_ee84bd7cfbdbc3e7def54164ac0068c1/lib/libtinfo.so.6: no version information available (required by /bin/bash)
2022-06-24T09:27:44,313160264+00:00 | gunicorn/run | AzureML Container Runtime Information
bash: /azureml-envs/azureml_e

In [17]:
import requests
import json

# Smoke-test the deployed service: probe the root URL, then POST a sample
# JSON payload to the scoring endpoint and print the decoded reply.
uri = service.scoring_uri
request_timeout = 30  # seconds; keeps the cell from hanging if the service is down

# Probe of the service root. The port must match the one used in the local
# deployment configuration. The response is checked rather than discarded so
# an unhealthy service is reported here.
health = requests.get("http://localhost:6789", timeout=request_timeout)
health.raise_for_status()

headers = {"Content-Type": "application/json"}
payload = {
    "query": "What color is the fox",
    "context": "The quick brown fox jumped over the lazy dog.",
}
data = json.dumps(payload)
response = requests.post(uri, data=data, headers=headers, timeout=request_timeout)
# Surface HTTP errors explicitly instead of failing later while decoding a
# non-JSON error body.
response.raise_for_status()
print(response.json())

test is {'query': 'What color is the fox', 'context': 'The quick brown fox jumped over the lazy dog.'}


In [18]:
# Show the service's logs again after the scoring request has been sent.
container_logs = service.get_logs()
print(container_logs)

/bin/bash: /azureml-envs/azureml_ee84bd7cfbdbc3e7def54164ac0068c1/lib/libtinfo.so.6: no version information available (required by /bin/bash)
2022-06-24T09:27:44,303393803+00:00 - iot-server/run 
/bin/bash: /azureml-envs/azureml_ee84bd7cfbdbc3e7def54164ac0068c1/lib/libtinfo.so.6: no version information available (required by /bin/bash)
/bin/bash: /azureml-envs/azureml_ee84bd7cfbdbc3e7def54164ac0068c1/lib/libtinfo.so.6: no version information available (required by /bin/bash)
2022-06-24T09:27:44,305936145+00:00 - rsyslog/run 
2022-06-24T09:27:44,307419569+00:00 - gunicorn/run 
2022-06-24T09:27:44,308899094+00:00 | gunicorn/run | 
2022-06-24T09:27:44,310883427+00:00 | gunicorn/run | ###############################################
/bin/bash: /azureml-envs/azureml_ee84bd7cfbdbc3e7def54164ac0068c1/lib/libtinfo.so.6: no version information available (required by /bin/bash)
2022-06-24T09:27:44,313160264+00:00 | gunicorn/run | AzureML Container Runtime Information
bash: /azureml-envs/azureml_e