In [2]:
# Import dependencies
import train_xgb
import json

In [None]:
from azureml.core import Workspace
# Obtain the Azure ML workspace handle used by every later cell.
ws = Workspace.from_config()
# Echo the workspace's identifying attributes, one per line.
print(
    ws.name,
    ws.resource_group,
    ws.location,
    ws.subscription_id,
    sep='\n',
)

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
# Get-or-create the CPU compute cluster.
cpu_cluster_name = "cpu-cluster"

try:
    # Look the cluster up by name; ComputeTargetException is handled below
    # as the "no such cluster yet" case.
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # Provision a new cluster sized between 1 and 5 STANDARD_D2_V2 nodes.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        min_nodes=1,
        max_nodes=5,
    )
    cpu_cluster = ComputeTarget.create(
        workspace=ws,
        name=cpu_cluster_name,
        provisioning_configuration=compute_config,
    )

# Runs for both the reused and the newly created cluster.
cpu_cluster.wait_for_completion(show_output=True)

In [None]:

from azureml.data.dataset_factory import TabularDatasetFactory
# Reuse the dataset registered in the workspace when it exists; otherwise
# load it from Kaggle and register the training split under the same name.
ds_key = 'Ames-housing-dataset'
ds_desc = 'Ames Housing training data.'

found = ds_key in ws.datasets.keys()
if found:
    dataset = ws.datasets[ds_key]
    print(f'Found registered {ds_key}, use it.')
else:
    train, test = train_xgb.load_data_clean(source='kaggle')
    print(f"train.shape = {train.shape}, test.shape = {test.shape}")
    # Register the train dataset in the workspace's default datastore
    blob = ws.get_default_datastore()
    dataset = TabularDatasetFactory.register_pandas_dataframe(
        train,
        blob,
        name=ds_key,
        description=ds_desc,
    )

In [None]:
%%writefile conda_env.yml

# Conda environment specification; the %%writefile magic above saves this
# cell's content to conda_env.yml, which a later cell passes to
# CondaDependencies.
# NOTE(review): inference-schema is the only package without a version
# constraint — confirm that leaving it unpinned is intended.
dependencies:
- python=3.6.2
# Packages installed through pip rather than conda
- pip:
  - inference-schema
  - azureml-defaults==1.32.0
- numpy>=1.16.0,<1.19.0
- pandas==0.25.1
- scikit-learn==0.22.1
- py-xgboost<=0.90
# Conda channels listed for resolving the packages above
channels:
- anaconda
- conda-forge

In [None]:
# Read the saved metrics JSON; the resulting object is later attached to the
# registered model as its tags.
with open('hdr-outputs/best_hdr_metrics.json', mode='r') as file:
    best_hdr_metrics = json.loads(file.read())

In [None]:
from azureml.core import Model
# Register the pickled model in the workspace, tagged with the loaded metrics.
model = Model.register(
    workspace=ws,
    model_path='hdr-outputs/model.pkl',
    model_name='Ames-Housing-XGB-Model',
    tags=best_hdr_metrics,
)
# Tab-separated summary of the registered model.
print(model.name, model.id, model.version, sep='\t')

In [None]:

from azureml.core.webservice import AciWebservice
# Deployment configuration for the ACI web service: 1 CPU core, 1 GB of
# memory, authentication and Application Insights both switched on.
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags={"data" : "Kaggle", "method" : "XGB"},
    description="Predict Ames Housing Prices",
    auth_enabled=True,
    enable_app_insights=True,
)

In [None]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Build the "project-env" environment from the conda spec written earlier.
env = Environment(name="project-env")
cd = CondaDependencies(conda_dependencies_file_path='conda_env.yml')
env.python.conda_dependencies = cd
# Register the environment in the workspace so later cells can fetch it by name.
env.register(workspace=ws)

In [None]:
%%time
import uuid
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment

# Reload the workspace and look up the registered model by name.
ws = Workspace.from_config()
model = Model(ws, 'Ames-Housing-XGB-Model')

# Fetch the latest registered "project-env". No version is pinned on purpose:
# Environment.register creates a new version whenever the definition changes,
# so a hard-coded version="1" would deploy a stale environment after
# conda_env.yml has been edited and re-registered.
myenv = Environment.get(workspace=ws, name="project-env")

# The scoring entry point and the environment it runs in.
inference_config = InferenceConfig(entry_script="entry_script.py", environment=myenv)

# Append the first four characters of a random UUID to the service name.
service_name = 'ames-housing-xgb-' + str(uuid.uuid4())[:4]
service = Model.deploy(workspace=ws,
                      name=service_name,
                      models=[model],
                      inference_config=inference_config,
                      deployment_config=aciconfig)

# Block until the deployment finishes, streaming progress to the cell output.
service.wait_for_deployment(show_output=True)

In [None]:
# Print the deployed web service's logs (useful for checking the deployment).
print(service.get_logs())

In [None]:
# Prepare data for request
# load_data_clean() returns a pair; only the second element (test) is kept.
_ , test = train_xgb.load_data_clean()
# NOTE(review): presumably label_encode applies the same categorical encoding
# used at training time — confirm against train_xgb.
test = train_xgb.label_encode(test)
# Request payload: the first rows of the test set (assumes `test` is a pandas
# DataFrame — TODO confirm), serialised as a {column: [values]} mapping under
# the 'data' key.
data = {'data': test.head().to_dict(orient='list')}

# Replace the next cell with the code from the 'Consume' tab of the endpoint
# and delete its 'data = {}' assignment, because `data` is already defined in this cell!

In [None]:
# Clean up resources.
# Nested try/finally: a failure while fetching the logs or deleting the web
# service must not prevent the remaining deletions from running, otherwise
# the service or the cluster would be left provisioned.
try:
    print(service.get_logs())
finally:
    try:
        service.delete()
    finally:
        cpu_cluster.delete()