In [8]:
# should restart the kernel each time update the package
# update azure storage (change version) in order to use BlockBlobService

!pip install azure-storage==0.30.0

Collecting azure-storage==0.30.0
[?25l  Downloading https://files.pythonhosted.org/packages/76/28/e74b38107b3c087e4de18dc20bfb15f6c3d9b766ae827bf42fc79170ffe2/azure-storage-0.30.0.zip (153kB)
[K    100% |████████████████████████████████| 163kB 4.6MB/s ta 0:00:01
Building wheels for collected packages: azure-storage
  Running setup.py bdist_wheel for azure-storage ... [?25ldone
[?25h  Stored in directory: /home/nbuser/.cache/pip/wheels/8c/00/22/879600d3b3e5d10fa31312d498f9ae8ac6cc6d2c59aac5acbb
Successfully built azure-storage
Installing collected packages: azure-storage
  Found existing installation: azure-storage 0.20.0
    Uninstalling azure-storage-0.20.0:
      Successfully uninstalled azure-storage-0.20.0
Successfully installed azure-storage-0.30.0


In [1]:
import os
import numpy as np
import pandas as pd

import azureml
from azureml.core import Workspace, Run
from azureml.core.model import Model

# check core SDK version number
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  0.1.65


## Load workspace

In [2]:
# load workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()
print(ws.name, ws.location, ws.resource_group, ws.location, sep = '\t')

Found the config file in: /home/nbuser/library/aml_config/config.json
Xiangzhe-WS	westeurope	Xiangzhe-ML	westeurope


## Retrieve the model

You registered a model in your workspace in the previous tutorial.

Don't have to execute this sell every time.

In [3]:
model = Model(ws, 'nyc_taxi_model')
model.download(target_dir = '.')
 
# verify the downloaded model file
os.stat('./nyc_taxi_model.pkl')

Exception: Illegal state. Paths in target_dir is []

## Deploy as web service

Once you've tested the model and are satisfied with the results, deploy the model as a web service hosted in ACI.

### Create scoring script

Create the scoring script, called `score.py`, used by the web service call to show how to use the model.

You must include two required functions into the scoring script:

- The `init()` function, which typically loads the model into a global object. This function is run only once when the Docker container is started.

- The `run(input_data)` function uses the model to predict a value based on the input data. Inputs and outputs to the run typically use JSON for serialization and de-serialization, but other formats are supported.

In [4]:
%%writefile score.py

import os
import json
import pickle
import numpy as np

from sklearn.externals import joblib
from sklearn.linear_model import LinearRegression

from azureml.core.model import Model

def init():
    global model
    # retreive the path to the model file using the model name
    model_path = Model.get_model_path('nyc_taxi_model')
    model = joblib.load(model_path)

def run(raw_data):
    data = np.array(json.loads(raw_data)['data'])
    # make prediction
    y_hat = model.predict(data)
    return json.dumps(y_hat.tolist())

Overwriting score.py


### Create environment file

Next, create an environment file, called myenv.yml, that specifies all of the script's package dependencies. This file is used to ensure that all of those dependencies are installed in the Docker image.

In [5]:
from azureml.core.conda_dependencies import CondaDependencies 

myenv = CondaDependencies()
myenv.add_conda_package("scikit-learn")
myenv.add_conda_package("numpy")
myenv.add_conda_package("pandas")
myenv.add_pip_package("pynacl==1.2.1")

with open("myenv.yml","w") as f:
    f.write(myenv.serialize_to_string())

Review the content of the `myenv.yml` file.

In [6]:
with open("myenv.yml","r") as f:
    print(f.read())

# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
    # Required packages for AzureML execution, history, and data preparation.
  - azureml-defaults
  - pynacl==1.2.1
- scikit-learn
- numpy
- pandas



### Create configuration file

Create a deployment configuration file and specify the number of CPUs and gigabyte of RAM needed for your ACI container. While it depends on your model, the default of 1 core and 1 gigabyte of RAM is usually sufficient for many models. If you feel you need more later, you would have to recreate the image and redeploy the service.

In [7]:
from azureml.core.webservice import AciWebservice

aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, 
                                               memory_gb=1, 
                                               tags={"data": "nyc-taxi",  "method" : "sklearn"}, 
                                               description='Predict taxi trip duration with sklearn')

### Deploy in ACI

Estimated time to complete: about **7-8 minutes**

Configure the image and deploy. The following code goes through these steps:

- Build an image using:
    - The scoring file (score.py)
    - The environment file (myenv.yml)
    - The model file
- Register that image under the workspace.
- Send the image to the ACI container.
- Start up a container in ACI using the image.
- Get the web service HTTP endpoint.

In [11]:
%%time
from azureml.core.webservice import Webservice
from azureml.core.image import ContainerImage

# configure the image
image_config = ContainerImage.image_configuration(execution_script="score.py", 
                                                  runtime="python", 
                                                  conda_file="myenv.yml")

service = Webservice.deploy_from_model(workspace=ws,
                                       name='nyc-taxi-dsvm-service',
                                       deployment_config=aciconfig,
                                       models=[model],
                                       image_config=image_config)

service.wait_for_deployment(show_output=True)

WebserviceException: Error, there is already a service with name nyc-taxi-dsvm-service found in workspace Xiangzhe-WS

Get the scoring web service's HTTP endpoint, which accepts REST client calls. This endpoint can be shared with anyone who wants to test the web service or integrate it into an application.

In [12]:
print(service.scoring_uri)

NameError: name 'service' is not defined

### Test deployed service

In [10]:
from sklearn import preprocessing

pd_dataframe = pd.read_pickle("sub_data_after_prep.pkl")

y_test = np.array(pd_dataframe["trip_duration"]).astype(float)
y_test = np.log(y_test)
X_test = np.array(pd_dataframe.drop(["trip_duration"],axis = 1))

# normalize input
scaler = preprocessing.StandardScaler().fit(X_test)
X_test = scaler.transform(X_test)



In [11]:
import json
from sklearn.metrics import mean_squared_error

# find 30 random samples from test set
n = 30
sample_indices = np.random.permutation(X_test.shape[0])[0:n]

test_samples = json.dumps({"data": X_test[sample_indices].tolist()})
test_samples = bytes(test_samples, encoding = 'utf8')

# predict using the deployed model
y_pred = json.loads(service.run(input_data = test_samples))

mse = mean_squared_error(y_test[sample_indices], y_pred)
print("Mean Squared Error for Linear Regression: {}".format(mse))

Mean Squared Error for Linear Regression: 0.22459201319889807


You can also send raw HTTP request to test the web service. Each time, we can only test one raw.

In [12]:
import requests
import json
from sklearn.metrics import mean_squared_error

# send a random row from the test set to score
random_index = np.random.randint(0, len(X_test)-1)
input_data = "{\"data\": [" + str(list(X_test[random_index])) + "]}"

headers = {'Content-Type':'application/json'}

y_pred = requests.post(service.scoring_uri, input_data, headers = headers)

print("POST to url", service.scoring_uri)
#print("input data:", input_data)
print("label:", y_test[random_index])
print("prediction:", y_pred.text)

POST to url http://13.80.134.36:80/score
label: 6.0473721790462776
prediction: "[6.232244119527342]"


## Clean up a web service

In [None]:
service.delete()