# Test Deployed Webservice
In this notebook we test the deployed webservice by scoring data and saving all records that are predicted as defaults to an output file.

In [1]:
# Notebook-wide configuration: registered model name, name of the deployed
# ACI web service, and the datastore folder holding the CSV files to score.
model_name = "payment_default"
aci_service_name = "default-payment"
scoring_input_path = "ml_scoring_input"

### Initialize an existing Workspace

Initialize a workspace object using Azure CLI authentication.

In [2]:
import os

import azureml.core
import pandas as pd
from azureml.core.authentication import AzureCliAuthentication
from azureml.core.workspace import Workspace

# Authenticate with the Azure CLI credentials and load the workspace described
# by the local workspace config file.
cli_auth = AzureCliAuthentication()
ws = Workspace.from_config(auth=cli_auth)
ws.get_details()

# Summarise the SDK version and workspace coordinates in a small table.
# NOTE: `azureml.core.VERSION` needs the `azureml` name itself to be bound, i.e.
# an explicit `import azureml.core` (the `from azureml.core... import` lines do
# not bind it).
output = {
    'SDK version': azureml.core.VERSION,
    'Subscription ID': ws.subscription_id,
    'Workspace': ws.name,
    'Resource Group': ws.resource_group,
    'Location': ws.location,
}
# None means "do not truncate cell contents"; the old -1 sentinel is deprecated
# and rejected by current pandas releases.
pd.set_option('display.max_colwidth', None)
outputDf = pd.DataFrame(data=output, index=[''])
outputDf.T

Unnamed: 0,Unnamed: 1
SDK version,1.2.0
Subscription ID,b9f1c816-0637-419c-b0f6-a8392690c7aa
Workspace,paydmlws91
Resource Group,paydmlrg91
Location,westeurope


### Connect to Data
Connect or Register the Target Data Storage Account
We connect to an existing datastore that is registered with Azure ML (if it were not registered yet, the storage account we want to connect to would have to be registered first). Next, we can read and process the data.

In [7]:
from azureml.core.datastore import Datastore
from azureml.core.dataset import Dataset
from azureml.data.data_reference import DataReference

# The name of the datastore is stored as the "DATASTORENAME" secret in the
# workspace's default Key Vault.
keyvault = ws.get_default_keyvault()
blobstore_name = keyvault.get_secret(name="DATASTORENAME")
print(f"Retrieved Blob Store Name : {blobstore_name}")

# Look the datastore up by name in the workspace.
blob_datastore = Datastore.get(workspace=ws, datastore_name=blobstore_name)
print(f"Found Blob Datastore with name: {blobstore_name}")

Retrieved Blob Store Name : payments_data
Found Blob Datastore with name: payments_data


### Load the Data
We can load the payments dataset we want to score from csv files in a referenced blob storage account. The data has been processed to only contain the features needed to do the scoring. This approach allows the data to be in different files which are all collected into one dataset and then scored.

In [8]:
from azureml.core import Dataset
from azureml.data.datapath import DataPath

# `scoring_input_path` is set in the configuration cell at the top of the
# notebook. It is intentionally not redefined here, so changing it there
# actually changes which folder is read.
datastore_path = [
    DataPath(blob_datastore, scoring_input_path + '/*.csv')
]
# All CSV files matching the pattern are combined into one tabular dataset.
dataset = Dataset.Tabular.from_delimited_files(path=datastore_path)

# Drop the "default_payment" column (presumably the label the model predicts,
# so it must not be sent to the service as an input feature).
dataset = dataset.drop_columns(columns=["default_payment"])

# Materialise the dataset as a pandas DataFrame and preview the first 3 rows
df = dataset.to_pandas_dataframe()
df.head(3)

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT3,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6
0,260000.0,2,1,2,51,-1,-1,-1,-1,-1,...,9966,8517,22287,13668,21818,9966,8583,22301,0,3640
1,630000.0,2,2,2,41,-1,0,-1,-1,-1,...,6500,6500,6500,2870,1000,6500,6500,6500,2870,0
2,250000.0,1,1,2,29,0,0,0,0,0,...,63561,59696,56875,55512,3000,3000,3000,3000,3000,3000


### Score the Dataframe
Loop through the dataframe scoring each row.

In [9]:
import requests
import json

# URL for the web service
scoring_uri = 'http://90b206db-2868-4ddc-92ba-234b648c8031.westeurope.azurecontainer.io/score'

# Set the content type
headers = {'Content-Type': 'application/json'}

# If the service is authenticated, provide the key or token through the
# SCORING_KEY environment variable instead of hardcoding it in the notebook.
key = os.environ.get('SCORING_KEY', '')
# Only send an Authorization header when a key was actually supplied, so an
# unauthenticated service is not sent a meaningless placeholder token.
if key:
    headers['Authorization'] = f'Bearer {key}'

def predict(x):
    """Score a single record against the deployed web service.

    Parameters
    ----------
    x : pandas.Series
        One row of the input frame; it is serialised with ``x.to_json()`` and
        posted as the single element of the ``"data"`` list.

    Returns
    -------
    The first element of the ``"result"`` list in the service response.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    ValueError
        If the response carries no non-empty ``"result"`` list.
    """
    # Build the request body with json.dumps instead of string formatting.
    payload = json.dumps({"data": [json.loads(x.to_json())]})
    # A timeout keeps one unresponsive call from hanging the whole run.
    response = requests.post(scoring_uri, data=payload, headers=headers, timeout=30)
    response.raise_for_status()

    resp = response.json()
    # The service may return its JSON document wrapped in a JSON string
    # (double-encoded); decode a second time only in that case.
    if isinstance(resp, str):
        resp = json.loads(resp)
    print(f"{resp} | ", end='')  # Include this if you want to print out progress

    result = resp.get("result") if isinstance(resp, dict) else None
    if not result:
        raise ValueError(f"Unexpected response from scoring service: {resp!r}")
    # The POST returns an array; just return the first item with [0]
    score = result[0]
    return score

# To test on a subset: df[1:5].apply(predict, axis=1)
# Exclude any 'score' column left over from a previous run of this cell, so
# that re-running it does not send the old prediction to the service as an
# extra feature.
scores = df.drop(columns=['score'], errors='ignore').apply(predict, axis=1)

# Add the scores to the dataframe
df['score'] = scores
# Preview only the first rows instead of dumping the whole frame
df.head(10)

{'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [1]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [0]} | {'result': [1]} | {'result': [0]} | {'result': [0]} | {'result': [1]} | {'result': [0]} | {'result': [0]} | {'result': [1]} | 

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,score
0,260000.0,2,1,2,51,-1,-1,-1,-1,-1,...,8517,22287,13668,21818,9966,8583,22301,0,3640,0
1,630000.0,2,2,2,41,-1,0,-1,-1,-1,...,6500,6500,2870,1000,6500,6500,6500,2870,0,0
2,250000.0,1,1,2,29,0,0,0,0,0,...,59696,56875,55512,3000,3000,3000,3000,3000,3000,0
3,50000.0,2,3,3,23,1,2,0,0,0,...,28771,29531,30211,0,1500,1100,1200,1300,1100,0
4,130000.0,2,3,2,39,0,0,0,0,0,...,20616,11802,930,3000,1537,1000,2000,930,33764,0
5,70000.0,2,2,2,26,2,0,0,2,2,...,44006,46905,46012,2007,3582,0,3601,0,1820,1
6,450000.0,2,1,1,40,-2,-2,-2,-2,-2,...,560,0,0,19428,1473,560,0,0,1128,0
7,90000.0,1,1,2,23,0,0,0,-1,0,...,5398,6360,8292,5757,0,5398,1200,2045,2000,0
8,50000.0,1,3,2,23,0,0,0,0,0,...,28967,29829,30046,1973,1426,1001,1432,1062,997,0
9,50000.0,2,3,1,47,-1,-1,-1,-1,-1,...,2040,30430,257,3415,3421,2044,30430,257,0,0


In [10]:
# Keep only the rows whose score equals 1
scored = df[df['score'].eq(1)]
scored

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,score
5,70000.0,2,2,2,26,2,0,0,2,2,...,44006,46905,46012,2007,3582,0,3601,0,1820,1
18,210000.0,1,2,1,34,3,2,2,2,2,...,2500,2500,2500,0,0,0,0,0,0,1
21,80000.0,1,2,2,34,2,2,2,2,2,...,77519,82607,81158,7000,3500,0,7000,0,4000,1
24,30000.0,1,2,2,37,4,3,2,-1,0,...,20878,20582,19357,0,0,22000,4200,2000,3100,1


In [11]:
import time
# Name the output file after the current local time, then write the filtered
# records to it as CSV.
filename = time.strftime("scored-%Y%m%d-%H%M%S.csv")
scored.to_csv(path_or_buf=filename)

In [12]:
import os
# Target folder inside the blob datastore
scoring_output_path = 'ml_scoring_output'

print(f"Uploading {filename}")
# Upload the local CSV to the datastore (without overwriting an existing
# file), then delete the local copy.
dref = blob_datastore.upload_files(
    [filename],
    target_path=scoring_output_path,
    overwrite=False,
    show_progress=False,
)
print("Done.")
os.remove(filename)

Uploading scored-20200402-214418.csv
Done.
