In [None]:
from azureml.core import Workspace, Model, Dataset, Datastore, Experiment, Environment, ScriptRunConfig, RunConfiguration
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep
from azureml.core.environment import CondaDependencies
import joblib
import os
import pandas as pd
from datetime import datetime

In [None]:
### connecting to ML workspace
### --------------------
# `ws` is the workspace handle that the compute-target, pipeline and
# experiment cells further down all receive.

ws = Workspace.from_config()

In [None]:
### create compute target
### --------------------
# Look the cluster up by name first; only when the lookup fails is a new
# single-node cluster provisioned.

cpu_cluster_name = "cpu-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    # Lookup failed: provision a cluster that scales between 0 and 1 nodes.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_DS11_V2',
        min_nodes=0,
        max_nodes=1,
    )
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Runs for both the reused and the freshly created cluster.
compute_target.wait_for_completion(show_output=True)

In [None]:
### create directory for script
### --------------------
# This folder receives batch_score.py (written by the %%writefile cell) and
# is passed to the pipeline step as its source_directory.
# exist_ok keeps the cell re-runnable when the folder is already there.

os.makedirs('./scripts', exist_ok= True)

In [None]:
%%writefile scripts/batch_score.py

### batch_score.py
### --------------------

from azureml.core import Workspace, Model, Dataset, Datastore, Run
from azureml.core.authentication import ServicePrincipalAuthentication #
import joblib
import pandas as pd
from datetime import datetime

subscription_id = '<<insert your subscription ID>>'
resource_group = '<<insert name of your resource group>>'
workspace_name = '<<insert name of your Azure ML workspace>>'

####
 
svc_pr_password = Run.get_secret('<<name of your secret created in notebook 0>>')

svc_pr = ServicePrincipalAuthentication(tenant_id="<<insert your tenant ID>>", service_principal_id="<<insert ID of your service principal>>", service_principal_password=svc_pr_password)

###

ws = Workspace(subscription_id, resource_group, workspace_name, auth = svc_pr)

### Load data for scoring
# The registered dataset is materialised as a pandas DataFrame and the 'Sno'
# column is removed before scoring (presumably a serial number that is not a
# model feature - TODO confirm).
df = Dataset.get_by_name(ws, 'german_credit_hsg').to_pandas_dataframe()
df = df.drop(columns='Sno')

# Rows at positions 9..15 are the batch to score. An explicit copy is taken
# because a 'prediction' column is assigned to new_data further down; writing
# into a plain slice of df triggers pandas' SettingWithCopyWarning.
new_data = df.iloc[9:16].copy()

### Load model for scoring
model = Model(workspace = ws, name='german-credit-hsg')
# exist_ok=True lets the script be re-run in a directory that already holds a
# previously downloaded copy; without it download() raises on the existing file.
model.download(exist_ok=True)
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load models registered by a trusted party.
loaded_model = joblib.load('model.pkl')

### Score new data
# Predictions are computed first and then stored next to the inputs.
new_data['prediction'] = loaded_model.predict(new_data)

### write output csv
# The file is named after the current local timestamp.
timestamp = datetime.now().strftime("%Y_%m_%d__%H_%M_%S")
output_csv = f"{timestamp}.csv"
new_data.to_csv(output_csv)

### upload csv to datastore
# The CSV goes to the 'predictions' folder of the workspace's default datastore.
default_store = Datastore.get_default(ws)
default_store.upload_files([output_csv], target_path = './predictions')


In [None]:
### defining run environment
### --------------------
# Packages the scoring script needs on the remote compute target.

myenv = Environment(name="myenv")

conda_dep = CondaDependencies()
conda_dep.add_pip_package("joblib")
conda_dep.add_pip_package("pandas")
# The PyPI distribution is named "scikit-learn"; "sklearn" is only a
# deprecated placeholder package whose installation now fails.
conda_dep.add_pip_package("scikit-learn")
conda_dep.add_pip_package("azureml-sdk")

myenv.python.conda_dependencies = conda_dep

In [None]:
### creating run configuration
### --------------------
# The run configuration carries the pip/conda dependencies assembled in the
# environment cell.
# NOTE(review): the pipeline step also passes its own script_name and
# source_directory - confirm which script setting takes precedence.

rc = RunConfiguration(
    script='./scripts/batch_score.py',
    conda_dependencies=conda_dep,
)

In [None]:
### create pipeline step
### --------------------
# Single step that runs scripts/batch_score.py on the compute cluster with the
# run configuration defined above; allow_reuse is switched off for this step.

score_step = PythonScriptStep(
    name='Score step',
    script_name='batch_score.py',
    source_directory='./scripts',
    compute_target=compute_target,
    runconfig=rc,
    allow_reuse=False,
)


In [None]:
### create & validate pipeline
### --------------------
# The pipeline consists of the single scoring step.

steps = [score_step]

pipeline = Pipeline(workspace=ws, steps=steps)
# Left as the cell's last expression so the validation result is displayed.
pipeline.validate()

In [None]:
### submit pipeline run
### --------------------
# The pipeline is submitted under the 'Batch-Scoring-Remote' experiment.

experiment = Experiment(ws, 'Batch-Scoring-Remote')

# `run` is the object returned by the submission.
run = experiment.submit(pipeline)

In [None]:
### publish pipeline
### --------------------

# Optional: uncomment the next line to publish the validated pipeline under a fixed name.
#pipeline.publish(name='pred-ops-concept-test')