In [1]:
#authentication
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core import Workspace

# Workspace coordinates, kept together so they are easy to spot and change.
# You can find tenant id under azure active directory->properties
TENANT_ID = '16b3c013-d300-468d-ac64-7eda0820b6d3'
SUBSCRIPTION_ID = 'fe38c376-b42a-4741-9e7c-f5d7c31e5873'
RESOURCE_GROUP = 'ProdRG'
WORKSPACE_NAME = 'Prod'

# Interactive login against the tenant above; the resulting auth object is
# handed to Workspace.get so the workspace handle uses the same identity.
ia = InteractiveLoginAuthentication(tenant_id=TENANT_ID)

ws = Workspace.get(
    name=WORKSPACE_NAME,
    subscription_id=SUBSCRIPTION_ID,
    resource_group=RESOURCE_GROUP,
    auth=ia,
)

In [2]:
# Name of the compute target that every pipeline step below runs on.
pipeline_cluster = "Demo-Compute-Cluster"

# Look the cluster up by name in the workspace. A missing cluster still raises
# KeyError (as plain indexing did), but the message now names the cluster and
# lists what the workspace actually exposes, which makes typos easy to spot.
available_targets = ws.compute_targets
if pipeline_cluster not in available_targets:
    raise KeyError(
        f"Compute target {pipeline_cluster!r} not found in workspace; "
        f"available targets: {sorted(available_targets)}"
    )
compute_target = available_targets[pipeline_cluster]

In [3]:
from azureml.core import Environment
from azureml.core.runconfig import RunConfiguration

# Fetch the environment registered under this name in the workspace.
curated_env = Environment.get(
    workspace=ws,
    name='AzureML-sklearn-1.0-ubuntu20.04-py38-cpu',
)

# Run configuration shared by the pipeline steps; it only overrides the environment.
pipeline_run_config = RunConfiguration()
pipeline_run_config.environment = curated_env

In [4]:
import pandas as pd

# Source CSV for the inference sample and how many leading rows to keep.
BANK_MARKETING_CSV_URL = "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
INFERENCE_SAMPLE_ROWS = 100

# Read the full CSV, then keep only the first rows as the inference sample.
data = pd.read_csv(BANK_MARKETING_CSV_URL).head(INFERENCE_SAMPLE_ROWS)

In [6]:
from azureml.core import Dataset,Datastore
from azureml.data.datapath import DataPath

from azureml.data.dataset_factory import TabularDatasetFactory

# NOTE: despite its name, `datastore_name` holds whatever
# ws.get_default_datastore() returns (a datastore object per the SDK docs),
# not a string. Later cells reuse it under this name.
datastore_name = ws.get_default_datastore()

# Upload the sampled frame under "scoring" on that datastore and register it
# as a named dataset so later cells can fetch it by name.
inference_data = TabularDatasetFactory.register_pandas_dataframe(
    data,
    target=(datastore_name, "scoring"),
    name="inference_classification_dataset",
    description='Data for Inference',
    tags={'type': 'TabularDataset'},
)

Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to scoring/ba0a3994-283a-43b0-b162-cfc91808a0f0/
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'emp.var.rate' -> 'emp_var_rate'
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'cons.price.idx' -> 'cons_price_idx'
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'cons.conf.idx' -> 'cons_conf_idx'
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'nr.employed' -> 'nr_employed'
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


In [7]:
# Load the registered dataset back into pandas and preview the first rows.
df = inference_data.to_pandas_dataframe()
df.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp_var_rate,cons_price_idx,cons_conf_idx,euribor3m,nr_employed,y
0,57,technician,married,high.school,no,no,yes,cellular,may,mon,...,1,999,1,failure,-1.8,92.893,-46.2,1.299,5099.1,no
1,55,unknown,married,unknown,unknown,yes,no,telephone,may,thu,...,2,999,0,nonexistent,1.1,93.994,-36.4,4.86,5191.0,no
2,33,blue-collar,married,basic.9y,no,no,no,cellular,may,fri,...,1,999,1,failure,-1.8,92.893,-46.2,1.313,5099.1,no
3,36,admin.,married,high.school,no,no,no,telephone,jun,fri,...,4,999,0,nonexistent,1.4,94.465,-41.8,4.967,5228.1,no
4,27,housemaid,married,high.school,no,yes,no,cellular,jul,fri,...,2,999,0,nonexistent,1.4,93.918,-42.7,4.963,5228.1,no


In [9]:
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.steps import PythonScriptStep

# Dataset registered earlier in the notebook, fetched by its registered name.
input_ds = Dataset.get_by_name(ws, 'inference_classification_dataset')

# OutputFileDatasetConfig with an explicit destination on the datastore.
output_path = (datastore_name, "azureml/classification_inference_prep_output/")
prepped_output_path = OutputFileDatasetConfig(destination=output_path)

# Step 1: run Scripts/prepare.py on the cluster, passing the output location
# as --output_path.
# NOTE(review): the named input is spelled with a capital "C"
# ('inference_Classification_dataset'), unlike the registered dataset name.
# Presumably prepare.py looks the input up under exactly this name; confirm
# before normalizing it.
prep_step = PythonScriptStep(
    name="Prepare Data for Inference",
    source_directory="./Scripts",
    script_name="prepare.py",
    arguments=["--output_path", prepped_output_path],
    inputs=[input_ds.as_named_input('inference_Classification_dataset')],
    compute_target=compute_target,
    runconfig=pipeline_run_config,
)

In [10]:
# This is the data to be used for scoring: the prep step's output, read as delimited files.
# NOTE(review): the file name is passed positionally. In the azureml-core docs the first
# parameter of OutputFileDatasetConfig.read_delimited_files appears to be `include_path`
# (file filtering is `path_glob`). Confirm this call selects the file as intended.
prepped_inference_data=prepped_output_path.read_delimited_files("prepped_inference_data_classification.csv")

In [11]:
from azureml.data import OutputFileDatasetConfig

# Destination on the datastore for the scoring step's results.
# Note: this rebinds `output_path`, which an earlier cell used for the prep output.
output_path = (datastore_name, "azureml/classification_scoring_results/")
inference_output_path = OutputFileDatasetConfig(destination=output_path)

In [12]:
# Step 2: run Scripts/score.py on the prepared data. The script receives the
# results location as --s_output_path and the model name as --model_name.
scoring_arguments = [
    "--s_output_path", inference_output_path,
    "--model_name", "automlmodel",
]

score_step = PythonScriptStep(
    name="Scoring",
    source_directory="./Scripts",
    script_name="score.py",
    arguments=scoring_arguments,
    inputs=[prepped_inference_data],
    compute_target=compute_target,
    runconfig=pipeline_run_config,
)

In [13]:
from azureml.pipeline.core import Pipeline
from azureml.core import Experiment

# Experiment under which this pipeline run is recorded.
experiment = Experiment(ws, name="classification-E2E_Inference_Pipeline")

# Assemble the two steps into a pipeline: data preparation, then scoring.
pipeline_steps = [prep_step, score_step]
pipeline = Pipeline(ws, pipeline_steps)

# Submit, then block until the run ends; the value returned by
# wait_for_completion() is the cell's displayed result.
pipeline_run = experiment.submit(pipeline, show_output=True)
pipeline_run.wait_for_completion()

Created step Prepare Data for Inference [2501214b][13c2ef0c-8e9a-4ae5-accb-37c9316abef7], (This step will run and generate new outputs)
Created step Scoring [0e0ba7c9][0b017c52-496e-41be-b0c0-cd4e11180965], (This step will run and generate new outputs)
Submitted PipelineRun 9329da75-acb1-4832-a7ac-2539afae5a5f
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/9329da75-acb1-4832-a7ac-2539afae5a5f?wsid=/subscriptions/fe38c376-b42a-4741-9e7c-f5d7c31e5873/resourcegroups/ProdRG/workspaces/Prod&tid=16b3c013-d300-468d-ac64-7eda0820b6d3
PipelineRunId: 9329da75-acb1-4832-a7ac-2539afae5a5f
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/9329da75-acb1-4832-a7ac-2539afae5a5f?wsid=/subscriptions/fe38c376-b42a-4741-9e7c-f5d7c31e5873/resourcegroups/ProdRG/workspaces/Prod&tid=16b3c013-d300-468d-ac64-7eda0820b6d3
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 91a35125-4e62-49c9-86bf-7be0eaa41e3f
Link to Azure Machine Learning Portal: https://ml.a

'Finished'

In [101]:
# Disabled cell: the ParallelRunStep-based scoring variant below is wrapped in a
# triple-quoted string, so none of it executes; the cell only evaluates a string
# literal.
# NOTE(review): the saved "Steps defined" output presumably dates from an earlier
# run when this code was live. If it is re-enabled, it rebinds `output_path` and
# `inference_output_path`, which the PythonScriptStep-based cells also define.
'''from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep
from azureml.data import OutputFileDatasetConfig

output_path = (datastore_name, f"azureml/classification_scoring_results/")
inference_output_path = OutputFileDatasetConfig(destination = output_path)

parallel_run_config = ParallelRunConfig(
    source_directory="./Scripts",
    entry_script="score.py",
    mini_batch_size="5",
    error_threshold=10,
    output_action="append_row",
    environment=curated_env,
    compute_target=compute_target,
    node_count=2)

parallelrun_step = ParallelRunStep(
    name='batch-scoring',
    parallel_run_config=parallel_run_config,
    inputs=[prepped_inference_data],
    output=inference_output_path,
    arguments=['--model_name','automlmodel'],
    allow_reuse=False
)

print('Steps defined')'''

Steps defined


In [14]:
# Publish the pipeline behind the completed run so it can be triggered again
# later. You can see endpoints under pipelines on UI.
published_pipeline = pipeline_run.publish_pipeline(
    name='centrica-workshop-batch-pipeline',
    description='Centrica Workshop- Batch scoring',
    version='1.0',
)

# Last expression: display the published pipeline summary.
published_pipeline

Name,Id,Status,Endpoint
centrica-workshop-batch-pipeline,50f9b786-9991-4abf-8e50-e3cb79f4f8e9,Active,REST Endpoint


In [15]:
# Keep the published pipeline's endpoint URL in a variable and print it.
rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)

https://uksouth.api.azureml.ms/pipelines/v1.0/subscriptions/fe38c376-b42a-4741-9e7c-f5d7c31e5873/resourceGroups/ProdRG/providers/Microsoft.MachineLearningServices/workspaces/Prod/PipelineRuns/PipelineSubmit/50f9b786-9991-4abf-8e50-e3cb79f4f8e9


In [17]:
from azureml.pipeline.core import Schedule,ScheduleRecurrence

# Recurrence: once every week.
weekly = ScheduleRecurrence(frequency='Week', interval=1)

# Attach the recurrence to the published pipeline. Scheduled runs are recorded
# under the 'Batch_predictions' experiment.
pipeline_schedule = Schedule.create(
    ws,
    name='weekly predictions',
    pipeline_id=published_pipeline.id,
    experiment_name='Batch_predictions',
    recurrence=weekly,
)

In [18]:
# List the schedules currently active in the workspace, with the published
# pipeline each one triggers.
# Use active_only=False to get all schedules including disabled schedules
schedules = Schedule.list(ws, active_only=True)

print("Your workspace has the following schedules set up:")
for schedule in schedules:
    # The closing parenthesis was missing from the message, leaving every
    # printed line with an unbalanced "(Published pipeline: ...".
    print("{} (Published pipeline: {})".format(schedule.id, schedule.pipeline_id))

Your workspace has the following schedules set up:
3bf45cfc-478f-42a3-81e2-456178e3b082 (Published pipeline: 50f9b786-9991-4abf-8e50-e3cb79f4f8e9
