In [22]:
import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.core.dataset import Dataset
from azureml.train.automl import AutoMLConfig

In [38]:
#authentication
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core import Workspace

ia = InteractiveLoginAuthentication(tenant_id='16b3c013-d300-468d-ac64-7eda0820b6d3')

# You can find tenant id under azure active directory->properties
ws = Workspace.get(name='Prod',
                     subscription_id='fe38c376-b42a-4741-9e7c-f5d7c31e5873',
                     resource_group='ProdRG',auth=ia)

In [24]:
# compute target - you can use different compute targets in different steps in the pipeline
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

pipeline_cluster = "Demo-Compute-Cluster"
compute_target = ws.compute_targets[pipeline_cluster]

In [35]:
import pandas as pd

data = pd.read_csv(
    "https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv"
)
data.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,57,technician,married,high.school,no,no,yes,cellular,may,mon,...,1,999,1,failure,-1.8,92.893,-46.2,1.299,5099.1,no
1,55,unknown,married,unknown,unknown,yes,no,telephone,may,thu,...,2,999,0,nonexistent,1.1,93.994,-36.4,4.86,5191.0,no
2,33,blue-collar,married,basic.9y,no,no,no,cellular,may,fri,...,1,999,1,failure,-1.8,92.893,-46.2,1.313,5099.1,no
3,36,admin.,married,high.school,no,no,no,telephone,jun,fri,...,4,999,0,nonexistent,1.4,94.465,-41.8,4.967,5228.1,no
4,27,housemaid,married,high.school,no,yes,no,cellular,jul,fri,...,2,999,0,nonexistent,1.4,93.918,-42.7,4.963,5228.1,no


In [39]:
#we will store intermediate data between data preparation step and autoML step in the default datastore of the workspace.
datastore = ws.get_default_datastore()

In [40]:
from azureml.data.dataset_factory import TabularDatasetFactory

dataset = TabularDatasetFactory.register_pandas_dataframe(
    data, target=(datastore, "dataset"), name="AutoMLE2ETraininggPipeline_Classification_dataset"
)
#I creat dataset from pandas dataframe.
#Other way: You can create  tabular dataset by using ds=Dataset.Tabular.from_delimited_files(path=xxx) and register it with ds.register()

Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to dataset/3b7eaf49-af2d-493b-88a7-22e9586cc15f/
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'emp.var.rate' -> 'emp_var_rate'
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'cons.price.idx' -> 'cons_price_idx'
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'cons.conf.idx' -> 'cons_conf_idx'
Column header contains '.' This period will be translated to '_' as we write the data out to parquet files: 'nr.employed' -> 'nr_employed'
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


In [41]:
#Target: Configure the training run- Environment setup

from azureml.core import Environment
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

#env = Environment.get(workspace=ws, name='experiment_env', version='2')
curated_env=Environment.get(workspace=ws, name='AzureML-sklearn-1.0-ubuntu20.04-py38-cpu')
pipeline_run_config=RunConfiguration()
pipeline_run_config.environment=curated_env

# Add conda dependencies to the automl env:
#pipeline_run_config.environment.python.conda_dependencies = CondaDependencies.create(
 #conda_packages=['pandas'])  

In [42]:
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.steps import PythonScriptStep

#1st way: OutputFileDatasetConfig without giving destination
#prepped_output_path = OutputFileDatasetConfig(name="output_path") 
# OutputFileDatasetConfig  points a directory and CSV file will be written there. Also given as parameter below.
# If you do not give destination parameter, it will copy the output to the workspaceblobstore datastore, under the path /dataset/{run-id}/{output-name}. 
# Run-id is the name value on the overview of the job and outputname is the name given as a parameter. In my example, it is output_path.

#2nd way: OutputFileDatasetConfig with giving destination
output_path = (datastore, f"azureml/classification_prep_output/")
prepped_output_path = OutputFileDatasetConfig(destination = output_path)

input_ds = Dataset.get_by_name(ws, 'AutoMLE2ETraininggPipeline_Classification_dataset')

prep_step=PythonScriptStep(
    name="Prepare AutoML Classification",
    script_name="prepare.py",
    source_directory="./Scripts",
    #arguments=['--input-data',input_ds.as_named_input('AutoMLE2ETraininggPipeline_Classification_dataset'),'--prepped-data',output],
    arguments=["--output_path",prepped_output_path],
    inputs=[input_ds.as_named_input('AutoMLE2ETraininggPipeline_Classification_dataset')],
    compute_target=compute_target,
    runconfig=pipeline_run_config
)

In [43]:
#In an ML pipeline, the input data must be a Dataset object.

prepped_test_data=prepped_output_path.read_delimited_files("prepped_train_data_classification.csv") # This is the test data to send automlstep
prepped_train_data = prepped_output_path.read_delimited_files("prepped_test_data_classification.csv")

from azureml.pipeline.core import TrainingOutput,PipelineData

metrics_data = PipelineData(name='metrics_data',
                            datastore=datastore,
                            pipeline_output_name='metrics_output',
                            training_output=TrainingOutput(type='Metrics'))

model_data = PipelineData(name='best_model_data',
                          datastore=datastore,
                          pipeline_output_name='model_output',
                          training_output=TrainingOutput(type='Model'))
# two pipelinedata objects for automl outputs: metrics and the model.


In [44]:
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.steps import AutoMLStep

# Change iterations to a reasonable number (50) to get better accuracy
automl_settings = {
    "iteration_timeout_minutes" : 60,
    "iterations" : 50,
    "experiment_timeout_hours" : 6,
    "primary_metric" : 'AUC_weighted'
}
#the run will stop after 50 iterations or 60 minutes, whichever comes first.

automl_config = AutoMLConfig(task = 'classification',
                             path = '.',
                             debug_log = 'automated_ml_errors.log',
                             compute_target= compute_target,
                           #  run_configuration = pipeline_run_config,
                             featurization = 'auto',
                             training_data = prepped_train_data,
                             test_data=prepped_test_data,
                             label_column_name = 'y',
                             **automl_settings)
# debug_log local file if you want to see logs

train_step = AutoMLStep(name='AutoML_Classification',
    automl_config=automl_config,
    outputs=[metrics_data,model_data],
    enable_default_model_output=False,
    enable_default_metrics_output=False,
    allow_reuse=True)

In [45]:
from azureml.pipeline.core.graph import PipelineParameter

# The model name with which to register the trained model in the workspace.
model_name = PipelineParameter("model_name", default_value="AutoML_Classification_Test")

register_step = PythonScriptStep(
     name="register_model",
     script_name="register_model.py",
     source_directory="./Scripts",
     allow_reuse=False,
     arguments=["--model_name", model_name, "--model_path", model_data],
     inputs=[model_data],
     compute_target=compute_target,
     runconfig=pipeline_run_config)

In [46]:
from azureml.pipeline.core import Pipeline
from azureml.core import Experiment

experiment = Experiment(ws, name= "automl-classification-E2E_trainingPipeline")

pipeline = Pipeline(ws, [prep_step, train_step,register_step])

pipeline_run = experiment.submit(pipeline, show_output=True)
pipeline_run.wait_for_completion()

Created step Prepare AutoML Classification [2cc1b34b][e7404f40-b146-4bdd-90b0-bd75851b5883], (This step will run and generate new outputs)
Created step AutoML_Classification [9912095c][33bf63cf-af35-4a34-861b-1e84c2bb8ced], (This step will run and generate new outputs)Created step register_model [7a87a272][1ed8d80e-6e5e-40d4-bd5c-cace3f57056e], (This step will run and generate new outputs)

Submitted PipelineRun 68a6296d-a5b7-4dd1-bc30-206745e5bd0a
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/68a6296d-a5b7-4dd1-bc30-206745e5bd0a?wsid=/subscriptions/fe38c376-b42a-4741-9e7c-f5d7c31e5873/resourcegroups/ProdRG/workspaces/Prod&tid=16b3c013-d300-468d-ac64-7eda0820b6d3
PipelineRunId: 68a6296d-a5b7-4dd1-bc30-206745e5bd0a
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/68a6296d-a5b7-4dd1-bc30-206745e5bd0a?wsid=/subscriptions/fe38c376-b42a-4741-9e7c-f5d7c31e5873/resourcegroups/ProdRG/workspaces/Prod&tid=16b3c013-d300-468d-ac64-7eda0820b6d3
PipelineRun Status:

In [6]:
published_pipeline = pipeline_run.publish_pipeline(
                                                    name='classification-training-pipeline',
                                                    description='Training Pipeline - Classification',
                                                    version='1.0' 
                                                   )

published_pipeline

NameError: name 'pipeline_run' is not defined

In [23]:
rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)

https://uksouth.api.azureml.ms/pipelines/v1.0/subscriptions/fe38c376-b42a-4741-9e7c-f5d7c31e5873/resourceGroups/prodrg/providers/Microsoft.MachineLearningServices/workspaces/prod/PipelineRuns/PipelineSubmit/abe57942-1200-4586-878e-461fe7685551


In [8]:
from azureml.pipeline.core import Schedule,ScheduleRecurrence

weekly=ScheduleRecurrence(frequency='Week',interval=1)

pipeline_schedule=Schedule.create(ws,
                                name='weekly predictions',
                                pipeline_id='abe57942-1200-4586-878e-461fe7685551',
                                #pipeline_id=published_pipeline.id,
                                experiment_name='test',
                                recurrence=weekly)

In [21]:
pub_pipeline_id="abe57942-1200-4586-878e-461fe7685551"
schedules = Schedule.list(ws, pipeline_id=pub_pipeline_id)

# We will iterate through the list of schedules and 
# use the last recurrence schedule in the list for further operations: 
print("Found these schedules for the pipeline id {}:".format(pub_pipeline_id))
for schedule in schedules: 
    print(schedule.id)
    fetched_schedule = Schedule.get(ws, schedule.id)
    fetched_schedule.disable(wait_for_provisioning=True)
    print("Disabled schedule {}. New status is: {}".format(fetched_schedule.id, fetched_schedule.status))
    #if schedule.recurrence is not None:
     #   schedule_id = schedule.id

#print("Schedule id to be used for schedule operations: {}".format(schedule_id))

Found these schedules for the pipeline id abe57942-1200-4586-878e-461fe7685551:
989204a8-befe-4983-8647-84305fa4d4a7
Provisioning status: Completed
Disabled schedule 989204a8-befe-4983-8647-84305fa4d4a7. New status is: Disabled
f7d6bf8b-7bee-42c5-ba57-6535b6046d26
Provisioning status: Completed
Disabled schedule f7d6bf8b-7bee-42c5-ba57-6535b6046d26. New status is: Disabled
52f8b8d9-0cd2-4b5a-aba3-e78d09d926ba
Provisioning status: Completed
Disabled schedule 52f8b8d9-0cd2-4b5a-aba3-e78d09d926ba. New status is: Disabled
6a665f15-6c55-491c-9c87-d471c8a4e319
Provisioning status: Completed
Disabled schedule 6a665f15-6c55-491c-9c87-d471c8a4e319. New status is: Disabled
9f2160d4-ff82-46e3-9474-fe9d895d7acc
Provisioning status: Completed
Disabled schedule 9f2160d4-ff82-46e3-9474-fe9d895d7acc. New status is: Disabled
6aa7c7eb-6e15-4300-944a-72c87dd9917d
Provisioning status: Completed
Disabled schedule 6aa7c7eb-6e15-4300-944a-72c87dd9917d. New status is: Disabled


In [15]:
# Use active_only=False to get all schedules including disabled schedules
schedules = Schedule.list(ws, active_only=True) 
print("Your workspace has the following schedules set up:")
for schedule in schedules:
    print("{} (Published pipeline: {}".format(schedule.id, schedule.pipeline_id))

Your workspace has the following schedules set up:
52f8b8d9-0cd2-4b5a-aba3-e78d09d926ba (Published pipeline: abe57942-1200-4586-878e-461fe7685551
989204a8-befe-4983-8647-84305fa4d4a7 (Published pipeline: abe57942-1200-4586-878e-461fe7685551
6a665f15-6c55-491c-9c87-d471c8a4e319 (Published pipeline: abe57942-1200-4586-878e-461fe7685551
9f2160d4-ff82-46e3-9474-fe9d895d7acc (Published pipeline: abe57942-1200-4586-878e-461fe7685551
6aa7c7eb-6e15-4300-944a-72c87dd9917d (Published pipeline: abe57942-1200-4586-878e-461fe7685551


In [16]:
fetched_schedule = Schedule.get(ws, schedule_id)
print("Using schedule with id: {}".format(fetched_schedule.id))

Using schedule with id: f7d6bf8b-7bee-42c5-ba57-6535b6046d26


In [20]:
# Set the wait_for_provisioning flag to False if you do not want to wait  
# for the call to provision the schedule in the backend.
fetched_schedule.disable(wait_for_provisioning=True)
fetched_schedule = Schedule.get(ws, "6aa7c7eb-6e15-4300-944a-72c87dd9917d")
print("Disabled schedule {}. New status is: {}".format(fetched_schedule.id, fetched_schedule.status))

Provisioning status: Completed
Disabled schedule 6aa7c7eb-6e15-4300-944a-72c87dd9917d. New status is: Active


In [9]:
p = PublishedPipeline.get(ws, id="abe57942-1200-4586-878e-461fe7685551")
p.disable()

NameError: name 'PublishedPipeline' is not defined

In [None]:
Create the schedule recurrence
recurrence = ScheduleRecurrence(
frequency=schedule_frequency,
interval=schedule_interval,
week_days=schedule_week_days,
time_of_day=schedule_time_of_day)

#Create the schedule
recurring_schedule = Schedule.create(
ws, name=schedule_name,
description=schedule_desc,
pipeline_id=pipeline_id,
experiment_name=experiment_name,
recurrence=recurrence)