## Import required packages

In [None]:
from azureml.core import Workspace, Experiment, Datastore, Environment, Dataset
from azureml.core.compute import ComputeTarget
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE
from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import PipelineParameter, PipelineData
from azureml.data.output_dataset_config import OutputTabularDatasetConfig, OutputDatasetConfig, OutputFileDatasetConfig
from azureml.data.datapath import DataPath
import logging

## Connect to Azure ML Workspace using the AML SDK
The code snippet below retrieves a reference to your AML workspace - you can interact directly with resources in your workspace via the SDK, similar to how you can use the Studio UI.

In [None]:
from azureml.core import Workspace

# Obtain the workspace reference, then echo its identifying details one per line
ws = Workspace.from_config()
for workspace_detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_detail)

## Create an Experiment
Experiments are logical containers of script runs, each of which can log its own metrics and outputs.

In [None]:
from azureml.core import Experiment

# TO-DO:
# Set experiment_name below to 'yourinitials_batch_scoring_pipeline_run'

experiment_name = "<YOUR-EXPERIMENT-NAME>"

# Bind the experiment (by name) to the workspace retrieved earlier
experiment = Experiment(workspace=ws, name=experiment_name)

## Retrieve a Reference to Compute Cluster
Get a pointer to your created AML Compute Cluster (`cpucluster-yourinitials`). You will use this as the compute engine for executing your script run.

In [None]:
from azureml.core.compute import ComputeTarget

# TO-DO:
# Set cpu_cluster_name below to the name of the cluster you previously created
# (cpucluster-yourinitials)

cpu_cluster_name = "<YOUR-COMPUTE-TARGET-NAME>"

# Look up the existing compute target in the workspace by name
cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)

## Retrieve Curated AutoML Environment from Azure ML Workspace
AML environments are reusable software environments that contain dependencies for model training/inferencing operations. These environments can be manually created, packaged into reusable docker containers, and then leveraged time and again for different MLOps activities.

AML supports a number of curated environments for popular open-source ML frameworks (TensorFlow, PyTorch, scikit-learn, etc.), including one for AutoML (AzureML-AutoML), which we will leverage here.

In [None]:
from azureml.core import Environment
from azureml.core.runconfig import DockerConfiguration
from azureml.core.conda_dependencies import CondaDependencies

# Retrieve the 'AzureML-AutoML' environment from the workspace
env = Environment.get(workspace=ws, name='AzureML-AutoML')

# Build a run configuration that uses this environment; it is handed to the
# pipeline step further down
run_config = RunConfiguration()
run_config.environment = env

## Define Pipeline Parameters
`PipelineParameter` objects serve as variable inputs to an Azure ML pipeline and can be specified at runtime. Update the pipeline parameters below to include parameters for the following variables:

| Pipeline Parameter Name |  Description |
|-------------------------|--------------|
| `model_name`  | The name of the model in your model registry which you will use for batch scoring (if you have completed Challenge 5 this should be the same value  you used for the `model_name` parameter there) |
| `datastore_name` | The name of the datastore where your output dataset will be saved |
| `inferencing_dataset_name` | The name of the test dataset you created ('<i>YOURINITIALS</i>-HOME-PRICE-TEST-DATA') |
| `scored_dataset_name` | Name of the dataset to be created upon pipeline execution with your scored home price results |

In [None]:
# TO-DO: Replace each empty default_value below with the value described in the table above

# Name of the registered model used for batch scoring
model_name = PipelineParameter(
    name='model_name',
    default_value='',
)

# Name of the datastore where the output dataset will be saved
datastore_name = PipelineParameter(
    name='datastore_name',
    default_value='',
)

# Name of the test dataset to score
inferencing_dataset_name = PipelineParameter(
    name='inferencing_dataset_name',
    default_value='',
)

# Name of the dataset created by the pipeline to hold the scored results
scored_dataset_name = PipelineParameter(
    name='scored_dataset_name',
    default_value='',
)

## Define Pipeline Steps
The pipeline below consists of a single step which executes an associated Python script located in the `./pipeline_step_scripts` directory. Here, we call `batch_score_data.py` and retrieve a dataset from your AML workspace (referenced by `inferencing_dataset_name`), load your ML model (referenced by `model_name`) into code, make predictions over the loaded data, and then save that as a new dataset (referenced by `scored_dataset_name` and `datastore_name`).

In [None]:
# Command-line arguments forwarded to batch_score_data.py; each flag is paired
# with the pipeline parameter that supplies its value at runtime
scoring_script_args = [
    '--model_name', model_name,
    '--datastore_name', datastore_name,
    '--inferencing_dataset_name', inferencing_dataset_name,
    '--scored_dataset_name', scored_dataset_name,
]

# Single pipeline step: fetch the inferencing data from the AML-linked
# datastore and score it with the trained model
batch_score_data_step = PythonScriptStep(
    name='Batch Score Home Data',
    source_directory='./pipeline_step_scripts',
    script_name='batch_score_data.py',
    arguments=scoring_script_args,
    runconfig=run_config,
    compute_target=cpu_cluster,
    allow_reuse=False,  # do not reuse results from a previous run of this step
)

## Create Pipeline and Run
Create an Azure ML Pipeline by specifying the steps to be executed then submit a new run. 

In [None]:
# Assemble the one-step pipeline in the workspace
pipeline = Pipeline(workspace=ws, steps=[batch_score_data_step])

# Submit the pipeline as a new run under the experiment created earlier
run = experiment.submit(pipeline)