In [5]:
from azureml.core import Workspace, Experiment, Environment
from azureml.core.environment import CondaDependencies
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep
from azureml.pipeline.core import ScheduleRecurrence, Schedule

ModuleNotFoundError: No module named 'azureml.core'

In [3]:
# Obtain the Azure ML workspace handle from the saved configuration.
# Later cells reach the service through `ws` (environment registration,
# compute creation, dataset/datastore lookup, pipeline and schedule creation).
ws = Workspace.from_config()

In [9]:
# Runtime environment for the batch-scoring step: conda provides the data/ML
# stack, pip provides the Azure ML packages.
conda_pkgs = ['pandas', 'scikit-learn', 'pip']
pip_pkgs = ['azureml-sdk', 'azureml-dataset-runtime']

myenv = Environment(name='env_azure_pipeline')
myenv_dep = CondaDependencies.create(conda_packages=conda_pkgs,
                                     pip_packages=pip_pkgs)
myenv.python.conda_dependencies = myenv_dep

# Kept as the last expression so the registered definition is shown as the
# cell output.
myenv.register(ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210806.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "env_azure_pipeline",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                "conda-fo

In [10]:
compute_name = 'rk-test-compute'
compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2',
                                                       max_nodes=2)

# Reuse the cluster when the workspace already has one with this name, so the
# cell can be re-run (e.g. after a kernel restart) without attempting to
# provision the same compute target a second time.
rk_cluster = ws.compute_targets.get(compute_name)
if rk_cluster is None:
    rk_cluster = ComputeTarget.create(ws, compute_name, compute_config)

# Block until provisioning has finished; progress is echoed to the cell output.
rk_cluster.wait_for_completion(show_output=True)

SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


# How to make this work?

```python 
import joblib
import pandas as pd
from azureml.core import Model

# Locate the registered model's file and load the estimator from it.
model_path = Model.get_model_path('classification_model', _workspace=ws)
model = joblib.load(model_path)

# Materialize the dataset as a DataFrame and score every row.
# If the data still carries the label column, drop it first:
#     X = batch_data_df.drop('y', axis=1)
batch_data_df = batch_data.to_pandas_dataframe()
model.predict(batch_data_df)
```

# Run experiment 

In [11]:
# Settings for the parallel batch-scoring step.
parallel_run_config = ParallelRunConfig(
    # What to run.
    source_directory='.',
    entry_script='batch_scoring_script.py',
    # Where to run it.
    environment=myenv,
    compute_target=rk_cluster,
    node_count=2,
    # How the input is split and how results/errors are handled.
    mini_batch_size='1KB',
    output_action='append_row',
    error_threshold=10,
)

In [12]:
# Name under which the scoring model is registered; passed to the entry
# script through the step arguments.
model_used = 'classification_model'

# Registered dataset holding the records to predict on.
batch_data = ws.datasets['Irish Data 1']

# Pipeline output location, backed by the workspace's default datastore.
default_ds = ws.get_default_datastore()
output_dir = PipelineData(name='batch_inference', datastore=default_ds)

In [13]:
# The only step of the pipeline: score `batch_data` with the parallel-run
# configuration and write the results to `output_dir`.
parallel_run_step = ParallelRunStep(
    name='batch_score',
    parallel_run_config=parallel_run_config,
    inputs=[batch_data.as_named_input('batch_data')],
    output=output_dir,
    arguments=['--model_name', model_used],
    allow_reuse=True,
)

pipeline = Pipeline(workspace=ws, steps=[parallel_run_step])

In [14]:
# Submit the pipeline under a named experiment, then block until the run
# finishes while echoing its status to the cell output.
experiment = Experiment(workspace=ws, name='batch_prediction_pipeline')
pipeline_run = experiment.submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)

Created step batch_score [c2de2d09][82aaf4a3-ae9f-4d7e-aeee-39a72544e1bc], (This step will run and generate new outputs)
Submitted PipelineRun abfdd3ec-78d0-4751-acd3-b41875bdfe7b
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/abfdd3ec-78d0-4751-acd3-b41875bdfe7b?wsid=/subscriptions/038a8790-7ab1-483b-abba-30f101e8dcce/resourcegroups/aml-resources-mstutorial/workspaces/aml-mstutorial&tid=68fda48c-5b34-479d-91f9-034da6f0efe3
PipelineRunId: abfdd3ec-78d0-4751-acd3-b41875bdfe7b
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/abfdd3ec-78d0-4751-acd3-b41875bdfe7b?wsid=/subscriptions/038a8790-7ab1-483b-abba-30f101e8dcce/resourcegroups/aml-resources-mstutorial/workspaces/aml-mstutorial&tid=68fda48c-5b34-479d-91f9-034da6f0efe3
PipelineRun Status: NotStarted
PipelineRun Status: Running


Expected a StepRun object but received <class 'azureml.core.run.Run'> instead.
This usually indicates a package conflict with one of the dependencies of azureml-core or azureml-pipeline-core.
Please check for package conflicts in your python environment






ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "User program failed with Exception: Run failed, please check logs for details. You can check logs/readme.txt for the layout of logs.",
        "messageParameters": {},
        "detailsUri": "https://aka.ms/azureml-run-troubleshooting",
        "details": []
    },
    "time": "0001-01-01T00:00:00.000Z"
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"User program failed with Exception: Run failed, please check logs for details. You can check logs/readme.txt for the layout of logs.\",\n        \"messageParameters\": {},\n        \"detailsUri\": \"https://aka.ms/azureml-run-troubleshooting\",\n        \"details\": []\n    },\n    \"time\": \"0001-01-01T00:00:00.000Z\"\n}"
    }
}

```python 
# Take the first child run of the pipeline run (the pipeline defines a single
# step) and download its 'batch_inference' output into ./results.
prediction_run = next(pipeline_run.get_children())
prediction_output = prediction_run.get_output_data('batch_inference')
prediction_output.download(local_path='results')
```

# Deploy 

```python 
# Publish the pipeline behind `pipeline_run` and show its REST endpoint.
published_pipeline = pipeline_run.publish_pipeline(
    name='batch_prediction_pipeline',
    description='Batch pipeline demo',
    version='1.0',
)
print('Published Pipeline', published_pipeline)

rest_endpoint = published_pipeline.endpoint
print('Rest Endpoint', rest_endpoint)

# Schedule the published pipeline with a recurrence of one hour.
hourly = ScheduleRecurrence(frequency='Hour', interval=1)
pipeline_schedule = Schedule.create(
    workspace=ws,
    name='trains model hourly',
    pipeline_id=published_pipeline.id,
    experiment_name='batch_prediction_pipeline',
    recurrence=hourly,
)
print('Pipeline Schedule', pipeline_schedule)
```

# Deleting things 

In [None]:
# Turn off the hourly schedule.
# NOTE(review): `pipeline_schedule` is only assigned in the fenced snippet
# under "Deploy", not in an executed cell -- confirm that snippet has been run
# before this cell, otherwise the name is undefined.
pipeline_schedule.disable()

In [42]:
# Remove the compute cluster created earlier in the notebook.
# NOTE(review): any later submission that targets `rk_cluster` will need the
# compute cell re-run first -- confirm this is the intended final step.
rk_cluster.delete()