In [1]:
from azureml.core import Workspace, Experiment, Environment
from azureml.core.environment import CondaDependencies
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep
from azureml.pipeline.core import ScheduleRecurrence, Schedule

Failure while loading azureml_run_type_providers. Failed to load entrypoint automl = azureml.train.automl.run:AutoMLRun._from_run_dto with exception (numpy 1.20.2 (c:\users\rkoirala\anaconda3\lib\site-packages), Requirement.parse('numpy<=1.19.3; sys_platform == "win32"'), {'azureml-dataset-runtime'}).


In [2]:
ws = Workspace.from_config()

In [3]:
myenv = Environment(name='env_azure_pipeline')
myenv_dep = CondaDependencies.create(conda_packages=['pandas', 'scikit-learn', 'pip'], 
                                    pip_packages=['azureml-sdk', 'azureml-dataset-runtime', 
                                                 'azureml-dataset-runtime[fuse,pandas]'])
myenv.python.conda_dependencies = myenv_dep
myenv.register(ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210513.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "env_azure_pipeline",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                "conda-fo

In [4]:
compute_name = 'rk-test-compute'
compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', 
                                                       max_nodes=2)

rk_cluster = ComputeTarget.create(ws, compute_name, compute_config)
rk_cluster.wait_for_completion(show_output=True)

SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


# How to make this work?

```python 
import joblib 
import pandas as pd
from azureml.core import Model

model_path = Model.get_model_path('classification_model', 
                                  _workspace=ws)
model = joblib.load(model_path)


batch_data_df = batch_data.to_pandas_dataframe()
# X = batch_data_df.drop('y', axis=1)
model.predict(batch_data_df)
```

# Run experiment 

In [9]:
parallel_run_config = ParallelRunConfig(source_directory='.',
                                        entry_script="batch_scoring_script_1.py",
                                        mini_batch_size="5",
                                        error_threshold=10,
                                        output_action="append_row",
                                        environment=myenv,
                                        compute_target=rk_cluster,
                                        node_count=2)

In [10]:
# Registered model name 
model_used = 'demo_iris_model'

# this is our batch data to make predictions 
batch_data = ws.datasets['Iris Data'] 

# Setting up output folder 
default_ds = ws.get_default_datastore()
output_dir = PipelineData(name='batch_inference', 
                          datastore=default_ds)

# output_dir = OutputFileDatasetConfig(name='batch_inferences')

In [12]:
parallel_run_step = ParallelRunStep(name='batch-score', 
                                   parallel_run_config=parallel_run_config,
                                   inputs=[batch_data.as_named_input('batch_data')],
                                   output=output_dir,
                                   arguments=[],
                                   allow_reuse=True)

pipeline = Pipeline(workspace=ws, 
                    steps=[parallel_run_step])

In [13]:
pipeline_run = Experiment(workspace=ws, 
                         name='batch_prediction_pipeline').submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)

Created step batch-score [c9080ca5][8a73242c-2fd3-4ead-81c5-51892f392dad], (This step will run and generate new outputs)
Submitted PipelineRun 7e37da03-9c99-4034-bdba-157c147598a5
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/7e37da03-9c99-4034-bdba-157c147598a5?wsid=/subscriptions/54245888-2ffe-41fa-b080-67a29997b41c/resourcegroups/rg-dataservices-sandbox-01/workspaces/ds_dev_01&tid=4ef6e02a-f252-4618-a1dc-03bd2f93157d
PipelineRunId: 7e37da03-9c99-4034-bdba-157c147598a5
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/7e37da03-9c99-4034-bdba-157c147598a5?wsid=/subscriptions/54245888-2ffe-41fa-b080-67a29997b41c/resourcegroups/rg-dataservices-sandbox-01/workspaces/ds_dev_01&tid=4ef6e02a-f252-4618-a1dc-03bd2f93157d
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: d1711bf3-a6dd-4393-8509-5829bd3404bd
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/d1711bf3-a6dd-4393-8509-5829bd3404bd?wsid=/subscriptions/54245888-2f


mkl_random-1.1.0     | 369 KB    |            |   0% 
mkl_random-1.1.0     | 369 KB    | ########## | 100% 

mkl-2019.4           | 204.1 MB  |            |   0% 
mkl-2019.4           | 204.1 MB  | 1          |   2% 
mkl-2019.4           | 204.1 MB  | 6          |   6% 
mkl-2019.4           | 204.1 MB  | 9          |   9% 
mkl-2019.4           | 204.1 MB  | #4         |  15% 
mkl-2019.4           | 204.1 MB  | #8         |  19% 
mkl-2019.4           | 204.1 MB  | ##2        |  22% 
mkl-2019.4           | 204.1 MB  | ##6        |  27% 
mkl-2019.4           | 204.1 MB  | ###        |  30% 
mkl-2019.4           | 204.1 MB  | ###3       |  34% 
mkl-2019.4           | 204.1 MB  | ###6       |  37% 
mkl-2019.4           | 204.1 MB  | ###9       |  40% 
mkl-2019.4           | 204.1 MB  | ####2      |  43% 
mkl-2019.4           | 204.1 MB  | ####5      |  46% 
mkl-2019.4           | 204.1 MB  | ####8      |  48% 
mkl-2019.4           | 204.1 MB  | #####1     |  51% 
mkl-2019.4           | 204

Removing intermediate container 4456db87140d
 ---> 1b07de39e06e
Step 9/19 : ENV PATH /azureml-envs/azureml_a3d9dd06675f6250dab3e82ffd329a2b/bin:$PATH
 ---> Running in fc64a06810a1
Removing intermediate container fc64a06810a1
 ---> 61d581826ca5
Step 10/19 : COPY azureml-environment-setup/send_conda_dependencies.py azureml-environment-setup/send_conda_dependencies.py
 ---> 579e03bf9846
Step 11/19 : RUN echo "Copying environment context"
 ---> Running in 93f1a5bccef1
Copying environment context
Removing intermediate container 93f1a5bccef1
 ---> aa845aaec585
Step 12/19 : COPY azureml-environment-setup/environment_context.json azureml-environment-setup/environment_context.json
 ---> 92afee4274df
Step 13/19 : RUN python /azureml-environment-setup/send_conda_dependencies.py -p /azureml-envs/azureml_a3d9dd06675f6250dab3e82ffd329a2b
 ---> Running in 19871d84b085
Report materialized dependencies for the environment
Reading environment context
Exporting conda environment
Sending request with mate

2021-10-12T18:24:22Z Executing 'Copy ACR Details file' on 10.0.0.6
2021-10-12T18:24:22Z Executing 'Copy ACR Details file' on 10.0.0.4
2021-10-12T18:24:22Z Copy ACR Details file succeeded on 10.0.0.4. Output: 
>>>   
>>>   
2021-10-12T18:24:23Z Copy ACR Details file succeeded on 10.0.0.6. Output: 
>>>   
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_fd4c267b43591d24118d62beb84a2960
01bf7da0a88c: Pulling fs layer
f3b4a5f15c7a: Pulling fs layer
57ffbe87baa1: Pulling fs layer
86120caa19f5: Pulling fs layer
c0f2d44469de: Pulling fs layer
638bc09d59ce: Pulling fs layer
cec7eddb8044: Pulling fs layer
31cda9815495: Pulling fs layer
a1b48f84f0d1: Pulling fs layer
484f821166bc: Pulling fs layer
6d70e314b196: Pulling fs layer
4c3b25cfbfa1: Pulling fs layer
632de08133ce: Pulling fs layer
64feec91fced: Pulling fs layer
b2bed34b31d7: Pulling fs layer
a756b52c6d13: Pulling fs layer
c77ef6c8a315: Pulling fs layer
6aaa56e805a7: Pulling fs layer
314a55c66e31: Pulling fs 



[2021-10-12T18:30:50.055153] The experiment failed. Finalizing run...
Cleaning up all outstanding Run operations, waiting 900.0 seconds
4 items cleaning up...
Cleanup took 0.19381427764892578 seconds
azureml_common.parallel_run.exception_info.Exception: Run failed. Below is the error detail:
EntryScriptException: Entry script error. No progress update in 270 seconds. No progress update in this check. Wait 276 seconds since last update. Remain 0 seconds to progress timeout. 152 among 543 mini batches processed. Please check logs/user/error/* and logs/sys/error/* to see if some errors have occurred.
Processed 152 of 543 mini batches.
The run() function in the entry script had raised exception for 212 times. Please check logs at logs/user/error/* for details.
  * Error 'PL not found.' occurred 192 times.
  * Error 'Failed to get input.' occurred 20 times.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "driver/amlbi_main.p

ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "User program failed with Exception: Run failed, please check logs for details. You can check logs/readme.txt for the layout of logs.",
        "messageParameters": {},
        "detailsUri": "https://aka.ms/azureml-known-errors",
        "details": []
    },
    "time": "0001-01-01T00:00:00.000Z"
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"User program failed with Exception: Run failed, please check logs for details. You can check logs/readme.txt for the layout of logs.\",\n        \"messageParameters\": {},\n        \"detailsUri\": \"https://aka.ms/azureml-known-errors\",\n        \"details\": []\n    },\n    \"time\": \"0001-01-01T00:00:00.000Z\"\n}"
    }
}

```python 
prediction_run = next(pipeline_run.get_childern())
prediction_output = prediction_run.get_output_data('batch_inference')
prediction_output.download(local_path='results')
```

# Deploy 

```python 
published_pipeline = pipeline_run.publish_pipeline(name='batch_prediction_pipeline', 
                                                   description='Batch pipeline demo', 
                                                   version='1.0')
print('Published Pipeline', published_pipeline)

rest_endpoint = published_pipeline.endpoint
print('Rest Endpoint', rest_endpoint)

hourly = ScheduleRecurrence(frequency='Hour', interval=1)
pipeline_schedule = Schedule.create(workspace=ws, 
                                   name='trains model hourly', 
                                   pipeline_id=published_pipeline.id, 
                                   experiment_name='batch_prediction_pipeline', 
                                   recurrence=hourly)
print('Pipeline Schedule', pipeline_schedule)
```

# Deleting things 

In [10]:
pipeline_schedule.disable()

NameError: name 'pipeline_schedule' is not defined

In [9]:
rk_cluster.delete()