diff --git a/diabetes_regression/azureml_environment.json b/diabetes_regression/azureml_environment.json new file mode 100644 index 00000000..8a81614e --- /dev/null +++ b/diabetes_regression/azureml_environment.json @@ -0,0 +1,39 @@ +{ + "name": "diabetes_regression_sklearn", + "version": null, + "environmentVariables": { + "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE" + }, + "python": { + "userManagedDependencies": false, + "interpreterPath": "python", + "condaDependenciesFile": null, + "baseCondaEnvironment": null + }, + "docker": { + "enabled": true, + "baseImage": "mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04", + "baseDockerfile": null, + "sharedVolumes": true, + "shmSize": "2g", + "arguments": [], + "baseImageRegistry": { + "address": null, + "username": null, + "password": null + } + }, + "spark": { + "repositories": [], + "packages": [], + "precachePackages": true + }, + "databricks": { + "mavenLibraries": [], + "pypiLibraries": [], + "rcranLibraries": [], + "jarLibraries": [], + "eggLibraries": [] + }, + "inferencingStackVersion": null +} diff --git a/diabetes_regression/ci_dependencies.yml b/diabetes_regression/ci_dependencies.yml index a61731c4..a5ee8db7 100644 --- a/diabetes_regression/ci_dependencies.yml +++ b/diabetes_regression/ci_dependencies.yml @@ -5,16 +5,19 @@ dependencies: # The python interpreter version. - python=3.7.5 -- r=3.6.0 -- r-essentials=3.6.0 + # dependencies with versions aligned with conda_dependencies.yml. - numpy=1.18.1 - pandas=1.0.0 - scikit-learn=0.22.1 + # dependencies for MLOps with R. +- r=3.6.0 +- r-essentials=3.6.0 + - pip=20.0.2 - pip: - # dependencies shared with other environment .yml files. + # dependencies with versions aligned with conda_dependencies.yml. - azureml-sdk==1.0.79 # Additional pip dependencies for the CI environment. 
diff --git a/diabetes_regression/conda_dependencies.yml b/diabetes_regression/conda_dependencies.yml new file mode 100644 index 00000000..741f55c7 --- /dev/null +++ b/diabetes_regression/conda_dependencies.yml @@ -0,0 +1,33 @@ +# Details about the Conda environment file format: +# https://conda.io/docs/using/envs.html#create-environment-file-by-hand + +name: diabetes_regression_sklearn + +dependencies: + + # The python interpreter version. +- python=3.7.5 + + # Required by azureml-defaults, installed separately through Conda to + # get a prebuilt version and not require build tools for the install. +- psutil=5.6 #latest + +- numpy=1.18.1 +- pandas=1.0.0 +- scikit-learn=0.22.1 +#- r-essentials +#- tensorflow +#- keras + +- pip=20.0.2 +- pip: + + # Dependencies for training environment. + + - azureml-core==1.0.79 + + # Dependencies for scoring environment. + + # You must list azureml-defaults as a pip dependency + - azureml-defaults==1.0.79 + - inference-schema[numpy-support]==1.0.1 diff --git a/diabetes_regression/scoring/deployment_config_aci.yml b/diabetes_regression/scoring/deployment_config_aci.yml index 939483b5..d2e0ba12 100644 --- a/diabetes_regression/scoring/deployment_config_aci.yml +++ b/diabetes_regression/scoring/deployment_config_aci.yml @@ -1,5 +1,4 @@ ---- +computeType: ACI containerResourceRequirements: cpu: 1 memoryInGB: 4 -computeType: ACI \ No newline at end of file diff --git a/diabetes_regression/scoring/inference_config.yml b/diabetes_regression/scoring/inference_config.yml index 42947da8..52017bae 100644 --- a/diabetes_regression/scoring/inference_config.yml +++ b/diabetes_regression/scoring/inference_config.yml @@ -1,6 +1,6 @@ entryScript: score.py runtime: python -condaFile: ../scoring_dependencies.yml +condaFile: ../conda_dependencies.yml extraDockerfileSteps: schemaFile: sourceDirectory: diff --git a/diabetes_regression/scoring_dependencies.yml b/diabetes_regression/scoring_dependencies.yml deleted file mode 100644 index 
26ce3622..00000000 --- a/diabetes_regression/scoring_dependencies.yml +++ /dev/null @@ -1,36 +0,0 @@ -# Conda environment specification. The dependencies defined in this file will -# be automatically provisioned for managed runs. These include runs against -# the localdocker, remotedocker, and cluster compute targets. - -# Note that this file is NOT used to automatically manage dependencies for the -# local compute target. To provision these dependencies locally, run: -# conda env update --file conda_dependencies.yml - -# Details about the Conda environment file format: -# https://conda.io/docs/using/envs.html#create-environment-file-by-hand - -# For managing Spark packages and configuration, see spark_dependencies.yml. -# Version of this configuration file's structure and semantics in AzureML. -# This directive is stored in a comment to preserve the Conda file structure. -# [AzureMlVersion] = 2 - -name: diabetes_scoring - -dependencies: - - # The python interpreter version. -- python=3.7.5 - - # Required by azureml-defaults, installed separately through Conda to - # get a prebuilt version and not require build tools for the install. -- psutil=5.6 #latest - -- numpy=1.18.1 -- pandas=1.0.0 -- scikit-learn=0.22.1 - -- pip=20.0.2 -- pip: - # You must list azureml-defaults as a pip dependency - - azureml-defaults==1.0.85 - - inference-schema[numpy-support]==1.0.1 diff --git a/diabetes_regression/training_dependencies.yml b/diabetes_regression/training_dependencies.yml deleted file mode 100644 index 4d7a42a7..00000000 --- a/diabetes_regression/training_dependencies.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: diabetes_training - -dependencies: - - # The python interpreter version. 
-- python=3.7.5 - -- numpy=1.18.1 -- pandas=1.0.0 -- scikit-learn=0.22.1 -#- r-essentials -#- tensorflow -#- keras - -- pip=20.0.2 -- pip: - - azureml-core==1.0.79 diff --git a/docs/code_description.md b/docs/code_description.md index d69a6f30..d60df616 100644 --- a/docs/code_description.md +++ b/docs/code_description.md @@ -27,8 +27,8 @@ ### Environment Definitions -- `diabetes_regression/training_dependencies.yml` : Conda environment definition for the training environment (Docker image in which train.py is run). -- `diabetes_regression/scoring_dependencies.yml` : Conda environment definition for the scoring environment (Docker image in which score.py is run). +- `diabetes_regression/azureml_environment.json` : Azure ML environment definition for the training environment, including the base Docker image and a reference to the `conda_dependencies.yml` Conda environment file. +- `diabetes_regression/conda_dependencies.yml` : Conda environment definition for the environment used for both training and scoring (Docker image in which train.py and score.py are run). - `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment. ### Code diff --git a/docs/getting_started.md b/docs/getting_started.md index 1d75bc05..805db267 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -175,9 +175,9 @@ Great, you now have the build pipeline set up which automatically triggers every pipelines using R to train a model. This is enabled by changing the `build-train-script` pipeline variable to either of: * `diabetes_regression_build_train_pipeline_with_r.py` to train a model -with R on Azure ML Compute. You will also need to add the -`r-essentials` Conda packages into `diabetes_regression/scoring_dependencies.yml` -and `diabetes_regression/training_dependencies.yml`. +with R on Azure ML Compute. You will also need to uncomment (i.e. 
include) the +`r-essentials` Conda packages in the environment definition +`diabetes_regression/conda_dependencies.yml`. * `diabetes_regression_build_train_pipeline_with_r_on_dbricks.py` to train a model with R on Databricks. You will need to manually create a Databricks cluster and attach it to the ML Workspace as a diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py index c828c55c..b7d32f99 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py @@ -1,8 +1,8 @@ from azureml.pipeline.core.graph import PipelineParameter from azureml.pipeline.steps import PythonScriptStep from azureml.pipeline.core import Pipeline -from azureml.core import Workspace -from azureml.core.runconfig import RunConfiguration, CondaDependencies +from azureml.core import Workspace, Environment +from azureml.core.runconfig import RunConfiguration from azureml.core import Dataset, Datastore from ml_service.util.attach_compute import get_compute from ml_service.util.env_variables import Env @@ -28,17 +28,17 @@ def main(): print("aml_compute:") print(aml_compute) - # Create a run configuration environment - conda_deps_file = "diabetes_regression/training_dependencies.yml" - conda_deps = CondaDependencies(conda_deps_file) - run_config = RunConfiguration(conda_dependencies=conda_deps) - run_config.environment.docker.enabled = True - config_envvar = {} + # Create a reusable run configuration environment + # Read definition from diabetes_regression/azureml_environment.json + environment = Environment.load_from_directory(e.sources_directory_train) if (e.collection_uri is not None and e.teamproject_name is not None): builduri_base = e.collection_uri + e.teamproject_name builduri_base = builduri_base + "/_build/results?buildId=" - config_envvar["BUILDURI_BASE"] = builduri_base - 
run_config.environment.environment_variables = config_envvar + environment.environment_variables["BUILDURI_BASE"] = builduri_base + environment.register(aml_workspace) + + run_config = RunConfiguration() + run_config.environment = environment model_name_param = PipelineParameter( name="model_name", default_value=e.model_name) diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py index 1c383d50..96ddf2cf 100644 --- a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py +++ b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py @@ -1,7 +1,7 @@ from azureml.pipeline.steps import PythonScriptStep from azureml.pipeline.core import Pipeline -from azureml.core import Workspace -from azureml.core.runconfig import RunConfiguration, CondaDependencies +from azureml.core import Workspace, Environment +from azureml.core.runconfig import RunConfiguration from ml_service.util.attach_compute import get_compute from ml_service.util.env_variables import Env @@ -26,11 +26,19 @@ def main(): print("aml_compute:") print(aml_compute) - # Create a run configuration environment - conda_deps_file = "diabetes_regression/training_dependencies.yml" - conda_deps = CondaDependencies(conda_deps_file) - run_config = RunConfiguration(conda_dependencies=conda_deps) - run_config.environment.docker.enabled = True + # Create a reusable run configuration environment + # Read definition from diabetes_regression/azureml_environment.json + # Make sure to include `r-essentials` + # in diabetes_regression/conda_dependencies.yml + environment = Environment.load_from_directory(e.sources_directory_train) + if (e.collection_uri is not None and e.teamproject_name is not None): + builduri_base = e.collection_uri + e.teamproject_name + builduri_base = builduri_base + "/_build/results?buildId=" + environment.environment_variables["BUILDURI_BASE"] = builduri_base + 
environment.register(aml_workspace) + + run_config = RunConfiguration() + run_config.environment = environment train_step = PythonScriptStep( name="Train Model",