
Generate a Docker container within the CI build #142

Closed
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -47,6 +47,8 @@ coverage.xml
*.cover
.hypothesis/
.pytest_cache/
*-testresults.xml
test-output.xml

# Translations
*.mo
91 changes: 65 additions & 26 deletions .pipelines/azdo-base-pipeline.yml
@@ -1,26 +1,65 @@
# this pipeline should be ignored for now
parameters:
pipelineType: 'training'

steps:
- script: |
flake8 --output-file=$(Build.BinariesDirectory)/lint-testresults.xml --format junit-xml
workingDirectory: '$(Build.SourcesDirectory)'
displayName: 'Run code quality tests'
enabled: 'true'

- script: |
pytest --junitxml=$(Build.BinariesDirectory)/unit-testresults.xml $(Build.SourcesDirectory)/tests/unit
displayName: 'Run unit tests'
enabled: 'true'
env:
SP_APP_SECRET: '$(SP_APP_SECRET)'

- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '$(Build.BinariesDirectory)/*-testresults.xml'
testRunTitle: 'Linting & Unit tests'
failTaskOnFailedTests: true
displayName: 'Publish linting and unit test results'
enabled: 'true'
jobs:
- job: BuildEnvironment
displayName: "Set up CI environment"
steps:
- task: AzureCLI@1
displayName: Generate build container
name: BuildContainer
inputs:
azureSubscription: AzureResourceConnection
scriptLocation: inlineScript
inlineScript: |
set -euxo pipefail # fail on errors, unset variables, and pipe failures; echo commands

# Get name and server of the container registry linked to the Azure ML workspace.
acrId=$(az resource show -g "$RESOURCE_GROUP" -n "$WORKSPACE_NAME" --resource-type Microsoft.MachineLearningServices/workspaces --query properties.containerRegistry -o tsv)
read -r acrName acrServer <<< $(az resource show --id "$acrId" --query '[[name,properties.loginServer]]' -o tsv)

dockerDir=environment_setup/build-image
# Generate a reproducible image tag: checksum every file in $dockerDir in
# alphabetical order, then checksum the combined list of names and digests.
imageTag=$(find "$dockerDir" -type f -exec md5sum {} \; | sort -k 2 | md5sum | cut -f1 -d ' ')

# If the image with the generated tag doesn't already exist, build it.
repo="modelbuild/$(BUILDCONTAINER_NAME)"
if ! az acr repository show -n "$acrName" --image "$repo:$imageTag" -o table; then
az acr build \
-r "$acrName" \
-t "$repo:$imageTag" \
-t "$repo:latest" \
"$dockerDir"
fi

# Make the image name available to subsequent jobs.
echo "##vso[task.setvariable variable=BUILD_IMAGE;isOutput=true]$acrServer/$repo:$imageTag"

- job: "Model_CI"
displayName: "Model CI"
dependsOn: BuildEnvironment
variables:
BUILD_IMAGE: $[ dependencies.BuildEnvironment.outputs['BuildContainer.BUILD_IMAGE'] ]
container:
image: $(BUILD_IMAGE)
endpoint: ContainerRegistry
timeoutInMinutes: 0
steps:

- script: |
tox
displayName: 'Linting & unit tests'

- task: PublishTestResults@2
condition: succeededOrFailed()
inputs:
testResultsFiles: '*-testresults.xml'
testRunTitle: 'Linting & Unit tests'
failTaskOnFailedTests: true
displayName: 'Publish test results'

- task: PublishCodeCoverageResults@1
displayName: 'Publish coverage report'
condition: succeededOrFailed()
inputs:
codeCoverageTool: Cobertura
summaryFileLocation: 'coverage.xml'
failIfCoverageEmpty: true
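The reproducible image tag computed by the inline script above can be sketched in Python for illustration (the pipeline itself uses `find`, `sort`, and `md5sum`; this is a rough equivalent, not part of the PR, and sorts by full path rather than by `sort -k 2`):

```python
import hashlib
from pathlib import Path

def image_tag(docker_dir: str) -> str:
    """Hash every file under docker_dir in a stable (sorted) order, then
    hash the combined listing of digests and paths. Any change to any file
    yields a new tag; identical contents yield the same tag."""
    lines = []
    for path in sorted(p for p in Path(docker_dir).rglob("*") if p.is_file()):
        digest = hashlib.md5(path.read_bytes()).hexdigest()
        lines.append(f"{digest}  {path}")
    listing = "\n".join(lines) + "\n"
    return hashlib.md5(listing.encode()).hexdigest()
```

Because the tag depends only on the files in the Docker build directory, an unchanged directory reuses the previously built image, which is exactly what the `az acr repository show` guard checks for.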
26 changes: 8 additions & 18 deletions .pipelines/azdo-ci-build-train.yml
@@ -7,26 +7,26 @@ trigger:
exclude:
- docs/
- environment_setup/
- ml_service/util/create_scoring_image.*
- ml_service/util/create_scoring_image.py
- ml_service/util/smoke_test_scoring_service.py

variables:
- template: azdo-variables.yml
- group: devopsforai-aml-vg

pool:
vmImage: ubuntu-latest

stages:
- stage: 'Model_CI'
displayName: 'Model CI'
jobs:
- job: "Model_CI_Pipeline"
displayName: "Model CI Pipeline"
pool:
vmImage: 'ubuntu-latest'
- template: azdo-base-pipeline.yml
- job: "Publish_Pipeline"
displayName: "Publish AML Pipeline"
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
- template: azdo-base-pipeline.yml
- task: AzureCLI@1
inputs:
azureSubscription: '$(WORKSPACE_SVC_CONNECTION)'
@@ -35,7 +35,7 @@ stages:
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
# Invoke the Python building and publishing a training pipeline
python $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
python ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }}
displayName: 'Publish Azure Machine Learning Pipeline'

- stage: 'Trigger_AML_Pipeline'
@@ -44,8 +44,6 @@
- job: "Get_Pipeline_ID"
condition: and(succeeded(), eq(coalesce(variables['auto-trigger-training'], 'true'), 'true'))
displayName: "Get Pipeline ID for execution"
pool:
vmImage: 'ubuntu-latest'
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
@@ -56,7 +54,7 @@
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python $(Build.SourcesDirectory)/ml_service/pipelines/run_train_pipeline.py --output_pipeline_id_file "pipeline_id.txt" --skip_train_execution
python ml_service/pipelines/run_train_pipeline.py --output_pipeline_id_file "pipeline_id.txt" --skip_train_execution
# Set AMLPIPELINEID variable for next AML Pipeline task in next job
AMLPIPELINEID="$(cat pipeline_id.txt)"
echo "##vso[task.setvariable variable=AMLPIPELINEID;isOutput=true]$AMLPIPELINEID"
@@ -89,8 +87,6 @@ stages:
- job: "Training_Run_Report"
dependsOn: "Run_ML_Pipeline"
displayName: "Determine if evaluation succeeded and new model is registered"
pool:
vmImage: 'ubuntu-latest'
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
@@ -102,8 +98,6 @@
jobs:
- job: "Deploy_ACI"
displayName: "Deploy to ACI"
pool:
vmImage: 'ubuntu-latest'
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
@@ -137,8 +131,6 @@
jobs:
- job: "Deploy_AKS"
displayName: "Deploy to AKS"
pool:
vmImage: 'ubuntu-latest'
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
@@ -173,8 +165,6 @@
jobs:
- job: "Deploy_Webapp"
displayName: "Deploy to Webapp"
pool:
vmImage: 'ubuntu-latest'
container: mcr.microsoft.com/mlops/python:latest
timeoutInMinutes: 0
steps:
10 changes: 3 additions & 7 deletions .pipelines/azdo-pr-build-train.yml
@@ -4,16 +4,12 @@ pr:
include:
- master

pool:
pool:
vmImage: 'ubuntu-latest'

container: mcr.microsoft.com/mlops/python:latest


variables:
- template: azdo-variables.yml
- group: devopsforai-aml-vg


steps:
- template: azdo-base-pipeline.yml
jobs:
- template: azdo-base-pipeline.yml
2 changes: 1 addition & 1 deletion .pipelines/azdo-template-get-model-version.yml
@@ -6,7 +6,7 @@ steps:
inlineScript: |
set -e # fail on error
export SUBSCRIPTION_ID=$(az account show --query id -o tsv)
python $(Build.SourcesDirectory)/ml_service/pipelines/verify_train_pipeline.py --build_id $(Build.BuildId) --output_model_version_file "model_version.txt"
python ml_service/pipelines/verify_train_pipeline.py --build_id $(Build.BuildId) --output_model_version_file "model_version.txt"
# Output model version to Azure DevOps job
MODEL_VERSION="$(cat model_version.txt)"
echo "##vso[task.setvariable variable=MODEL_VERSION]$MODEL_VERSION"
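The `##vso[task.setvariable ...]` line above is an Azure DevOps logging command: any line echoed in that format is parsed by the agent and becomes a pipeline variable for subsequent steps. A minimal sketch (the variable name and value here are illustrative):

```shell
# Echoing this exact format exports MODEL_VERSION to later steps in the job;
# appending ";isOutput=true" inside the brackets (as the build-container job
# above does for BUILD_IMAGE) also exposes it to later jobs.
MODEL_VERSION="3"
echo "##vso[task.setvariable variable=MODEL_VERSION]$MODEL_VERSION"
```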
3 changes: 3 additions & 0 deletions .pipelines/azdo-variables.yml
@@ -2,6 +2,9 @@ variables:
# Azure ML Workspace Variables
- name: EXPERIMENT_NAME
value: mlopspython
# Azure DevOps build container
- name: BUILDCONTAINER_NAME
value: diabetes
# AML Compute Cluster Config
- name: AML_COMPUTE_CLUSTER_CPU_SKU
value: STANDARD_DS2_V2
2 changes: 1 addition & 1 deletion README.md
@@ -55,7 +55,7 @@ This reference architecture shows how to implement continuous integration (CI),

Once you have registered your ML model, you can use Azure ML + Azure DevOps to deploy it.

[Azure DevOps release pipeline](https://docs.microsoft.com/en-us/azure/devops/pipelines/release/?view=azure-devops) packages the new model along with the scoring file and its python dependencies into a [docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#image) and pushes it to [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-intro). This image is used to deploy the model as [web service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#web-service) across QA and Prod environments. The QA environment is running on top of [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/) and the Prod environment is built with [Azure Kubernetes Service (AKS)](https://docs.microsoft.com/en-us/azure/aks/intro-kubernetes).
The [Azure DevOps multi-stage pipeline](https://docs.microsoft.com/en-us/azure/devops/pipelines/process/stages?view=azure-devops&tabs=yaml) packages the new model along with the scoring file and its Python dependencies into a [Docker image](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#image) and pushes it to [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-intro). This image is used to deploy the model as a [web service](https://docs.microsoft.com/en-us/azure/machine-learning/service/concept-azure-machine-learning-architecture#web-service) across the QA and Prod environments. The QA environment runs on [Azure Container Instances (ACI)](https://azure.microsoft.com/en-us/services/container-instances/) and the Prod environment is built with [Azure Kubernetes Service (AKS)](https://docs.microsoft.com/en-us/azure/aks/intro-kubernetes).


### Repo Details
2 changes: 1 addition & 1 deletion code/scoring/score.py
@@ -57,7 +57,7 @@ def run(raw_data, request_headers):
request_headers.get("X-Ms-Request-Id", ""),
request_headers.get("Traceparent", ""),
len(result)
))
))

return {"result": result.tolist()}

4 changes: 2 additions & 2 deletions code/training/R/train_with_r_on_databricks.py
@@ -11,5 +11,5 @@
args, unknown = parser.parse_known_args()
folder = args.AZUREML_SCRIPT_DIRECTORY_NAME

os.system("cd " + "/dbfs/" + folder +
" && Rscript r_train.r && ls -ltr model.rds")
os.system("cd " + "/dbfs/" + folder
+ " && Rscript r_train.r && ls -ltr model.rds")
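A side note on the snippet above: building the shell command by string concatenation works, but a `subprocess`-based variant avoids having the folder name interpreted by the shell. A sketch for illustration only (`run_in_folder` is a hypothetical helper, not part of this PR):

```python
import subprocess
from pathlib import Path

def run_in_folder(workdir: Path, commands: list[list[str]]) -> None:
    """Run each argument-list command inside workdir, failing fast on errors."""
    for cmd in commands:
        subprocess.run(cmd, cwd=workdir, check=True)

# Usage mirroring the original script (assumes Rscript is on PATH):
# run_in_folder(Path("/dbfs") / folder,
#               [["Rscript", "r_train.r"], ["ls", "-ltr", "model.rds"]])
```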
8 changes: 4 additions & 4 deletions docs/code_description.md
@@ -2,15 +2,15 @@

### Environment Setup

- `environment_setup/requirements.txt` : It consists of a list of python packages which are needed by the train.py to run successfully on host agent (locally).
- `environment_setup/build-image/Dockerfile` : Dockerfile of a build agent containing Python 3.6 and all required packages.

- `environment_setup/build-image/requirements.txt` : List of python packages which are needed by the train.py and associated unit tests to run successfully on host agent (locally).

- `environment_setup/install_requirements.sh` : This script prepares the Python environment, i.e. installs the Azure ML SDK and the packages specified in requirements.txt

- `environment_setup/iac-*.yml, arm-templates` : Infrastructure as Code pipelines to create and delete required resources along with corresponding arm-templates.

- `environment_setup/Dockerfile` : Dockerfile of a build agent containing Python 3.6 and all required packages.

- `environment_setup/docker-image-pipeline.yml` : An AzDo pipeline for building and pushing [microsoft/mlopspython](https://hub.docker.com/_/microsoft-mlops-python) image.
- `environment_setup/mlops-image/docker-image-pipeline.yml` : An AzDo pipeline for building and pushing [microsoft/mlopspython](https://hub.docker.com/_/microsoft-mlops-python) image.

### Pipelines

44 changes: 44 additions & 0 deletions docs/development_setup.md
@@ -0,0 +1,44 @@
## Development environment setup

### Setup

Please be aware that the local environment also needs access to the Azure subscription, so you must have Contributor access on the Azure ML Workspace.

In order to configure the project locally, create a copy of `.env.example` in the root directory and name it `.env`. Fill out all missing values and adjust the existing ones to suit your requirements.

### Installation

[Install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). The Azure CLI will be used to log you in interactively.

Create a virtual environment using [venv](https://docs.python.org/3/library/venv.html), [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) or [pyenv-virtualenv](https://github.com/pyenv/pyenv-virtualenv).

Here is an example for setting up and activating a `venv` environment with Python 3:

```
python3 -m venv .venv
source .venv/bin/activate
```

Install the required Python modules in your virtual environment.

```
pip install -r environment_setup/build-image/requirements.txt
```

### Running local code

To run your local ML pipeline code on Azure ML, run a command such as the following (in bash, all on one line):

```
export BUILD_BUILDID=$(uuidgen); python ml_service/pipelines/build_train_pipeline.py && python ml_service/pipelines/run_train_pipeline.py
```

BUILD_BUILDID is a variable used to uniquely identify the ML pipeline between the
`build_train_pipeline.py` and `run_train_pipeline.py` scripts. In Azure DevOps it is
set to the current build number. In a local environment, we can use a command such as
`uuidgen` to set a different random identifier on each run, ensuring there are
no collisions.
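The same fallback can be sketched in Python, assuming the scripts read the identifier from the `BUILD_BUILDID` environment variable (`get_build_id` is an illustrative helper, not part of the repository):

```python
import os
import uuid

def get_build_id() -> str:
    # Azure DevOps sets BUILD_BUILDID on CI agents; locally we mimic
    # `uuidgen` with a random UUID so each run gets a unique identifier.
    return os.environ.get("BUILD_BUILDID") or str(uuid.uuid4())
```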

### Local testing

Before committing, run `tox` to execute linter and unit test checks.
34 changes: 29 additions & 5 deletions docs/getting_started.md
@@ -80,10 +80,7 @@ There are more variables used in the project. They're defined in two places, one

### Local configuration

In order to configure the project locally, create a copy of `.env.example` in the root directory and name it `.env`. Fill out all missing values and adjust the existing ones to suit your requirements.

For local development, you will also need to [install the Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli). The Azure CLI will be used to log you in interactively.
Please be aware that the local environment also needs access to the Azure subscription so you have to have Contributor access on the Azure ML Workspace.
For instructions on how to set up a local development environment, refer to the [Development environment setup instructions](development_setup.md).

### Azure DevOps configuration

@@ -139,6 +136,18 @@ your Azure AD tenant, or receive the ID and secret of a service principal
from your Azure AD Administrator. That principal must have Contributor
permissions on the Azure ML Workspace.

## Create a Docker Service Connection to manage your build container

The pipeline requires a **Docker Registry**
[service connection](https://docs.microsoft.com/en-us/azure/devops/pipelines/library/service-endpoints?view=azure-devops&tabs=yaml#sep-docreg).
As **Registry type**, choose **Azure Container Registry**.
Point to the Azure Container Registry deployed by your IaC pipeline.

Use **``ContainerRegistry``** as the connection name, since that name is
referenced in the Azure DevOps pipeline definition.

![configure container registry service connection](images/create-acr-service-connection.png)

## Set up Build, Release Trigger, and Release Multi-Stage Pipeline

Now that you have all the required resources created from the IaC pipeline,
@@ -172,9 +181,24 @@ Great, you now have the build pipeline set up which automatically triggers every

* The first stage of the pipeline, **Model CI**, performs linting, unit testing, build and publishes an **ML Training Pipeline** in an **ML Workspace**.

* The **Generate build container** job creates a Docker container with
your ML dependencies, such as sklearn, or reuses the container
from a previous run if your container definition has not changed.
* The **Model CI** job runs on the Docker container generated in the
previous job and runs linting and unit tests. Test results and
code coverage reports can be found on your job output page.
* The **Publish AML Pipeline** job publishes the Azure ML pipeline
in your workspace. To save time, this job runs in parallel with
the **Model CI** job. If CI fails (for instance with unit test failures),
the AML Pipeline might have been published but will not be used
for training. The next run will publish a new AML Pipeline with
the same name and updated tags.

**Note:** The build pipeline also supports building and publishing ML
pipelines using R to train a model. This is enabled
by changing the `build-train-script` pipeline variable to either `build_train_pipeline_with_r.py`, or `build_train_pipeline_with_r_on_dbricks.py`. For pipeline training a model with R on Databricks you'll need
by changing the `build-train-script` pipeline variable to either `build_train_pipeline_with_r.py`, or `build_train_pipeline_with_r_on_dbricks.py`
and uncommenting the R installation step in the `Dockerfile`.
For a pipeline training a model with R on Databricks you'll need
to manually create a Databricks cluster and attach it to the ML Workspace as a
compute (Values DB_CLUSTER_ID and DATABRICKS_COMPUTE_NAME variables should be
specified).
Binary file added docs/images/create-acr-service-connection.png