## Set Up Azure environment to train CV models

In [1]:
import azureml.core
from azureml.core import Experiment, Environment, Workspace, Dataset, Datastore, ScriptRunConfig
import os
import shutil

# Report which release of the Azure ML core SDK this kernel has installed.
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Azure ML SDK Version:  1.34.0


In [2]:
# Connect to the workspace described by the config.json file in the current folder.
ws = Workspace.from_config()

# Summarise the workspace (name, region, resource group) on one tab-separated line.
workspace_summary = (ws.name, ws.location, ws.resource_group)
print(*workspace_summary, sep='\t')

# Last expression of the cell: display the datastores attached to the workspace.
ws.datastores

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code ERYEZSB6H to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
wetlands	eastus	cic_ai


{'workspaceartifactstore': {
   "name": "workspaceartifactstore",
   "container_name": "azureml",
   "account_name": "wetlands2489098639",
   "protocol": "https",
   "endpoint": "core.windows.net"
 },
 'wetlanddatastore': {
   "name": "wetlanddatastore",
   "container_name": "data",
   "account_name": "wetlands2489098639",
   "protocol": "https",
   "endpoint": "core.windows.net"
 },
 'workspacefilestore': {
   "name": "workspacefilestore",
   "container_name": "azureml-filestore-2640f222-8801-40ec-b413-83a7ec003a55",
   "account_name": "wetlands2489098639",
   "protocol": "https",
   "endpoint": "core.windows.net"
 },
 'workspaceblobstore': {
   "name": "workspaceblobstore",
   "container_name": "azureml-blobstore-2640f222-8801-40ec-b413-83a7ec003a55",
   "account_name": "wetlands2489098639",
   "protocol": "https",
   "endpoint": "core.windows.net"
 }}

In [3]:
# Look up the wetland image datastore by name; it must already be registered
# in this workspace for the lookup to succeed.
wetland_datastore_name = 'wetlanddatastore'
datastore = Datastore.get(ws, wetland_datastore_name)

# Last expression of the cell: display the datastore's connection details.
datastore

{
  "name": "wetlanddatastore",
  "container_name": "data",
  "account_name": "wetlands2489098639",
  "protocol": "https",
  "endpoint": "core.windows.net"
}

## Register Blob/Fileshare as a workspace Datastore

In [4]:
# Register the blob container that holds the wetland image data as a datastore
# in this workspace.
#
# The storage account key is a secret and must never be written into the
# notebook (not even in commented-out code). It is read from the
# WETLANDS_STORAGE_ACCOUNT_KEY environment variable instead.
# NOTE(review): an earlier revision of this cell contained a literal account
# key; that key should be rotated in the Azure portal, since it remains
# visible in version-control history.
account_key = os.environ.get('WETLANDS_STORAGE_ACCOUNT_KEY')
if not account_key:
    # Fail early with a clear message rather than registering a datastore
    # with a missing or placeholder credential.
    raise RuntimeError(
        "Set the WETLANDS_STORAGE_ACCOUNT_KEY environment variable to the "
        "storage account key before registering the datastore."
    )

# Last expression of the cell: the registered datastore is displayed as output.
Datastore.register_azure_blob_container(
    datastore_name = 'wetlanddatastore',  # a name you choose; it is how the datastore is referenced in ML Studio from now on
    workspace = ws,
    container_name = 'data',
    account_name = 'wetlands2489098639',
    account_key = account_key
)
# To register an Azure file share instead of a blob container, use
# Datastore.register_azure_file_share with the same workspace, account_name and
# account_key, plus a file_share_name.

{
  "name": "wetlanddatastore",
  "container_name": "data",
  "account_name": "wetlands2489098639",
  "protocol": "https",
  "endpoint": "core.windows.net"
}

In [5]:
# Create a file dataset that can be used in training.
# This cell is intentionally disabled (everything is commented out). When
# uncommented, it would build a file dataset from the 'GEE' path of the
# 'wetlanddatastore' datastore and register it in the workspace as
# 'gee-acd-data', creating a new version on each run.
# datastore = Datastore.get(workspace = ws, datastore_name = 'wetlanddatastore')
# datastore_paths = [(datastore, 'GEE')]
# gee_dataset = Dataset.File.from_files(path = datastore_paths)
# gee_dataset = gee_dataset.register(
#     workspace=ws,
#     name='gee-acd-data',
#     description='training and test TFRecords exported from GEE',
#     create_new_version=True)

### Create Virtual Environment

In [6]:
# Create the custom training environment for the first time.
# Once registered, it can be fetched by name in later sessions instead of
# being rebuilt here.

# We'll start from a pre-built TensorFlow GPU environment.
base_env_name = 'AzureML-TensorFlow-2.3-GPU'
envs = Environment.list(workspace = ws)
if base_env_name not in envs:
    # dict.get() would silently return None here and fail later with an
    # unhelpful AttributeError; fail now with an actionable message.
    raise KeyError(
        "Base environment '{}' was not found in workspace '{}'".format(
            base_env_name, ws.name
        )
    )

# Clone under our own name rather than renaming the listed environment in
# place, so the base environment object in `envs` is left unmodified.
env = envs[base_env_name].clone('wetland-training')

# Define packages to be installed using CondaDependencies:
# get the packages that are already part of the pre-built environment...
conda_dep = env.python.conda_dependencies
# ...and list the extra pip packages the training script needs.
pip_packages = ['matplotlib', 'rasterio', 'tensorboard']

# Add each package to the existing conda dependencies.
for package in pip_packages:
    conda_dep.add_pip_package(package)

# Double check all the packages are there. A bare expression in the middle of
# a cell is not displayed, so print the serialized specification explicitly.
print(conda_dep.serialize_to_string())

# Now update the conda dependencies of the python environment.
env.python.conda_dependencies = conda_dep

# Register the environment to re-use later. As the last expression of the
# cell, the registered environment definition is displayed as output.
env.register(workspace = ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04:20210513.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "wetland-training",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "conda-forge"
         