# FWI in Azure project

## Set-up AzureML resources

This project ports devito (https://github.com/opesci/devito) into Azure and runs tutorial notebooks at:
https://nbviewer.jupyter.org/github/opesci/devito/blob/master/examples/seismic/tutorials/



In this notebook we set up the AzureML resources. It should be run once, before any of the subsequent notebooks, which all rely on the resources created here.

<a id='user_input_requiring_steps'></a>
User input requiring steps:
 - [Fill in and save sensitive information](#dot_env_description)
 - [Azure login](#Azure_login) (may be required first time the notebook is run) 
 - [Set __create_ACR_FLAG__ to true to trigger ACR creation and to save the ACR login info](#set_create_ACR_flag)
 - [Azure CLI login ](#Azure_cli_login) (may be required once to create an [ACR](https://azure.microsoft.com/en-us/services/container-registry/)) 



In [1]:
# Allow multiple displays per cell: with "all", every expression statement in a
# cell is echoed to the output, not only the last one. Keep this in mind when a
# cell evaluates sensitive values as bare expressions.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all" 

## Azure Machine Learning and Pipeline SDK-specific imports

In [2]:
import sys, os
import shutil
import urllib
import azureml.core
from azureml.core import Workspace, Experiment
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
import platform,  dotenv

In [3]:
# Record the SDK version, the OS platform and the working directory this
# notebook was run with (each expression below is displayed).
print("Azure ML SDK Version: ", azureml.core.VERSION)
platform.platform()
os.getcwd()

Azure ML SDK Version:  1.0.69


'Linux-4.15.0-1061-azure-x86_64-with-debian-10.0'

'/workspace/examples/imaging/azureml_devito/notebooks'

#### 1. Create utilities file

##### 1.1 Define utilities file (project_utils.py) path
The utilities file created here contains the code that authorizes access to Azure resources, plus project configuration settings (directories and file names) kept in the __project_consts__ class.

In [4]:
# Module name (without extension) of the generated utilities file, and the
# directory, relative to the notebook, that will hold it.
utils_file_name = 'project_utils'
auxiliary_files_dir = os.path.join('.', 'src')

# Directory and full file path of the utilities module, anchored at the
# current working directory.
utils_path_name = os.path.join(os.getcwd(), auxiliary_files_dir)
utils_full_name = os.path.join(utils_path_name, '{}.py'.format(utils_file_name))

# The %%writefile cell below needs the target directory to exist.
os.makedirs(utils_path_name, exist_ok=True)
    
def ls_l(a_dir):
    """Return the names of the entries directly inside a_dir that are files.

    An entry is kept when os.path.isfile() is true for it, so sub-directories
    are skipped. Names come back in the order os.listdir() yields them.
    """
    file_names = []
    for entry_name in os.listdir(a_dir):
        if os.path.isfile(os.path.join(a_dir, entry_name)):
            file_names.append(entry_name)
    return file_names

##### 1.2. Edit/create project_utils.py file

In [5]:
%%writefile $utils_full_name

from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.core.authentication import AzureCliAuthentication
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.authentication import AuthenticationException
import dotenv, logging, pathlib, os


#  credit Mathew Salvaris
def get_auth(env_path):
    """Pick an Azure ML authentication object, preferring non-interactive options.

    Order of preference: Service Principal (used when the SP_PASSWORD
    environment variable is set and non-empty), then Azure CLI credentials,
    then an interactive login.

    Args:
        env_path: path of the dotenv file from which SP_TENANT_ID and
            SP_APPLICATION_ID are read when Service Principal auth is used.
            Note that the password itself is taken from the process
            environment (SP_PASSWORD), not from this file.

    Returns:
        A ServicePrincipalAuthentication, AzureCliAuthentication or
        InteractiveLoginAuthentication instance.
    """
    log = logging.getLogger(__name__)
    sp_password = os.environ.get("SP_PASSWORD", None)

    if not sp_password:
        log.debug("Trying to create Workspace with CLI Authentication")
        try:
            cli_auth = AzureCliAuthentication()
            # Presumably this forces a token lookup so that unusable CLI
            # credentials are detected here -- TODO confirm against the SDK.
            cli_auth.get_authentication_header()
        except AuthenticationException:
            log.debug("Trying to create Workspace with Interactive login")
            return InteractiveLoginAuthentication()
        return cli_auth

    log.debug("Trying to create Workspace with Service Principal")
    sp_tenant_id = dotenv.get_key(env_path, 'SP_TENANT_ID')
    sp_application_id = dotenv.get_key(env_path, 'SP_APPLICATION_ID')
    # NOTE(review): newer azureml-core releases document these keywords as
    # service_principal_id / service_principal_password -- confirm that
    # username / password are accepted by the installed SDK version.
    return ServicePrincipalAuthentication(
        tenant_id=sp_tenant_id,
        username=sp_application_id,
        password=sp_password,
    )


def set_dotenv_info(dotenv_file_path, env_dict):
    """Write every key/value pair of env_dict into the dotenv file.

    The file is created first when it does not exist yet. An empty (or
    otherwise falsy) env_dict writes nothing and only emits a debug message.
    File errors are not handled here.

    Args:
        dotenv_file_path: path of the dotenv file to create/update.
        env_dict: mapping of variable names to the values to store.
    """
    log = logging.getLogger(__name__)

    if not env_dict:
        log.debug(
            'Trying to save empty env_dict variable into {}, please set your sensitive info in a dictionary.'
            .format(dotenv_file_path))
        return

    env_file = pathlib.Path(dotenv_file_path)
    if env_file.is_file():
        log.debug('dotenv file "{}" found, will (over)write it with current sensitive info you provided.'.format(dotenv_file_path))
    else:
        log.debug('dotenv file not found, will create "{}" using the sensitive info you provided.'.format(dotenv_file_path))
        env_file.touch()

    for key, value in env_dict.items():
        dotenv.set_key(dotenv_file_path, key, value)
        

class project_consts:
    """Single home for the project's directory layout and file names.

    Paths are stored as lists of path components, meant to be combined with
    os.path.join(*components). Setting an attribute on an instance is
    rejected with TypeError.
    """

    # Directory components (relative paths) and file name of the AML workspace config.
    AML_WORKSPACE_CONFIG_DIR = ['.', '..', 'not_shared']
    AML_EXPERIMENT_DIR = ['.', '..', 'temp']
    AML_WORKSPACE_CONFIG_FILE_NAME = 'aml_ws_config.json'

    # The .env files live in the workspace config directory.
    DOTENV_FILE_PATH = [*AML_WORKSPACE_CONFIG_DIR, 'general.env']
    DOCKER_DOTENV_FILE_PATH = [*AML_WORKSPACE_CONFIG_DIR, 'dockerhub.env']

    def __setattr__(self, *_):
        # Any attribute assignment on an instance is refused.
        raise TypeError

        
if __name__=="__main__":
    # Basic function/class tests, executed only when this module is run directly.
    import sys, os
    logging.basicConfig(level=logging.DEBUG) # Logging Levels: DEBUG	10, NOTSET	0
    logger = logging.getLogger(__name__)
    prj_consts = project_consts()

    # Log where the AML workspace configuration file is expected.
    ws_config_dir = os.path.join(*prj_consts.AML_WORKSPACE_CONFIG_DIR)
    ws_config_file = os.path.join(ws_config_dir, prj_consts.AML_WORKSPACE_CONFIG_FILE_NAME)
    logger.debug('AML ws file = {}'.format(ws_config_file))

    # An empty dict exercises the "nothing to save" branch of set_dotenv_info.
    crt_dotenv_file_path = os.path.join(*prj_consts.DOTENV_FILE_PATH)
    set_dotenv_info(crt_dotenv_file_path, {})
      

Overwriting /workspace/examples/imaging/azureml_devito/notebooks/./src/project_utils.py


##### 1.3. Import utilities functions defined above

In [6]:
def add_path_to_sys_path(path_to_append):
    """Append path_to_append to sys.path unless that directory is already listed.

    Entries are compared as whole, normalized paths. (A substring test would
    wrongly treat '/x/src' as present when only '/x/src_extra' is listed.)

    Args:
        path_to_append: directory to make importable.
    """
    wanted = os.path.normpath(path_to_append)
    if all(os.path.normpath(existing) != wanted for existing in sys.path):
        sys.path.append(path_to_append)
        
# Make the directory that holds the generated project_utils.py importable.
paths_to_append = [os.path.join(os.getcwd(), auxiliary_files_dir)]
# NOTE(review): the list comprehension is used only for its side effect; its
# value is what this cell displays as [None].
[add_path_to_sys_path(crt_path) for crt_path in paths_to_append]


# Import the module written by the %%writefile cell and instantiate its constants holder.
import project_utils
prj_consts = project_utils.project_consts()


[None]

#### 2. Set-up the AML SDK infrastructure

* Create Azure resource group (rsg),  workspaces, 
* save sensitive info using [python-dotenv](https://github.com/theskumar/python-dotenv)  
  
Notebook repeatability notes:
* The notebook tries to find and use an existing Azure resource group (rsg) defined by __crt_resource_group__. It creates a new one if needed. 

<a id='set_create_ACR_flag'></a>

##### Create an [ACR](https://azure.microsoft.com/en-us/services/container-registry/) the first time this notebook is run.
Either docker hub or ACR can be used to store the experimentation image. To create the ACR, set:  
```
create_ACR_FLAG=True  
```
It will create an ACR by running several steps described below in section 2.7.  __Create an [ACR]__  
  
  
[Back](#user_input_requiring_steps) to summary of user input requiring steps.

In [7]:
create_ACR_FLAG = False  # set to True to run the ACR creation steps and save the ACR login info

In [8]:
# Default: nothing to save. An empty dict is ignored by set_dotenv_info; the
# (initially commented-out) cells below replace it with the real settings.
sensitive_info = {}

<a id='dot_env_description'></a>
##### 2.1. Input here sensitive and configuration information
[dotenv](https://github.com/theskumar/python-dotenv) is used to hide sensitive info, like Azure subscription name/ID. The serialized info needs to be manually input once.  
  
* REQUIRED ACTION for the 2 cells below: uncomment them, add the required info in the first cell, and run both cells once. 
  The sensitive information will be packed in the __sensitive_info__ dictionary variable, which will then be saved by a following cell in an .env file (__dotenv_file_path__) that should be git-ignored.   

*  OPTIONAL STEP: After running once the two cells below to save __sensitive_info__ dictionary variable with your custom info, you can comment them and leave the __sensitive_info__ variable defined above as an empty python dictionary.  
   
   
__Notes__:
* An empty __sensitive_info__ dictionary is ignored by the __set_dotenv_info__ function defined above in project_utils.py . 
* The saved .env file will be used thereafter in each cell that starts with %dotenv. 
* The saved .env file contains user-specific information and it should __not__ be version-controlled in git.
* If you would like to [use service principal authentication](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/manage-azureml-service/authentication-in-azureml/authentication-in-azure-ml.ipynb) make sure you provide the optional values as well (see get_auth function definition in project_utils.py file created above for details).

[Back](#user_input_requiring_steps) to summary of user input requiring steps.

In [9]:
# subscription_id = ""
# resource_group = "ghiordanfwirsg01"
# workspace_name = "ghiordanfwiws"
# workspace_region = "eastus2"
# gpu_cluster_name = "gpuclstfwi02"
# gpucluster_admin_user_name = ""
# gpucluster_admin_user_password = ""

# experimentation_docker_image_name = "fwi01_azureml"
# experimentation_docker_image_tag = "sdk.v1.0.60"
# docker_container_mount_point = '/datadrive01/prj/DeepSeismic/fwi' # use project directory or a subdirectory

# docker_login = "georgedockeraccount"
# docker_pwd = ""

# acr_name="fwi01acr"

In [10]:
# sensitive_info = {
# 'SUBSCRIPTION_ID':subscription_id,
# 'RESOURCE_GROUP':resource_group, 
# 'WORKSPACE_NAME':workspace_name, 
# 'WORKSPACE_REGION':workspace_region,
# 'GPU_CLUSTER_NAME':gpu_cluster_name,
# 'GPU_CLUSTER_ADMIN_USER_NAME':gpucluster_admin_user_name,
# 'GPU_CLUSTER_ADMIN_USER_PASSWORD':gpucluster_admin_user_password,
# 'EXPERIMENTATION_DOCKER_IMAGE_NAME':experimentation_docker_image_name,
# 'EXPERIMENTATION_DOCKER_IMAGE_TAG':experimentation_docker_image_tag,
# 'DOCKER_CONTAINER_MOUNT_POINT':docker_container_mount_point,
# 'DOCKER_LOGIN':docker_login,
# 'DOCKER_PWD':docker_pwd,
# 'ACR_NAME':acr_name
# }

##### 2.2. Save sensitive info
An empty __sensitive_info__ variable will be ignored.  
A non-empty __sensitive_info__ variable will overwrite info in an existing .env file.

In [11]:
%load_ext dotenv
dotenv_file_path = os.path.join(*(prj_consts.DOTENV_FILE_PATH)) 
os.makedirs(os.path.join(*(prj_consts.DOTENV_FILE_PATH[:-1])), exist_ok=True)

# # show .env file path
# !pwd
dotenv_file_path

#save your sensitive info
project_utils.set_dotenv_info(dotenv_file_path, sensitive_info)

'./../not_shared/general.env'

##### 2.3. Use (load) saved sensitive info
This is how sensitive info will be retrieved in other notebooks.

In [12]:
%dotenv $dotenv_file_path

subscription_id = os.getenv('SUBSCRIPTION_ID')
# # print a bit of subscription ID, to show dotenv file was found and loaded 
# subscription_id[:2]

crt_resource_group  = os.getenv('RESOURCE_GROUP')
crt_workspace_name = os.getenv('WORKSPACE_NAME')
crt_workspace_region = os.getenv('WORKSPACE_REGION') 

##### 2.4.  Access your workspace

* In AML SDK we can get a ws in two ways:  
    - via Workspace(subscription_id = ...)   
    - via Workspace.from_config(path=some_file_path).   
    
For demo purposes, both ways are shown in this notebook.

*  At first notebook run:
    - the AML workspace ws is typically not found, so a new ws object is created and persisted on disk.
    - If the ws has been created other ways (e.g. via Azure portal), it may be persisted on disk by calling ws1.write_config(...).

In [13]:
# Directory and file name under which the AML workspace configuration is kept.
workspace_config_file = prj_consts.AML_WORKSPACE_CONFIG_FILE_NAME
workspace_config_dir = os.path.join(*prj_consts.AML_WORKSPACE_CONFIG_DIR)

# # print debug info if needed
# workspace_config_dir
# ls_l(os.path.join(os.getcwd(), workspace_config_dir))

<a id='Azure_login'></a>
###### Login into Azure may be required here
[Back](#user_input_requiring_steps) to summary of user input requiring steps.

In [14]:
# Directory under which the workspace configuration is persisted; the next
# cell reloads the workspace from it with Workspace.from_config().
workspace_config_path = os.path.join(os.getcwd(), workspace_config_dir)

try:
    ws1 = Workspace(
        subscription_id = subscription_id,
        resource_group = crt_resource_group,
        workspace_name = crt_workspace_name,
        auth=project_utils.get_auth(dotenv_file_path))
except Exception as e :
    # NOTE(review): any failure (not only "workspace not found") ends up here
    # and triggers a creation attempt; with exist_ok=False that attempt raises
    # if the workspace does exist.
    print('Exception msg: {}'.format(str(e )))
    print("Workspace not accessible. Will create a new workspace below")

    workspace_region = crt_workspace_region

    # Create the workspace using the specified parameters, authenticating the
    # same way as the lookup above.
    ws2 = Workspace.create(name = crt_workspace_name,
                           auth = project_utils.get_auth(dotenv_file_path),
                           subscription_id = subscription_id,
                           resource_group = crt_resource_group,
                           location = workspace_region,
                           create_resource_group = True,
                           exist_ok = False)
    ws2.get_details()

    # Persist the subscription id, resource group name, and workspace name on disk.
    ws2.write_config(path=workspace_config_path, file_name=workspace_config_file)

    # Delete ws2 and use ws = Workspace.from_config() as shown below to recover
    # the ws, rather than rely on what we get from one-time creation.
    del ws2
else:
    print("Workspace configuration loading succeeded. ")
    # Persist the configuration also when the workspace already existed (e.g.
    # created via the portal, or on a fresh clone where the config file is not
    # on disk yet); otherwise Workspace.from_config() in the next cell fails.
    ws1.write_config(path=workspace_config_path, file_name=workspace_config_file)
    del ws1 # ws will be (re)created later using from_config() function

Workspace configuration loading succeeded. 


##### 2.5.  Demo access to created workspace

From now on, even in other notebooks, the provisioned AML workspace will be accessible using Workspace.from_config() as shown below:

In [15]:
# from_config() is given the full path of the config file, '.azureml'
# sub-directory included. write_config() takes only the parent directory and
# presumably adds that sub-directory on its own -- TODO confirm for the
# installed SDK version.
ws = Workspace.from_config(
    path=os.path.join(os.getcwd(), workspace_config_dir, '.azureml', workspace_config_file))
# # print debug info if needed
# print(ws.name, ws.resource_group, ws.location, ws.subscription_id[0], sep = '\n')

##### 2.6.  Create compute cluster used in following notebooks

In [16]:
# Name of the compute cluster, read from the GPU_CLUSTER_NAME environment
# variable (loaded from the .env file by the %dotenv cell above).
gpu_cluster_name = os.getenv('GPU_CLUSTER_NAME')
gpu_cluster_name

'gpuclstfwi02'

In [17]:
# Upper bound on cluster nodes; only referenced by the commented-out
# provisioning template below.
max_nodes_value = 3

# Look the cluster up by name. When it is missing, only a message is printed:
# gpu_cluster stays undefined unless the template below is uncommented.
try:
    gpu_cluster = ComputeTarget(workspace=ws, name=gpu_cluster_name)
    print("Found existing gpu cluster")
except ComputeTargetException:
    print("Could not find gpu cluster, please create one")
    
#     # Specify the configuration for the new cluster, add admin_user_ssh_key='ssh-rsa ... <user>@<host>' if needed
#     compute_config = AmlCompute.provisioning_configuration(vm_size="Standard_NC12",
#                                                            min_nodes=0,
#                                                            max_nodes=max_nodes_value,
#                                                            admin_username=os.getenv('GPU_CLUSTER_ADMIN_USER_NAME'), 
#                                                            admin_user_password=os.getenv('GPU_CLUSTER_ADMIN_USER_PASSWORD'))
#     # Create the cluster with the specified name and configuration
#     gpu_cluster = ComputeTarget.create(ws, gpu_cluster_name, compute_config)

#     # Wait for the cluster to complete, show the output log
#     gpu_cluster.wait_for_completion(show_output=True)

Found existing gpu cluster


##### 2.7.  Create an [ACR](https://docs.microsoft.com/en-us/azure/container-registry/) if you have not done so using the [portal](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-get-started-portal)   
 - Follow the 4 ACR steps described below.  
 - Uncomment cells' lines as needed to login and see commands responses while you set the right subscription and then create the ACR. 
 - You need [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli) to run the commands below. 

<a id='Azure_cli_login'></a>
##### ACR Step 1.  Select ACR subscription (az cli login into Azure may be required here)
[Back](#user_input_requiring_steps) to summary of user input requiring steps.

In [18]:
!az --version
if create_ACR_FLAG:
    !az login
    response01 = ! az account list --all --refresh -o table
    response02 = ! az account set --subscription $subscription_id
    response03 = ! az account list -o table
    response04 = ! $cli_command

    response01
    response02
    response03
    response04

azure-cli                         2.0.75

command-modules-nspkg              2.0.3
core                              2.0.75
nspkg                              3.0.4
telemetry                          1.0.4

Python location '/opt/az/bin/python3'
Extensions directory '/root/.azure/cliextensions'

Python (Linux) 3.6.5 (default, Oct 11 2019, 09:04:03) 
[GCC 6.3.0 20170516]

Legal docs and information: aka.ms/AzureCliLegal


Your CLI is up-to-date.


##### ACR Step 2.  Create the ACR

In [19]:
%dotenv $dotenv_file_path
acr_name = os.getenv('ACR_NAME')

cli_command='az acr create --resource-group '+ crt_resource_group +' --name ' + acr_name + ' --sku Basic'
cli_command

if create_ACR_FLAG:
    !$cli_command

'az acr create --resource-group ghiordanfwirsg01 --name fwi01acr --sku Basic'

##### ACR Step 3. Also enable password and login via __ [--admin-enabled true](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-authentication) __ and then use the az cli or portal to set up the credentials

In [20]:
# per https://docs.microsoft.com/en-us/azure/container-registry/container-registry-authentication
cli_command='az acr update -n '+acr_name+' --admin-enabled true'
cli_command

if create_ACR_FLAG:
    response = !$cli_command
    response

'az acr update -n fwi01acr --admin-enabled true'

##### ACR Step 4. Save the ACR password and login

In [21]:
if create_ACR_FLAG:
    import subprocess
    cli_command = 'az acr credential show -n '+acr_name

    def _az_acr_credential(query):
        """Return one field of `az acr credential show` for acr_name as plain text.

        The JMESPath query is quoted so the shell cannot glob-expand its
        brackets, and `-o tsv` makes the CLI print the bare value. Raises
        RuntimeError when the CLI call fails or returns nothing.
        """
        completed = subprocess.run(
            cli_command + ' --query "{}" -o tsv'.format(query),
            shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        value = completed.stdout.decode("utf-8").strip()
        if completed.returncode != 0 or not value:
            raise RuntimeError('Could not read ACR credential "{}" for registry "{}": {}'.format(
                query, acr_name, completed.stderr.decode("utf-8").strip()))
        return value

    acr_username = _az_acr_credential('username')
    acr_password = _az_acr_credential('passwords[0].value')

    # Persist the credentials in the .env file for the following notebooks.
    dotenv.set_key(dotenv_file_path, 'ACR_PASSWORD', acr_password)
    dotenv.set_key(dotenv_file_path, 'ACR_USERNAME', acr_username)

In [22]:
%reload_ext dotenv
%dotenv -o $dotenv_file_path

# print acr password and login info saved in dotenv file
if create_ACR_FLAG:
    os.getenv('ACR_PASSWORD')
    os.getenv('ACR_USERNAME')

In [23]:
# Completion marker, printed when the notebook has been run to its last cell.
print('Finished running 000_Setup_GeophysicsTutorial_FWI_Azure_devito!')

Finished running 000_Setup_GeophysicsTutorial_FWI_Azure_devito!
