### Copyright (C) Microsoft Corporation.  
  
# Deploy regular ML R model in Azure AKS/ACI using AML SDK
Purpose: 
* use notebook as an IDE to edit and save auxiliary scripts on disk  
* set up the AML SDK infrastructure
  
#### Authors

* **George Iordanescu** - *Initial work* - [Microsoft AI CAT](https://github.com/Azure/o16nRegularMLRmodelsUsingAzurek8s)

See also the list of [contributors](https://github.com/Azure/o16nRegularMLRmodelsUsingAzurek8s/contributors) who participated in this project.  

In [1]:
# Allow multiple displays per cell: every top-level expression of a cell is shown,
# not only the last one. Later cells rely on this to echo several values at once.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# Report the environment the notebook runs in: AML SDK version, OS info and current wd
import os
import platform

import azureml.core
from azureml.core import Workspace

print("SDK version:", azureml.core.VERSION)
platform.platform()
os.getcwd()

SDK version: 1.0.17


'Linux-4.9.125-linuxkit-x86_64-with-debian-9.5'

'/workspace/code/amlsdk_operationalization'

### 1. Edit auxiliary .py files

In [3]:
# Base name (no extension) of the helper module, and the directory it is written to
utils_file_name = 'o16n_regular_ML_R_models_utils'
auxiliary_files_dir = os.path.join('.', 'src')

In [4]:
# Absolute directory and file name of the helper module written by the %%writefile cell
utils_path_name = os.path.join(os.getcwd(), auxiliary_files_dir)
utils_full_name = os.path.join(utils_path_name, utils_file_name + '.py')
utils_full_name
# exist_ok=True replaces the check-then-create sequence: no error if the directory is
# already there (or appears between the check and the creation)
os.makedirs(utils_path_name, exist_ok=True)

'/workspace/code/amlsdk_operationalization/./src/o16n_regular_ML_R_models_utils.py'

In [5]:
%%writefile $utils_full_name

from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.core.authentication import AzureCliAuthentication
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.authentication import AuthenticationException
import dotenv, logging, pathlib


#  credit Mathew Salvaris
def get_auth(env_path):
    """Return an AML authentication object, trying the available methods in order.

    1. Service Principal, when the dotenv file holds a real ``SP_PASSWORD``
       (read together with ``SP_TENANT_ID`` and ``SP_APPLICATION_ID``).
    2. Azure CLI credentials.
    3. Interactive login, when the CLI credentials raise AuthenticationException.

    Args:
        env_path: path of the dotenv file that may hold the SP_* keys.

    Returns:
        A ServicePrincipalAuthentication, AzureCliAuthentication or
        InteractiveLoginAuthentication instance.
    """
    logger = logging.getLogger(__name__)
    crt_sp_pwd = dotenv.get_key(env_path, 'SP_PASSWORD')
    # dotenv.get_key() returns None when the key (or the file) is missing. The SP_* keys are
    # optional, so a missing or empty password must fall through to CLI/interactive login
    # instead of attempting a Service Principal login with None credentials.
    if crt_sp_pwd and crt_sp_pwd != "YOUR_SERVICE_PRINCIPAL_PASSWORD":
        logger.debug("Trying to create Workspace with Service Principal")
        auth = ServicePrincipalAuthentication(
            tenant_id=dotenv.get_key(env_path, 'SP_TENANT_ID'),
            username=dotenv.get_key(env_path, 'SP_APPLICATION_ID'),
            password=crt_sp_pwd,
        )
    else:
        logger.debug("Trying to create Workspace with CLI Authentication")
        try:
            auth = AzureCliAuthentication()
            # Requesting the header forces the CLI credentials to be validated now
            auth.get_authentication_header()
        except AuthenticationException:
            logger.debug("Trying to create Workspace with Interactive login")
            auth = InteractiveLoginAuthentication()

    return auth


def set_dotenv_info(dotenv_file_path, env_dict):
    """Create the dotenv file and store every key/value pair of ``env_dict`` in it.

    Nothing is written when ``env_dict`` is empty or when the dotenv file already
    exists (an existing file is never modified); both cases are only logged.
    Minimal file error management.

    Args:
        dotenv_file_path: path of the dotenv file to create.
        env_dict: dict mapping variable names to the values to save.
    """
    logger = logging.getLogger(__name__)
    if not env_dict:
        logger.debug(
            'Trying to save empty env_dict variable into {} , please set u your sensitive info in a dictionary.'
            .format(dotenv_file_path))
        return

    dotenv_file = pathlib.Path(dotenv_file_path)
    if dotenv_file.is_file():
        logger.debug('dotenv file "{}" found, will ignore current the sensitive info dictionary.'.format(dotenv_file_path))
        return

    logger.debug('dotenv file not found, will create "{}" using the sensitive info you provided.'.format(dotenv_file_path))
    # touch() raises FileNotFoundError when the parent directory does not exist yet
    dotenv_file.parent.mkdir(parents=True, exist_ok=True)
    dotenv_file.touch()
    for crt_key, crt_val in env_dict.items():
        dotenv.set_key(dotenv_file_path, crt_key, crt_val)
        

class o16n_regular_ML_R_models_consts(object):
    """Keep project's file names and directory structure in one place.

    Directory and path constants are lists of path components, meant to be combined
    with ``os.path.join(*CONSTANT)``. Assigning any attribute on an instance raises
    TypeError (minimal setattr error management).
    """

    # AML workspace config file and dotenv file locations
    AML_WORKSPACE_CONFIG_DIR = ['..', '..',  'not_shared']
    AML_WORKSPACE_CONFIG_FILE_NAME = 'aml_ws_config.json'
    DOTENV_FILE_PATH = ['..',  '..',  'not_shared', 'general.env']

    # AML experiment directory and scoring script name
    AML_EXPERIMENT_DIR = ['..', '..',  'temp']
    SCORE_SCRIPT_FILE_NAME = 'score_script.py'

    # R model directory, names and files, plus the docker image name
    R_MODEL_DIR = [ '..',  'R_experimentation']
    R_MODEL_AML_NAME = 'trained_r_model'
    R_MODEL_FILE_NAME = 'ksvm_model01.rds'
    R_MODEL_CONDA_DEPENDENCIES_FILE_NAME = 'conda_dependencies01.yml'
    o16n_DOCKER_IMAGE_NAME = "regml-r-realtime-image001"

    def __setattr__(self, name, value):
        """Reject attribute assignment on instances, naming the offending attribute."""
        raise TypeError(
            'Cannot set "{}": {} attributes are read-only.'.format(name, type(self).__name__))

        
if __name__=="__main__":
    # Basic function/class tests, executed only when the module is run as a script.
    import sys, os
    prj_consts = o16n_regular_ML_R_models_consts()
    logger = logging.getLogger(__name__)
    logging.basicConfig(level=logging.DEBUG) # show DEBUG (10) and above

    # Assemble the two project file names from their directory and name constants
    aml_ws_file = os.path.join(
        os.path.join(*prj_consts.AML_WORKSPACE_CONFIG_DIR),
        prj_consts.AML_WORKSPACE_CONFIG_FILE_NAME)
    logger.debug('AML ws file = {}'.format(aml_ws_file))

    score_script_file = os.path.join(
        os.path.join(*prj_consts.AML_EXPERIMENT_DIR),
        prj_consts.SCORE_SCRIPT_FILE_NAME)
    logger.debug('full score script file name = {}'.format(score_script_file))

    # An empty dict exercises the "nothing to save" branch of set_dotenv_info
    crt_dotenv_file_path = os.path.join(*prj_consts.DOTENV_FILE_PATH)
    set_dotenv_info(crt_dotenv_file_path, {})
          

Overwriting /workspace/code/amlsdk_operationalization/./src/o16n_regular_ML_R_models_utils.py


In [6]:
import sys, os

def add_path_to_sys_path(path_to_append):
    """Append ``path_to_append`` to ``sys.path`` unless that exact entry is already present.

    Membership is tested against the list itself; a substring test against each entry
    would wrongly skip a path that is merely a prefix or part of an existing entry.
    """
    if path_to_append not in sys.path:
        sys.path.append(path_to_append)
        
paths_to_append = [os.path.join(os.getcwd(), auxiliary_files_dir)]
# Plain loop: a list comprehension used only for its side effects builds a throw-away
# list and makes the cell display a meaningless [None]
for crt_path in paths_to_append:
    add_path_to_sys_path(crt_path)

[None]

#### Run minimal function tests

In [7]:
!pwd
!python {os.path.join(*([auxiliary_files_dir,  utils_file_name+'.py']))}

# import subprocess
# subprocess.call(['python '+os.path.join(*([auxiliary_files_dir,  utils_file_name+'.py']))])

/workspace/code/amlsdk_operationalization
DEBUG:__main__:AML ws file = ../../not_shared/aml_ws_config.json
DEBUG:__main__:full score script file name = ../../temp/score_script.py
DEBUG:__main__:Trying to save empty env_dict variable into ../../not_shared/general.env , please set u your sensitive info in a dictionary.


### 2. Set-up the AML SDK infrastructure

* Create Azure resource group (rsg),  workspaces, 
* save sensitive info using [python-dotenv](https://github.com/theskumar/python-dotenv)  
  
Notebook repeatability notes:
* The notebook tries to find the Azure resource group (rsg) defined by __crt_resource_group__. It creates a new one if needed.  Existing rsg-s will have to be manually deleted via SDK or portal.

#### 2.1 To recreate the whole process, choosing a new rsg is enough. Changing variables crt_workspace_name and crt_workspace_region below is optional

In [8]:
# The helper module written above is importable because its directory was added to sys.path
import o16n_regular_ML_R_models_utils
prj_consts = o16n_regular_ML_R_models_utils.o16n_regular_ML_R_models_consts()

# Azure resource group used by this notebook
crt_resource_group  = 'ghiordanRo16n1rsg02'

In [9]:
# Name and Azure region of the AML workspace
crt_workspace_name = 'ghiordanregularrrealtimews'
crt_workspace_region = "eastus2" # or eastus2euap

# Echo the resource group and workspace names in use
crt_resource_group
crt_workspace_name

'ghiordanRo16n1rsg02'

'ghiordanregularrrealtimews'

#### 2.2 dotenv is used to hide sensitive info, like Azure subscription name/ID. The serialized info needs to be manually input once, in the following cells

#### Define project params in dotenv file
Uncomment all lines below, add the required info, and then run the cell once. This will create the .env file. Afterwards you can comment the lines out again or leave the cell as is: once the .env file exists, the cell content is ignored and the saved .env file is used instead in each cell that starts with %dotenv. An empty sensitive_info dict is ignored as well.

In [10]:
# Default: an empty dict, which set_dotenv_info() ignores (nothing is written).
# Replace it with the template below only for the first run, and do not commit real values.
sensitive_info = {}
# # Your sensitive info here 
# sensitive_info = {
# 'SUBSCRIPTION_ID':'xxx',
# 'COMPUTE_CONTEXT_VM_USER_NAME':'xxx',
# 'COMPUTE_CONTEXT_VM_FQDN':'somevm.eastus2.cloudapp.azure.com',
# 'COMPUTE_CONTEXT_VM_SSH_PORT':str(22),
# 'COMPUTE_CONTEXT_VM_PWD':'somepwd',
# 'SP_TENANT_ID':"YOUR_TENANT_ID", # Optional for service principal authentication
# 'SP_APPLICATION_ID':"YOUR_SERVICE_PRINCIPAL_APPLICATION_ID", # Optional for service principal authentication
# 'SP_PASSWORD':"YOUR_SERVICE_PRINCIPAL_PASSWORD" # Optional for service principal authentication
# }

##### Save sensitive info once

In [11]:
%load_ext dotenv
dotenv_file_path = os.path.join(*(prj_consts.DOTENV_FILE_PATH))

#show .env file path
dotenv_file_path

#save your sensitive info
o16n_regular_ML_R_models_utils.set_dotenv_info(dotenv_file_path, sensitive_info)

'../../not_shared/general.env'

##### Use (load) saved sensitive info

In [12]:
%dotenv $dotenv_file_path

import os
#print a bit of subscription ID, to show dotenv file was found adn loaded 
subscription_id = os.getenv('SUBSCRIPTION_ID')
subscription_id[:2]

'ed'

### Access your workspace
The following cell uses the Azure ML SDK to attempt to load the workspace specified by your parameters. If this cell succeeds, your notebook library will be configured to access the workspace from all notebooks using the Workspace.from_config() method. The cell can fail if the specified workspace doesn't exist or you don't have permissions to access it.

In [13]:
# azureml/core/workspace.py  write_config() The path defaults to the current working directory and file_name defaults to 'config.json'

workspace_config_dir = os.path.join(*(prj_consts.AML_WORKSPACE_CONFIG_DIR))
workspace_config_dir
workspace_config_file = prj_consts.AML_WORKSPACE_CONFIG_FILE_NAME

ws_config_file_full_name = os.path.join(os.getcwd(), os.path.join(*([workspace_config_dir,  workspace_config_file])))
crt_dir = os.path.join(os.getcwd(), os.path.join(*([workspace_config_dir])))
if not (os.path.isdir(crt_dir)): os.mkdir(crt_dir)

!chmod ugo=rwx {os.path.join(os.getcwd(), os.path.join(*([workspace_config_dir])))}
!echo {os.path.join(os.getcwd(), os.path.join(*([workspace_config_dir])))}
!ls -l {os.path.join(os.getcwd(), os.path.join(*([workspace_config_dir])))}
ws_config_file_full_name

'../../not_shared'

/workspace/code/amlsdk_operationalization/../../not_shared
total 9
drwxrwxrwx 2 root root   0 Feb 27 16:23 aml_config
-rwxr-xr-x 1 root root 999 Mar  2 02:56 general - Copy.env
-rwxr-xr-x 1 root root 904 Mar  6 02:31 general.env
-rwxr-xr-x 1 root root 388 Mar  7 22:00 o16ninfo.env


'/workspace/code/amlsdk_operationalization/../../not_shared/aml_ws_config.json'

#### In AML sdk we can get a ws in two ways:

* Either via Workspace(subscription_id = ...) or via Workspace.from_config(path=some_file_path). Either way of recovering an existing ws is fine, but for demo purposes, both ways are shown in this notebook.

*  We first try the Workspace(subscription_id = ...) way. If the ws is not found, a new ws is created and its configuration is persisted on disk.

* If this is the first time you are running this notebook, and the ws has already been created before, you may save its info on disk by uncommenting ws1.write_config(...) below


In [14]:
from azureml.core import Workspace

# Resolve the credentials once, outside the try block, and reuse them for both the lookup
# and the creation: an authentication failure then surfaces as such instead of being
# reported as "Workspace not accessible", and creation no longer falls back to a
# different (interactive) login than the lookup.
aml_auth = o16n_regular_ML_R_models_utils.get_auth(dotenv_file_path)
ws_config_dir_full_name = os.path.join(os.getcwd(), workspace_config_dir)

try:
    ws1 = Workspace(
        subscription_id = subscription_id,
        resource_group = crt_resource_group,
        workspace_name = crt_workspace_name,
        auth = aml_auth)
    print("Workspace configuration succeeded. ")

#     ws1.write_config(path=ws_config_dir_full_name,
#             file_name=workspace_config_file)
    del ws1 # ws will be (re)created later using from_config() function
except Exception as e:
    # NOTE(review): any failure of the lookup (not only "workspace does not exist") ends up
    # here and triggers a creation attempt; with exist_ok=False that attempt fails loudly
    # if the workspace actually exists.
    print('Exception msg: {}'.format(str(e)))
    print("Workspace not accessible. Will create a new workspace below")

    workspace_region = crt_workspace_region

    # Create the workspace using the specified parameters
    ws2 = Workspace.create(name = crt_workspace_name,
                           auth = aml_auth,
                           subscription_id = subscription_id,
                           resource_group = crt_resource_group,
                           location = workspace_region,
                           create_resource_group = True,
                           exist_ok = False)
    ws2.get_details()

    # persist the subscription id, resource group name, and workspace name in
    # <workspace_config_dir>/aml_config/<workspace_config_file>
    ws2.write_config(path=ws_config_dir_full_name,
                     file_name=workspace_config_file)

    # use ws = Workspace.from_config() lower to recover the ws, rather than rely on what we get from one time creation
    del ws2

Workspace configuration succeeded. 


#### From now on, even in other notebooks, the provisioned AML workspace will be accessible using:
```
ws = Workspace.from_config(some_AML_SDK_workspace_config_file)
```

In [15]:
# How the path argument is interpreted:
#   - from_config(): a file path that explicitly includes the aml_config subdir
#   - write_config(): a dir path to which the <<aml_config>> subdir is silently added
ws_config_path = os.path.join(os.getcwd(), workspace_config_dir, 'aml_config', workspace_config_file)
ws = Workspace.from_config(path=ws_config_path)

# Only the first character of the subscription id is printed
print(ws.name, ws.resource_group, ws.location, ws.subscription_id[0], sep = '\n')
del ws

Found the config file in: /workspace/not_shared/aml_config/aml_ws_config.json
ghiordanregularrrealtimews
ghiordanRo16n1rsg02
eastus2
e


In [16]:
# Export this notebook to an HTML file
!jupyter nbconvert --to html 000_RegularR_RealTime_Scripts_and_SDK_setup.ipynb

[NbConvertApp] Converting notebook 000_RegularR_RealTime_Scripts_and_SDK_setup.ipynb to html
[NbConvertApp] Writing 302176 bytes to 000_RegularR_RealTime_Scripts_and_SDK_setup.html
