### Copyright (C) Microsoft Corporation.  
  
# Create utility scripts and set-up AML SDK account (create AML workspace)
  
This notebook is used as an IDE to edit and save auxiliary .py scripts. The AML SDK infrastructure is set up here once and is then reused by the other project notebooks.

## Required steps:
* See instructions in [section 2.1](#2.1-Input-here-sensitive-and-configuration-information) and edit the following cell to fill in custom and sensitive information in the __sensitive_info__ dictionary variable. 
* Review the cells in [section 1](#1.-Edit-auxiliary-.py-files) below if needed. Edit the constants defined in the __R_models_operationalization_consts__ class if you wish to have a different directory structure.
* Logging in to Azure may be required in section [2.3](#2.3-Login-into-Azure-may-be-required-here)

In [None]:
# Allow multiple displays per cell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import azureml.core
from azureml.core import Workspace
import sys, os


In [None]:
# Check core SDK version number, os info and current wd
print("SDK version:", azureml.core.VERSION)

# platform.platform() is left as a bare expression so the notebook displays its value as cell output
import platform
platform.platform()

# Uncomment to also display the current working directory
# os.getcwd()

#### Define utilities file path

In [None]:
# Base name (without the .py extension) of the generated helper module
utils_file_name = "o16n_regular_ML_R_models_utils"
# Helper scripts are kept in a 'src' folder under the current directory
auxiliary_files_dir = os.path.join(os.curdir, "src")

In [None]:
# Absolute folder that holds the auxiliary scripts; create it if it is not there yet
utils_path_name = os.path.join(os.getcwd(), auxiliary_files_dir)
os.makedirs(utils_path_name, exist_ok=True)
# Full path of the helper module file inside that folder
utils_full_name = os.path.join(utils_path_name, utils_file_name + ".py")
    
def ls_l(a_dir):
    """Return the names of the entries directly inside ``a_dir`` that are files.

    Entries for which ``os.path.isfile`` is false (e.g. sub-directories) are
    left out. Names are returned in the order ``os.listdir`` yields them.
    """
    file_names = []
    for entry in os.listdir(a_dir):
        if os.path.isfile(os.path.join(a_dir, entry)):
            file_names.append(entry)
    return file_names

### 1. Edit auxiliary .py files

In [None]:
%%writefile $utils_full_name

from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.core.authentication import AzureCliAuthentication
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.authentication import AuthenticationException
import dotenv, logging, pathlib


#  credit Mathew Salvaris
def get_auth(env_path):
    """Return an AML authentication object, preferring non-interactive options.

    The options are tried in this order:
      1. Service Principal, when ``SP_PASSWORD`` in the dotenv file holds a
         real value (not missing, not empty, not the placeholder text).
      2. Azure CLI credentials.
      3. Interactive login, when the CLI credentials raise
         ``AuthenticationException``.

    Args:
        env_path: Path of the dotenv file that stores the ``SP_PASSWORD``,
            ``SP_TENANT_ID`` and ``SP_APPLICATION_ID`` keys.

    Returns:
        A ``ServicePrincipalAuthentication``, ``AzureCliAuthentication`` or
        ``InteractiveLoginAuthentication`` instance.
    """
    logger = logging.getLogger(__name__)
    sp_password = dotenv.get_key(env_path, 'SP_PASSWORD')

    # A missing or empty key must not be mistaken for a configured principal;
    # previously only the placeholder text was excluded.
    if sp_password and sp_password != "YOUR_SERVICE_PRINCIPAL_PASSWORD":
        logger.debug("Trying to create Workspace with Service Principal")
        sp_tenant_id = dotenv.get_key(env_path, 'SP_TENANT_ID')
        sp_application_id = dotenv.get_key(env_path, 'SP_APPLICATION_ID')
        # NOTE(review): arguments are passed positionally (tenant, application id,
        # secret) so the call does not depend on the keyword names, which appear
        # to differ between azureml-core releases - confirm against the installed SDK.
        return ServicePrincipalAuthentication(
            sp_tenant_id,
            sp_application_id,
            sp_password,
        )

    logger.debug("Trying to create Workspace with CLI Authentication")
    try:
        auth = AzureCliAuthentication()
        # Force a token request now so unusable CLI credentials are detected here
        auth.get_authentication_header()
    except AuthenticationException:
        logger.debug("Trying to create Workspace with Interactive login")
        auth = InteractiveLoginAuthentication()

    return auth


def set_dotenv_info(dotenv_file_path, env_dict):
    """Store every key/value pair of ``env_dict`` in a dotenv file.

    An empty (or ``None``) ``env_dict`` is ignored: nothing is created or
    written and only a debug message is logged. Otherwise the file, together
    with any missing parent directory, is created when absent and each pair
    is written through ``dotenv.set_key``.

    Args:
        dotenv_file_path: Path of the dotenv file to create or update.
        env_dict: Mapping of key names to the values to store.

    Returns:
        None.
    """
    logger = logging.getLogger(__name__)

    # Guard clause: nothing to save
    if not env_dict:
        logger.debug(
            'Trying to save empty env_dict variable into {} , please set up your sensitive info in a dictionary.'
            .format(dotenv_file_path))
        return

    dotenv_file = pathlib.Path(dotenv_file_path)
    if dotenv_file.is_file():
        logger.debug('dotenv file "{}" found, will overwrite it with current sensitive info you provided.'.format(dotenv_file_path))
    else:
        logger.debug('dotenv file not found, will create "{}" using the sensitive info you provided.'.format(dotenv_file_path))
        # touch() alone fails when the target folder does not exist yet
        dotenv_file.parent.mkdir(parents=True, exist_ok=True)
        dotenv_file.touch()

    for crt_key, crt_val in env_dict.items():
        dotenv.set_key(dotenv_file_path, crt_key, crt_val)
        

class R_models_operationalization_consts:
    """Keep project's file names and directory structure in one place.

    Directory and path constants are lists of path components, meant to be
    combined with ``os.path.join(*components)``.

    Assigning to an attribute of an *instance* raises ``TypeError``.
    """

    # Folder and file name of the persisted AML workspace configuration
    AML_WORKSPACE_CONFIG_DIR = ['..', '..',  'not_shared']
    AML_WORKSPACE_CONFIG_FILE_NAME = 'aml_ws_config.json'
    # Components of the path of the dotenv file holding the sensitive info
    DOTENV_FILE_PATH = ['..',  '..',  'not_shared', 'general.env']

    # Experiment folder and the name of the scoring script
    AML_EXPERIMENT_DIR = ['..', '..',  'temp']
    SCORE_SCRIPT_FILE_NAME = 'score_script.py'

    # R model folder, AML name, file name and conda dependencies file name
    R_MODEL_DIR = [ '..',  'R_experimentation']
    R_MODEL_AML_NAME = 'trained_r_model'
    R_MODEL_FILE_NAME = 'ksvm_model01.rds'
    R_MODEL_CONDA_DEPENDENCIES_FILE_NAME = 'conda_dependencies01.yml'
    # Docker image name
    o16n_DOCKER_IMAGE_NAME = "regml-r-realtime-image001"

    def __setattr__(self, name, value):
        """Reject attribute assignment on instances, naming the offending attribute."""
        raise TypeError(
            "{} attributes are read-only, cannot set '{}'".format(type(self).__name__, name))

        
if __name__ == "__main__":
    # Basic function/class smoke tests, run only when the module is executed as a script.
    import os

    logging.basicConfig(level=logging.DEBUG)  # Logging Levels: DEBUG 10, NOTSET 0
    logger = logging.getLogger(__name__)

    # The constants class of this module (the previous name was not defined anywhere)
    prj_consts = R_models_operationalization_consts()

    logger.debug('AML ws file = {}'.format(
        os.path.join(*prj_consts.AML_WORKSPACE_CONFIG_DIR,
                     prj_consts.AML_WORKSPACE_CONFIG_FILE_NAME)))
    logger.debug('full score script file name = {}'.format(
        os.path.join(*prj_consts.AML_EXPERIMENT_DIR,
                     prj_consts.SCORE_SCRIPT_FILE_NAME)))

    # An empty dict only logs a message; no file is touched
    crt_dotenv_file_path = os.path.join(*prj_consts.DOTENV_FILE_PATH)
    set_dotenv_info(crt_dotenv_file_path, {})
          

#### Import utilities functions defined above

In [None]:
def add_path_to_sys_path(path_to_append):
    """Append ``path_to_append`` to ``sys.path`` unless it is already listed.

    The check is an exact match against the ``sys.path`` entries. A substring
    test would wrongly skip a path that is merely contained in a longer entry.
    """
    if path_to_append not in sys.path:
        sys.path.append(path_to_append)
        
# Register the folder that holds the auxiliary scripts
paths_to_append = [os.path.join(os.getcwd(), auxiliary_files_dir)]
# Plain loop: the calls are made for their side effect only, no result list is needed
for crt_path in paths_to_append:
    add_path_to_sys_path(crt_path)

In [None]:
# Import the helper module defined in section 1 of this notebook
import o16n_regular_ML_R_models_utils
# Instance giving access to the project's file names and directory structure constants
prj_consts = o16n_regular_ML_R_models_utils.R_models_operationalization_consts()

### 2. Set-up the AML SDK infrastructure

* Create Azure resource group (rsg),  workspaces, 
* save sensitive info using [python-dotenv](https://github.com/theskumar/python-dotenv)  
  
Notebook repeatability notes:
* The notebook tries to find and use an existing Azure resource group (rsg) defined by __crt_resource_group__. It creates a new one if needed. 

In [None]:
# Start from an empty dictionary; it is filled in by the cells of section 2.1
sensitive_info = {}

### 2.1 Input here sensitive and configuration information
[dotenv](https://github.com/theskumar/python-dotenv) is used to hide sensitive info, like Azure subscription name/ID. The serialized info needs to be manually input once.  
  
* add the required info in cell below.  
  The info will be packed in the __sensitive_info__ dictionary variable, which will then be saved by a following cell in an .env file (__dotenv_file_path__) that should likely be git ignored.  
* For the COMPUTE_CONTEXT_VM_... information, if you used the __"VM provisioning and configuration via Azure CLI"__  [guide](https://github.com/Azure/AMLSDKOperationalizationRModels/blob/master/README.md) section, you should use the information you saved when running the CLI script to deploy the AML SDK Compute Target machine for this project. 
*  After you have run this cell, and the one that saves the __sensitive_info__ dictionary variable, once with your custom info, you can comment out or remove the content of the sensitive info input cells and leave the __sensitive_info__ variable defined above as an empty python dictionary. 
* An empty __sensitive_info__ dictionary is ignored by the __set_dotenv_info__ function defined above in o16n_regular_ML_R_models_utils.py . 
* The saved .env file will be used thereafter in each cell that starts with %dotenv. 
* If you would like to [use service principal authentication](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/manage-azureml-service/authentication-in-azureml/authentication-in-azure-ml.ipynb) make sure you provide the optional values as well.

In [None]:
# Azure subscription, resource group and AML workspace to use (fill in your own values)
subscription_id=''
resource_group=''
workspace_name=''
# Connection details of the compute context VM (placeholders, replace with your own)
compute_context_vm_user_name=''
compute_context_vm_fqdn='somevm.eastus2.cloudapp.azure.com'
compute_context_vm_ssh_port='22'
compute_context_vm_pwd='somepwd'

# Optional, for service principal authentication. Leave untouched if SP is not used.
sp_tenant_id="YOUR_TENANT_ID" 
sp_application_id="YOUR_SERVICE_PRINCIPAL_APPLICATION_ID"
sp_password="YOUR_SERVICE_PRINCIPAL_PASSWORD" 

In [None]:
# Pack the values entered above under the key names used in the .env file
sensitive_info = {
    'SUBSCRIPTION_ID': subscription_id,
    'RESOURCE_GROUP': resource_group,
    'WORKSPACE_NAME': workspace_name,
    'COMPUTE_CONTEXT_VM_USER_NAME': compute_context_vm_user_name,
    'COMPUTE_CONTEXT_VM_FQDN': compute_context_vm_fqdn,
    'COMPUTE_CONTEXT_VM_SSH_PORT': str(compute_context_vm_ssh_port),
    'COMPUTE_CONTEXT_VM_PWD': compute_context_vm_pwd,
    # service principal authentication is an alternative to interactive login
    'SP_TENANT_ID': sp_tenant_id,
    'SP_APPLICATION_ID': sp_application_id,
    'SP_PASSWORD': sp_password,
}

##### Save sensitive info
An empty __sensitive_info__ variable will be ignored.  
A non-empty __sensitive_info__ variable will overwrite info in an existing .env file.  

In [None]:
%load_ext dotenv
# Make sure the folder that will hold the .env file exists
os.makedirs(os.path.join(*prj_consts.DOTENV_FILE_PATH[:-1]), exist_ok=True)
# Path of the .env file, built from its path components
dotenv_file_path = os.path.join(*prj_consts.DOTENV_FILE_PATH)

# # show .env file path
# dotenv_file_path

# save your sensitive info
o16n_regular_ML_R_models_utils.set_dotenv_info(dotenv_file_path, sensitive_info)

##### Use (load) saved sensitive info

In [None]:
%dotenv $dotenv_file_path

# Read back the values from the process environment
subscription_id = os.environ.get('SUBSCRIPTION_ID')
# # print a bit of subscription ID, to show dotenv file was found and loaded 
# subscription_id[:2]

crt_workspace_name = os.environ.get('WORKSPACE_NAME')
crt_resource_group = os.environ.get('RESOURCE_GROUP')
# Region used when a new workspace has to be created
crt_workspace_region = "eastus2" # or eastus2euap

### 2.2 Access your workspace

* In AML SDK we can get a ws in two ways:  
    - via Workspace(subscription_id = ...)   
    - via Workspace.from_config(path=some_file_path).   
    
For demo purposes, both ways are shown in this notebook.

*  At first notebook run:
    - the AML workspace ws is typically not found, so a new ws object is created and persisted on disk.
    - If the ws has been created other ways (e.g. via Azure portal), it may be persisted on disk by calling ws1.write_config(...).

In [None]:
# File name and folder of the persisted AML workspace configuration
workspace_config_file = prj_consts.AML_WORKSPACE_CONFIG_FILE_NAME
workspace_config_dir = os.path.join(*prj_consts.AML_WORKSPACE_CONFIG_DIR)

# # print debug info if needed
# workspace_config_dir
# ls_l(os.path.join(os.getcwd(), workspace_config_dir))

### 2.3 Login into Azure may be required here

In [None]:
# Resolve the credentials once so the lookup and, if needed, the creation use the same login
ws_auth = o16n_regular_ML_R_models_utils.get_auth(dotenv_file_path)

try:
    # First choice: attach to an already provisioned workspace
    ws1 = Workspace(
        subscription_id=subscription_id,
        resource_group=crt_resource_group,
        workspace_name=crt_workspace_name,
        auth=ws_auth)
    print("Workspace configuration loading succeeded. ")
except Exception as e:
    # Broad on purpose: any failure to reach the workspace falls back to creating it
    print('Exception msg: {}'.format(str(e)))
    print("Workspace not accessible. Will create a new workspace below")

    workspace_region = crt_workspace_region

    # Create the workspace using the specified parameters
    ws1 = Workspace.create(name=crt_workspace_name,
                           auth=ws_auth,
                           subscription_id=subscription_id,
                           resource_group=crt_resource_group,
                           location=workspace_region,
                           create_resource_group=True,
                           exist_ok=False)
    ws1.get_details()

# Persist the subscription id, resource group name and workspace name on disk in both cases,
# so that Workspace.from_config() also works for a workspace that existed before this run.
ws1.write_config(path=os.path.join(os.getcwd(), workspace_config_dir),
                 file_name=workspace_config_file)

# use ws = Workspace.from_config() as shown below to recover the ws, rather than rely on this object
del ws1

#### From now on, even in other notebooks, the provisioned AML workspace will be accessible using Workspace.from_config() as shown below:

In [None]:
# The path argument means different things for the two functions:
#   - from_config(): a file path that explicitly includes the aml_config subdir
#   - write_config(): a dir path, to which an <<aml_config>> subdir is silently added
ws_config_full_path = os.path.join(os.getcwd(), workspace_config_dir, 'aml_config', workspace_config_file)
ws = Workspace.from_config(path=ws_config_full_path)
# # print debug info if needed
# print(ws.name, ws.resource_group, ws.location, ws.subscription_id[0], sep = '\n')
del ws, ws_config_full_path

In [None]:
!jupyter nbconvert --to html 000_RegularR_RealTime_Scripts_and_SDK_setup.ipynb

In [None]:
print('Finished running 000_RegularR_RealTime_Scripts_and_SDK_setup.ipynb!')