# 1) Init ESML 

In [None]:
# `os` and `sys` are needed by the path setup below; import them here so the
# cell also works in a fresh kernel.
import os
import sys

# Make the ESML SDK folder importable before the `esml` import that follows.
sys.path.append(os.path.abspath("../azure-enterprise-scale-ml/esml/common/"))  # NOQA: E402
from esml import ESMLDataset, ESMLProject
from azureml.core import Workspace

p = ESMLProject() # Makes it "environment aware (dev,test,prod)", and "configuration aware"
ws = p.get_workspace_from_config()
p.describe()

In [None]:
# Demo switch: when True, the following cells unregister all datasets and rebuild the GOLD dataset.
unregister_all_datasets = False

In [None]:
# Optional clean slate - For DEMO purpose
if unregister_all_datasets:
    p.unregister_all_datasets(ws)

p.init(ws)
p.dev_test_prod = "dev"
# Note: This happens in the INIT() method of the DriverAPI/calling application - it is not loaded again for every scoring

# 2) Test an `existing` Model webservice (AKS)?

### 2a) Let's simulate INFERENCE: data arrives from CLIENT to a `DriverAPI` that itself will call `this ModelAPI` (AKS)
Note: `Step 1 and 2 are out of scope` of the MACHINE LEARNING solution (and ESML), other than...
 - `ESML` being the creator of AML pipeline - `Bronze_2_Gold` that you can reuse
 - `ESML lakedesign`, that should be used, if INFERENCE are to be stored/cached in the lake

NOW - the scenario: 

- DriverAPI will:

    - 1)Fetch missing datasources from different systems, to get all data sources ds01, ds02,ds03 & SAVE to INFERENCE/IN folder in the datalake
        - Use the `ESML` lakedesign:  IF `inference_model_version=1` in `settings/project_specific/lake_settings.json` then...
        - `ESML` will switch to write at `INFERENCE/v1/ds01/IN/2020/01/01` folder 
    - 2)Call `Bronze_2_Gold` batch pipeline (AML) - same pipeline that was used for training: `IN->Bronze->Silver->Gold`, except...
        - `Bronze_2_Gold` pipeline will read from IN folder, but configured for "INFERENCE", read/write to `"mirror"` place in the datalake, that will `CACHE` online predicted results
        - (LABEL values are missing of course)
    - 3) `DriverAPI calls ModelAPI` (this AKS webservice) with data to score -> `X_test`

- `ESML` ModelAPI will:
    - 4) `Score` the data, and return results, also `save the results to the datalake`
        - if `p.rnd=True` nothing is stored.
    - 5) `Connect the scored data` to a caller, and the individual scores to an identity (user/machine)
        - GLOBALLY & ESML Managed: The scoring file gets a caller_id. 
            - You: Need to pass the caller GUID as a parameter. 
                - Alternatively, you can have a column in the dataframe called `esml_caller_id_string`, and ESML will use that.
        - LOCALLY: The rows in the dataframe have an identity. 
            - You: Here you need to have a column in the dataframe, such as `user_id`, to be able to connect each row to a scoring.


Step 1 and 2 are out of scope of the MACHINE LEARNING solution  
We START this notebook at step 3 - we have `X_test` in `GOLD_Validate` status 

In [None]:
import datetime as dt

# Current local date rendered as YYYY_MM_DD
now = dt.datetime.now()
folder = dt.datetime.strftime(now, '%Y_%m_%d')
print(folder)

In [None]:
# Only when the demo switch is on: create a GOLD dataset, and SPLIT it
if unregister_all_datasets:
    # Silver version of ds01_diabetes, as a pandas dataframe, becomes the GOLD dataset
    df_01 = p.DatasetByName("ds01_diabetes").Silver.to_pandas_dataframe()
    ds_gold_v1 = p.save_gold(df_01)

    # "Y" is the label column; 0.6 is presumably the train share - TODO confirm against split_gold_3
    label = "Y"
    train_6, validate_set_2, test_set_2 = p.split_gold_3(0.6, label)

In [None]:
# 2) Bronze_2_Gold is done, so we can fetch X_test from the GOLD validate split.
# The call is unpacked into X_test, y_test (presumably the labels) and tags; tags are printed below.
X_test, y_test, tags = p.get_gold_validate_Xy() # Version is default latest
print(tags)

In [None]:
# Preview the first rows of X_test - this is the data passed to the webservice calls below.
X_test.head()

### 2b) Call online ModelAPI - score 1 row (alt 1)

import pandas as pd
# 3) `DriverAPI calls ModelAPI`

# Read the endpoint URL and its key from the workspace's default keyvault.
keyvault = p.ws.get_default_keyvault()
api_url = keyvault.get_secret(name="esml-dev-p02-m03-api")
api_key = keyvault.get_secret(name="esml-dev-p02-m03-apisecret")

# REST call through a STATIC method - simulates "we don't need ESML for this", just a URL and KEY from the DriverAPI.
# NOTE(review): the heading says "score 1 row", but the whole X_test is passed here - confirm what the static method sends.
df = ESMLProject.call_webservice_own_url(X_test, api_url,api_key)
df.head() # Print scoring

### 2b) Call online ModelAPI - score all rows (alt 2) - `via ESML wrapper`
- ESML benefits: Has built in `logic` to save scored_results, to unique folders during the day.
- Note: We can also save a GUID/ID, `to see which CALLER/User-Guid the scoring is about` 
    - We can have a User_id_GUID as a "feature/column" in X_test, or as a parameter in the call

In [None]:
# Select inference model version 1, then print the project's ScoredPath
# (presumably the lake location for scored results - TODO confirm).
p.inferenceModelVersion=1
print(p.ScoredPath)

In [None]:
caller_user_id = '81965d9c-40ca-4e47-9723-5a608a32a0e4' # Connect the scoring to a caller/user, globally for all rows

# Score X_test through the ESML wrapper, tagged with the caller id above.
df = p.call_webservice(p.ws, X_test, caller_user_id, False) # Auto-fetch key from keyvault, 1stRowOnly=False
df.head()

# 3) How to GET SCORE data? how to FILTER scored results?

In [None]:
# Score once more, under a second caller id. This id ('9196...') differs from the
# '8196...' id that the filtered get_scored calls below pass in.
X_test, y_test, tags = p.get_gold_validate_Xy()
caller_user_id = '91965d9c-40ca-4e47-9723-5a608a32a0e4' # Connect the scoring to a caller/user, globally for all rows

df = p.call_webservice(p.ws, X_test, caller_user_id, False) # Auto-fetch key from keyvault, 1stRowOnly=False
df.head()

In [None]:
import datetime as dt

# Current local date as YYYY_MM_DD, used as the date filter for get_scored below
now = dt.datetime.now()
date_filter = dt.datetime.strftime(now, '%Y_%m_%d')
print(date_filter)

In [None]:
# Fetch scored results for today's date_filter; the third argument is the caller id used by the first ESML-wrapper call.
# NOTE(review): the meaning of the second argument ("10") is not visible in this notebook - confirm against p.get_scored.
ds_list, df_all = p.get_scored(date_filter, "10",'81965d9c-40ca-4e47-9723-5a608a32a0e4')

In [None]:
#df = p.get_scored('last_month')
# Same query as the previous cell, but with "1" as the second argument; returns a dataset list and a dataframe.
ds_list1, df_all1 = p.get_scored(date_filter, "1",'81965d9c-40ca-4e47-9723-5a608a32a0e4')
print("Datasets found in filter", len(ds_list1))
# Indexing [0] raises IndexError when no dataset matched the filter.
print("Example: How many rows in 1st dataset? ", ds_list1[0].to_pandas_dataframe().shape[0])
print("All rows", df_all1.shape[0])

df_all1.head()

In [None]:
# Same query without a caller id argument.
ds_list, df_all = p.get_scored(date_filter, "1")
print("Datasets found in filter", len(ds_list))
# Indexing [0] raises IndexError when no dataset matched the filter.
print("Example: How many rows in 1st dataset? ", ds_list[0].to_pandas_dataframe().shape[0])
print("All rows", df_all.shape[0])

**Train model & register** ... this already happened in another notebook/MLOps pipeline step
> model = p.register_active_model()  
> print(model.name, model.description, model.version)  


# 3a) ESML `Deploy a new ONLINE` webservice (AKS)
- Deploy "offline" from old `AutoML run` for `DEV` environment
- To →  `DEV`, `TEST` or `PROD` environment
- ESML saves `API_key in Azure keyvault automatically`
- ESML auto-config solves 4 common 'errors/things': `correct compute name` and `valid replicas, valid agents, valid auto scaling`
    - Tip: You can adjust the number of replicas, and different CPU/memory configuration, or using a different compute target.

In [None]:
# Get the inference config, model and best run for the active model, then deploy that model to AKS.
inference_config, model, best_run = p.get_active_model_inference_config(ws) #  AutoML support 
# The deploy call is unpacked into the service, its URI and kv_aks_api_secret.
service,api_uri, kv_aks_api_secret= p.deploy_automl_model_to_aks(model,inference_config, True) # overwrite_endpoint=True

# Test the NEW AKS WebService

In [None]:
# Re-fetch the GOLD validate split to test the newly deployed webservice.
X_test, y_test, tags = p.get_gold_validate_Xy() # Version is default latest
print(tags)

In [None]:
# Preview the first rows of X_test before sending it to the new webservice.
X_test.head()

In [None]:
# Call the webservice through the ESML wrapper, without a caller id.
# The longer form is: p.call_webservice(p.ws, X_test, caller_user_id, False)
df = p.call_webservice(p.ws, X_test) # Auto-fetch key from keyvault, 1stRowOnly=False
df.head()

# Code to `embed in YOUR DriverApi` - to call this webservice `without ESML`
- `NB!` This will `NOT cache` the results automatically, since not going via the ESML SDK. You need to save the scoring yourself ( if this is needed)
    - ESML benefits: Has built in `logic` to save scored_results, to unique folders during the day.
    - Note: We can also save a GUID/ID, `to see which CALLER/User-Guid the scoring is about`. We can have a User_id_GUID as a "feature/column" in X_test, or as a parameter in the call

In [None]:
def call_webservice_code(rows, api_uri,api_key, allowSelfSigned=True):
    """POST ``rows`` to a scoring webservice and return the reply as a pandas DataFrame.

    Args:
        rows: pandas DataFrame to score. It is serialized as
            ``{"data": [<one record dict per row>]}``.
        api_uri: URL the request is posted to.
        api_key: Key sent as a ``Bearer`` token in the ``Authorization`` header.
        allowSelfSigned: When True (default) and ``PYTHONHTTPSVERIFY`` is unset
            or empty, the ``ssl`` module's default HTTPS context is replaced
            by an unverified one.

    Returns:
        pandas.DataFrame built from the JSON text carried in the response.
    """
    import io  # local import: only needed to hand the JSON text to pandas as a file-like object

    # bypass the server certificate verification on client side
    # NOTE(review): this patches the `ssl` module default; confirm that `requests` actually honours it.
    if allowSelfSigned and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

    payload = json.dumps({'data': rows.to_dict(orient='records')})

    headers = {'Content-Type':'application/json', 'Authorization': 'Bearer ' + api_key}
    resp = requests.post(api_uri, payload, headers=headers)

    # Here you can pass forward the response, save it to the datalake, or as below return a PANDAS dataframe
    # The body is decoded once; the decoded value is then parsed by pandas as JSON text.
    scored_json = json.loads(resp.text)

    # The former `ast.literal_eval(...)` call was dropped: its result was never used, and it
    # raised on valid JSON containing true/false/null.
    # StringIO avoids passing a literal JSON string to read_json, which newer pandas deprecates.
    return pd.read_json(io.StringIO(scored_json)) # to pandas

## 3b) ESML `Deploy BATCH` pipeline
- Deploy same model "offline / previous" `AutoML Run` for `DEV` environment
- To →  `DEV`, `TEST` or `PROD` environment


# 5a alt2) If not using AutoML - you need to manually create `environment + entryscript + inference config`
https://github.com/Azure/MachineLearningNotebooks/blob/bda592a236eaf2dbc54b394e1fa1b539e0297908/how-to-use-azureml/deployment/production-deploy-to-aks/production-deploy-to-aks.ipynb

In [None]:
# Create ENVIRONMENT - If not AUTOML
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies 

# Conda packages (scikit-learn pinned to 0.19.1) plus the pip packages listed below.
conda_deps = CondaDependencies.create(conda_packages=['numpy','scikit-learn==0.19.1','scipy'], pip_packages=['azureml-defaults', 'inference-schema'])
# Environment named 'myenv' that carries those dependencies; it is passed to InferenceConfig further down.
myenv = Environment(name='myenv')
myenv.python.conda_dependencies = conda_deps

In [None]:
%%writefile score.py
import os
import pickle
import json
import numpy
from sklearn.externals import joblib
from sklearn.linear_model import Ridge

def init():
    """Load the pickled sklearn model into the module-level ``model`` global.

    The model file ``sklearn_regression_model.pkl`` is looked up inside the
    folder named by the ``AZUREML_MODEL_DIR`` environment variable.
    """
    global model
    # AZUREML_MODEL_DIR is an environment variable created during deployment.
    # It is the path to the model folder (./azureml-models/$MODEL_NAME/$VERSION)
    # For multiple models, it points to the folder containing all deployed models (./azureml-models)
    model_dir = os.getenv('AZUREML_MODEL_DIR')
    pickle_path = os.path.join(model_dir, 'sklearn_regression_model.pkl')
    # deserialize the model file back into a sklearn model
    model = joblib.load(pickle_path)

# note you can pass in multiple rows for scoring
def run(raw_data):
    """Score the rows found under the ``'data'`` key of a JSON request body.

    Args:
        raw_data: JSON text with a ``'data'`` entry; it is converted to a
            numpy array and passed to the global ``model``'s ``predict``.

    Returns:
        The predictions as a list, or - if anything raises - the exception
        message as a string.
    """
    try:
        rows = numpy.array(json.loads(raw_data)['data'])
        # you can return any data type as long as it is JSON-serializable
        return model.predict(rows).tolist()
    except Exception as exc:
        # Errors are reported to the caller as plain text rather than raised.
        return str(exc)

In [None]:
from azureml.core.model import InferenceConfig
# Pair the score.py entry script with the 'myenv' environment created above.
inf_config = InferenceConfig(entry_script='score.py', environment=myenv)

# OTHER - What more can you retrieve from AutoMLFactory and ESMLProject?

In [None]:
from baselayer_azure_ml import AutoMLFactory
from baselayer_azure_ml import ComputeFactory

# Latest model and the id of its best run, as returned by AutoMLFactory for this workspace
target_model, target_best_run_id = AutoMLFactory().get_latest_model(ws)
print(target_model.name)
print(target_best_run_id)

run, exp = p.get_active_model_run_and_experiment()
# Use the same call shape as the AKS deployment cell: pass the workspace and unpack the three
# returned values, so that `inference_config` holds the config and not the whole tuple.
inference_config, model, best_run = p.get_active_model_inference_config(ws)
print(run)
print(exp.name)