## 추가해야 하는 내용

In [1]:
# Report the version of the Azure ML core SDK this notebook runs against.
import os

import azureml.core

print(f"SDK version: {azureml.core.VERSION}")

SDK version: 1.24.0


# 1. Azure ML Service 작업 영역에 연결

#### Notebook이 실행되는 경로 내에 Azure ML Service Workspace 접속 정보를 가진 config 파일이 있어야 한다.

 - 테스트 배포 시에는 'data/config_dev.json' 사용
 - 실제 배포 시에는 'data/config.json' 사용

In [2]:
from azureml.core import Workspace

# Connect to the workspace described by the given config file.
ws = Workspace.from_config('deployment/config/config_dev.json')

# Show the workspace name, resource group, region and subscription id,
# one "label:value" entry per line.
print('\n'.join([
    '기계학습이름:' + ws.name,
    '리소스그룹이름:' + ws.resource_group,
    '리전이름:' + ws.location,
    '구독ID:' + ws.subscription_id,
]))

기계학습이름:mltest21
리소스그룹이름:bigdatateam
리전이름:koreacentral
구독ID:245d9749-ca17-42f2-ae5e-2aefd9de113b


# 2. 모델 등록

#### 등록할 모델의 버전을 명시해준다. 

In [3]:
# Version tag of the model to register; the pickle file name is derived
# from it by appending the ".pickle" extension.
model_version = "03_050_02"
file_name = f"{model_version}.pickle"
print(file_name)

03_050_02.pickle


## Description 수정해야 함!!!

#### 모델을 등록해준다. description에 현재 모델에 대한 간단한 설명을 추가할 수 있다.

 - 실행하면, [Azure Portal > 기계 학습 > 기계학습이름(위에서 확인) > 모델] 에서 확인 가능함

In [4]:
from azureml.core.model import Model

#library_version = "DL"+sklearn.__version__.replace(".","x")

# Register the pickled model file in the workspace. The model name is the
# version tag, and the file is read from the local pkl directory.
model = Model.register(
    workspace=ws,
    model_name=model_version,
    model_path=f"deployment/data/pkl/{file_name}",
    description="AzureML SDK upgrade",
    tags={"area": "IoT Edge", "type": "azureml-automl"},
)

Registering model 03_050_02


# 3. 이미지 등록

#### 등록된 모델 이름을 사용하여 모델 파일에 대한 경로를 반환

In [5]:
from azureml.core.model import Model

# Resolve the registered model name to the path of its model file.
# The result is stored in `model_path` instead of `model`: rebinding `model`
# here would replace the registered Model object with a plain path string,
# and the later Model.package(models=[model]) call is given that variable.
model_path = Model.get_model_path(model_version, _workspace=ws)
model_path

'azureml-models\\03_050_02\\20\\03_050_02.pickle'

#### 사용되는 package 선언. Edge 환경에서 해당 package가 사용되기 때문에 channel과 필요한 package명을 선언해 주어야 한다.

In [6]:
# Packages installed through pip when the image environment is built.
pip_packages = [
    # Azure ML SDK and Azure storage client
    "azureml-defaults",
    "azureml==0.2.7",
    "azureml-core==1.24.0",
    "azureml-sdk==1.24.0",
    "azureml-automl-core==1.24.0",
    "azureml-automl-runtime==1.24.0",
    "azure-storage-blob==12.6.0",
    # data handling, I/O and modelling libraries
    "numpy==1.18.5",
    "pandas==0.25.3",
    "sqlalchemy==1.3.21",
    "h5py==2.10.0",
    "tqdm==4.54.0",
    "obspy==1.2.2",
    "mysql-connector-python==8.0.18",
    "joblib==0.14.1",
    "packaging==20.7",
    "xgboost==0.90",
    "noisereduce==1.1.0",
]

# Packages installed through conda: the YAML parser used to load the
# config, plus the compiler packages and librosa.
lib_config_load = ["pyyaml"]
lib_clfs = ["gxx_linux-64", "gcc_linux-64", "librosa==0.8.0"]
#lib_clfs = ['tensorflow==1.1']
conda_packages = [*lib_clfs, *lib_config_load]

print("pip packages:", pip_packages)
print("conda packasges:", conda_packages)

pip packages: ['azureml-defaults', 'azureml==0.2.7', 'azureml-core==1.24.0', 'azureml-sdk==1.24.0', 'azureml-automl-core==1.24.0', 'azureml-automl-runtime==1.24.0', 'azure-storage-blob==12.6.0', 'numpy==1.18.5', 'pandas==0.25.3', 'sqlalchemy==1.3.21', 'h5py==2.10.0', 'tqdm==4.54.0', 'obspy==1.2.2', 'mysql-connector-python==8.0.18', 'joblib==0.14.1', 'packaging==20.7', 'xgboost==0.90', 'noisereduce==1.1.0']
conda packasges: ['gxx_linux-64', 'gcc_linux-64', 'librosa==0.8.0', 'pyyaml']


In [7]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Dependency specification built from the conda and pip package lists,
# with the conda-forge and defaults channels added.
conda_deps = CondaDependencies.create(
    pip_packages=pip_packages,
    conda_packages=conda_packages,
)
conda_deps.add_channel("conda-forge")
conda_deps.add_channel("defaults")

# Environment used for the image: the dependencies above on top of the
# Azure ML base Docker image.
myenv = Environment(name="myenv")
myenv.docker.base_image = "mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04"
# myenv.docker.base_image = 'mcr.microsoft.com/azureml/base:openmpi3.1.2-ubuntu18.04'
myenv.python.conda_dependencies = conda_deps

# Optional: dump the environment definition to a file.
# with open("data/myenv.yml","w") as f:
#     f.write(myenv.serialize_to_string())

# 4. score_iot.py 파일(전처리 및 ML 판정 수행 로직 실행 스크립트) 생성

 ### 아래 cell 이 실행되면 score_iot.py에 실행 스크립트가 생성됨

### score_iot_test.ipynb에 해당 스크립트의 내용을 복사하여, 실행 시 에러가 발생하지 않는지 꼭 확인!

In [8]:
import _locale


def _utf8_default_locale(*args):
    """Return the fixed locale ``['en_US', 'utf8']``, ignoring all arguments."""
    return ['en_US', 'utf8']


# Replace the private default-locale hook with the fixed-value function.
# NOTE(review): presumably so the script written by the next cell is encoded
# as UTF-8 regardless of the host locale; confirm.
_locale._getdefaultlocale = _utf8_default_locale

In [9]:
%%writefile src/score_iot.py


# For Edge
import json
import yaml
import time
import os
import re
import pickle
import shutil
from datetime import datetime
from datetime import timedelta
from time import sleep
from azureml.core.model import Model

# For Clf
import h5py
import pandas as pd
from classify import Classifier
from preprocess_signal_data import Run_
from pkgs.pipelines import FeatureDeployed as Featurizing
from pkgs.utils import read_json


def init_preprocessor(feature_params, Featurizing, test_no, input_type="ReadData"):
    """Create the preprocessing runner, optionally without its "ReadData" step.

    Args:
        feature_params: Feature parameters forwarded to ``Run_``.
        Featurizing: Featurizer class forwarded to ``Run_``.
        test_no: Test numbers forwarded to ``Run_``.
        input_type: When equal to ``"ReadData"`` (the default), every pipeline
            step whose ``name`` is ``"ReadData"`` is removed from the
            runner's pipeline.

    Returns:
        The configured ``Run_`` instance.
    """
    runner = Run_(
        test_no=test_no, Featurizing=Featurizing, feature_params=feature_params
    )
    if input_type != "ReadData":
        return runner

    # Keep every (step_name, step) pair except the "ReadData" steps.
    kept_steps = []
    for step_name, step in runner.pl.steps:
        if step.name != "ReadData":
            kept_steps.append((step_name, step))
    runner.pl.steps = kept_steps
    return runner


def score(
    preproc,
    input_values,
    feature_names,
    column_names,
    cut_off,
    rule_model_params,
    mode,
    input_name="ReadData",
    parallel=1,
    verbose=0,
):
    """Featurize one input, predict with the loaded model and classify it.

    Args:
        preproc: Preprocessor whose ``pl.run`` produces the features and whose
            ``featurizer.params["test_no"]`` selects the rows to classify.
        input_values: Raw input; passed to the pipeline wrapped in a list.
        feature_names: Feature names forwarded to the pipeline.
        column_names: Columns of the feature frame fed to the model.
        cut_off: Threshold applied to ``PROB`` to derive ``ML_R``.
        rule_model_params: Forwarded to the classifier as ``rule_cutoffs``.
        mode: Forwarded to the classifier.
        input_name: Forwarded to the pipeline.
        parallel: Forwarded to the pipeline.
        verbose: Forwarded to the pipeline.

    Returns:
        Tuple ``(df, df_features)``: the frame returned by
        ``Classifier.classify_all`` and the model input columns.
    """
    # Preprocess: only the first element of the pipeline output is used.
    pipeline_output = preproc.pl.run(
        input_name=input_name,
        input_values=[input_values],
        feature_names=feature_names,
        parallel=parallel,
        verbose=verbose,
    )
    df = pd.DataFrame(pipeline_output[0])
    # The fixed three-element assignment requires exactly three rows.
    df["TEST_NO"] = [0, 1, 2]
    df_features = df[column_names]

    # Predict with the module-level model set by init(); column 1 of
    # predict_proba is kept as the probability.
    probabilities = loaded_model.predict_proba(df_features)
    df["PROB"] = probabilities[:, 1]

    # Classify: ML_R is 1 where PROB reaches the cut-off, otherwise 0.
    df["ML_R"] = (df["PROB"] >= cut_off).astype(int)
    selected_tests = preproc.featurizer.params["test_no"]
    df = Classifier.classify_all(
        df=df[df["TEST_NO"].isin(selected_tests)],
        rule_cutoffs=rule_model_params,
        mode=mode,
    )
    return df, df_features


def init():
    """Load configuration, preprocessor and model into module-level globals.

    Reads the feature parameters and model feature names from JSON, reads the
    line name, cut-off and rule/ML model parameters from the edge YAML config,
    builds the preprocessor, unpickles the model, and creates the directory
    that receives copies of inputs which failed to process.

    Raises:
        yaml.YAMLError: If the edge config cannot be parsed. The error is
            printed and re-raised, because nothing below can work without it.
        KeyError: If an expected key is missing from the edge config.
    """
    global preproc, loaded_model, model_version, column_names, line, cut_off, rule_model_params, ml_model_params, model_path, error_dir
    # NEEDS EDITING per model ------------------------------------------
    model_version = "03_050_02"  # enter the model_name
    file_name = "{}.pickle".format(model_version)
    edge_config_path = "/home/data/edge_config.yml"
    # For a local run use "deployment/data/pkl/<model_version>.pickle" and
    # "deployment/config/edge_config.yml" instead.
    feature_params = read_json("src/config/featureD050_parameters.json")
    column_names = read_json("src/config/model_03_050_02_features.json")["feature_names"]
    # ----------------------------------------------------------------

    # load line info
    with open(edge_config_path, "r") as stream:
        try:
            # BaseLoader yields every scalar as a string, hence the float()
            # conversions below.
            edge_settings = yaml.load(stream, Loader=yaml.BaseLoader)
        except yaml.YAMLError as exc:
            print("line config error: ", exc)
            raise
    line = edge_settings["config"]["line"]["name"]
    cut_off = float(edge_settings["config"]["param"]["cutoff"])
    rule_model_params = edge_settings["config"]["param"]["rule_model"]
    ml_model_params = edge_settings["config"]["param"]["ml_model"]

    # set preprocess class: the ML parameters from the edge config override
    # the matching entries of the feature parameters.
    for param_, dic_ in ml_model_params.items():
        for key_, value_ in dic_.items():
            feature_params[param_][key_] = float(value_)
    preproc = init_preprocessor(
        feature_params=feature_params, Featurizing=Featurizing, test_no=[0, 1, 2]
    )

    # load model
    model_path = Model.get_model_path(file_name)
    with open(model_path, "rb") as f:
        # NOTE: unpickling executes code embedded in the file; only load
        # model files from a trusted source.
        loaded_model = pickle.load(f)

    # Directory for copies of failed inputs. The mode must be octal: decimal
    # 777 is 0o1411, which does not grant rwx to everyone. Creation stays
    # best-effort, but a failure is now reported instead of silently dropped.
    error_dir = "/home/data/error_file/"
    try:
        os.makedirs(error_dir, mode=0o777, exist_ok=True)
    except OSError as exc:
        print("error_dir create error: ", exc)


def _copy_to_error_dir(input_path):
    """Copy a failed input file into ``error_dir`` on a best-effort basis.

    A failed copy is printed and otherwise ignored, so it can never replace
    the scoring result that is returned to the caller.
    """
    try:
        shutil.copy(input_path, error_dir)
    except (OSError, TypeError, ValueError) as exc:
        print("error file copy failed: ", exc)


def run(input_json):
    """Score one input file described by a JSON message.

    Args:
        input_json: JSON string. The keys ``"chtime"`` (timestamp string) and
            ``"path"`` (path of the HDF5 file holding a ``"Raw"`` dataset)
            are read.

    Returns:
        A one-element list containing the input message, extended with timing,
        threshold, feature and result fields, serialized as a JSON string.
        On failure ``"r"`` and ``"prob"`` are set to 2 and ``"error"`` holds
        the failure message.

    Raises:
        KeyError, ValueError: If the message is not valid JSON, lacks
            ``"chtime"``/``"path"``, or ``"chtime"`` has an unexpected format.
    """
    print("\n", "mlmodule start")
    # NOTE(review): the +9h offset presumably converts a UTC clock to KST;
    # confirm against the container's time zone.
    print("\n", datetime.now() + timedelta(hours=9), "\n")
    # for test#
    input_json = json.loads(input_json)
    print("\n", "json loaded", "\n")
    print(input_json, "\n")
    mltime = datetime.now() + timedelta(hours=9)
    chtime = input_json["chtime"]
    print("chtime : ", chtime)
    # Replace the "T" separator, drop anything after "+", drop the last
    # character, then parse.
    # NOTE(review): the dropped character is presumably a 7th
    # fractional-second digit, which "%f" cannot parse; confirm.
    ct = datetime.strptime(
        chtime.replace("T", " ").split("+")[0][:-1], "%Y-%m-%d %H:%M:%S.%f"
    )
    diff = mltime - ct
    input_json["chtime"] = str(ct)
    input_json["mltime"] = str(mltime)
    # total_seconds() also accounts for the days component and for negative
    # deltas, which diff.seconds + diff.microseconds / 1e6 does not.
    input_json["etime_ch"] = diff.total_seconds()

    # file load
    init_time = time.time()
    input_path = input_json["path"]
    print("\n", input_path)

    # Up to three load attempts with a short pause after each failure.
    input_raw = None
    load_error = None
    for attempt in range(3):
        print("Attempt: " + str(attempt + 1) + "  Time: " + str(datetime.now()))
        try:
            with h5py.File(input_path, "r") as f:
                tmp = f["Raw"][:]
            input_raw = pd.DataFrame(tmp, columns=[0, 1, 2])
        except Exception as ex:
            load_error = str(ex)
            sleep(0.02)
            continue
        print("\n", "hdf loaded")
        load_error = None
        break

    if load_error is not None:
        # NOTE(review): "b" is kept so the output keys stay unchanged; it
        # looks like a typo for "r" (set further below). Confirm with the
        # consumer of this message.
        input_json["b"] = 2
        input_json["prob"] = 2
        input_json["error"] = load_error
        input_json["etime"] = time.time() - init_time

        print("*" * 5, " ", "LOAD ERROR", " ", "*" * 5)
        print([json.dumps(input_json)])
        _copy_to_error_dir(input_path)

    # for test#
    input_json["etime_load"] = time.time() - init_time

    ### json insert
    input_json["cutoff"] = cut_off
    input_json["TRHD_NM_SET"] = "/".join(
        list(rule_model_params)
        + [f"{key_}-{param}" for key_, dict_ in ml_model_params.items() for param in dict_]
    )
    input_json["TRHD_VAL_SET"] = "/".join(
        [str(value) for value in rule_model_params.values()]
        + [str(value) for dict_ in ml_model_params.values() for value in dict_.values()]
    )
    # Feature names repeated once per test, each with its test suffix.
    input_json["FTUR_NM_SET"] = "/".join(
        "/".join(col + suffix for col in column_names)
        for suffix in ("_TEST_N1", "_TEST_N2", "_TEST_N3")
    )
    input_json["lid"] = line
    input_json["v"] = model_version

    try:
        filename = input_path.split("/")[-1]
        # Remove only a literal "Data\" prefix. str.strip("Data\\") strips
        # any of the characters D, a, t and "\" from both ends and could
        # eat the first characters of the barcode itself.
        prefix = "Data\\"
        if filename.startswith(prefix):
            filename = filename[len(prefix):]
        filename = filename.split("_")
        input_json["bc"] = "_".join(filename[0:2])
        input_json["dtfull"] = filename[2][:-3]

    except Exception as ex:  # error type
        input_json["bc"] = "ERROR"
        input_json["dtfull"] = str(datetime.now())
        input_json["r"] = 2
        input_json["prob"] = 2
        input_json["error"] = str(ex)
        input_json["etime"] = time.time() - init_time

        print("*" * 5, " ", "FILENAME ERROR", " ", "*" * 5)
        print([json.dumps(input_json)])
        # shutil.copy instead of a shell "cp" built from message content.
        _copy_to_error_dir(input_path)

    if load_error is not None:
        # Nothing to score: report the original load error instead of the
        # unrelated exception that scoring without data would raise. The
        # file was already copied to error_dir above.
        input_json["r"] = 2
        input_json["prob"] = 2
        input_json["error"] = load_error
        input_json["etime"] = time.time() - init_time
        input_json["FTUR_VAL_SET"] = ""

        result_json = [json.dumps(input_json)]
        print("*" * 5, " ", "ERROR", " ", "*" * 5)
        print(result_json)
        return result_json

    try:
        df, df_features = score(
            preproc=preproc,
            input_values=input_raw,
            feature_names=preproc.featurizer.params["feature_names"],
            column_names=column_names,
            cut_off=cut_off,
            rule_model_params=rule_model_params,
            mode="or",
            input_name="ReadData",
            parallel=1,
            verbose=0,
        )

        # Only the first row of the classified frame is reported.
        input_json["r"] = int(df.R.values[0])
        input_json["prob"] = float(df.PROB.values[0])
        input_json["error"] = str(df.TEST_NG_R.values[0])
        input_json["etime"] = time.time() - init_time
        input_json["FTUR_VAL_SET"] = "/".join(
            [str(value) for value in df_features.values.flatten()]
        )

        result_json = [json.dumps(input_json)]
        print(result_json)

    except Exception as ex:  # error type
        input_json["r"] = 2
        input_json["prob"] = 2
        input_json["error"] = str(ex)
        input_json["etime"] = time.time() - init_time
        input_json["FTUR_VAL_SET"] = ""

        result_json = [json.dumps(input_json)]
        print("*" * 5, " ", "ERROR", " ", "*" * 5)
        print(result_json)
        _copy_to_error_dir(input_path)

    return result_json


Overwriting src/score_iot.py


In [11]:
import os
# Show the notebook's current working directory; the relative paths used in
# the other cells (e.g. "src", "deployment/...") are resolved against it.
os.getcwd()

'/mnt/batch/tasks/shared/LS_root/mounts/clusters/mc-cpi01/code/MC2'

# 5. Azure MLWS에 이미지 업로드

In [15]:
from azureml.core.model import InferenceConfig

# Inference configuration: the entry script score_iot.py from the "src"
# directory, run in the environment defined earlier.
inference_config = InferenceConfig(
    source_directory="src",
    entry_script="score_iot.py",
    environment=myenv,
)

# Start packaging the model into the image "nam-mlmodule" with label 17.
package_service = Model.package(
    workspace=ws,
    models=[model],
    inference_config=inference_config,
    image_name="nam-mlmodule",
    image_label=17,
)

In [16]:
# Wait for the package creation to finish, showing its output as it runs.
package_service.wait_for_creation(show_output=True)

2021/03/22 05:37:12 Downloading source code...
2021/03/22 05:37:14 Finished downloading source code
2021/03/22 05:37:14 Creating Docker network: acb_default_network, driver: 'bridge'
2021/03/22 05:37:14 Successfully set up Docker network: acb_default_network
2021/03/22 05:37:14 Setting up Docker configuration...
2021/03/22 05:37:15 Successfully set up Docker configuration
2021/03/22 05:37:15 Logging in to registry: lsmcdevmlwsfe4aa203.azurecr.io
2021/03/22 05:37:16 Successfully logged into lsmcdevmlwsfe4aa203.azurecr.io
2021/03/22 05:37:16 Executing step ID: acb_step_0. Timeout(sec): 5400, Working directory: '', Network: 'acb_default_network'
2021/03/22 05:37:16 Scanning for dependencies...
2021/03/22 05:37:17 Successfully scanned dependencies
2021/03/22 05:37:17 Launching container with name: acb_step_0
Sending build context to Docker daemon  67.07kB
Step 1/18 : FROM mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04@sha256:a1b514f3ba884b9a7695cbba5638933ddaf222e8ce3e8c81e8cdf8

In [17]:
# Print the state of the package operation.
print(package_service.state)

Succeeded


In [18]:
# Print the location (registry reference) of the created package image.
print(package_service.location)

lsmcdevmlwsfe4aa203.azurecr.io/nam-mlmodule@sha256:fc279fcc08d4f7cc5dd55250fa879e57f0718c84337fa11e86a200d9cfee4f0c
