# LightGBM Model Train - Experiment Logging
LightGBM を利用してモデルを学習します。本ノートブックでは、Azure Machine Learning Python SDK を利用してメトリックを追跡します。

## Prerequisites
必要なライブラリをインポートします。

In [None]:
import lightgbm as lgb
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
import pandas as pd
import joblib
import os

## Initialize Workspace
Azure Machine Learning Workspace へ接続します。

In [None]:
from azureml.core import Workspace, Experiment, Dataset, Datastore

# Obtain the workspace handle via Workspace.from_config().
ws = Workspace.from_config()

# Echo the connection details, one value per line.
for detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(detail)

## Data Preparation
ボストンの住宅価格のサンプルデータをロードします。

In [None]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
boston = load_boston()

# One column per feature, with the regression target appended as 'target'.
df = pd.DataFrame(boston.data, columns=boston.feature_names).assign(
    target=boston.target
)

In [None]:
#df = Dataset.get_by_name(ws, "boston")

In [None]:
# Local staging directory for the CSV export of the sample data.
DATA_DIR = 'data'
os.makedirs(DATA_DIR, exist_ok=True)

# Write the frame with a header row and without the index column.
csv_path = os.path.join(DATA_DIR, 'boston.csv')
df.to_csv(csv_path, header=True, index=False)

## Register Dataset
サンプルデータを Azure ML Dataset として登録します。

In [None]:
# Default Datastore of the Azure ML workspace
datastore = ws.get_default_datastore()

# Upload the contents of the local DATA_DIR to 'data' in the datastore
# (overwrite=True is passed for files already present there).
datastore.upload(src_dir=DATA_DIR, target_path='data', overwrite=True)

# Build a tabular dataset that references the uploaded CSV in the datastore.
csv_location = (datastore, 'data/boston.csv')
dataset = Dataset.Tabular.from_delimited_files(path=[csv_location])

In [None]:
# Register the dataset in the workspace under the name 'boston', asking for a
# new version on each registration.
dataset.register(ws, name='boston', create_new_version=True)

## Split Data
Azure ML Dataset からデータを読み込み、説明変数と目的変数に分割したうえで、学習用とテスト用のデータに分割します。

In [None]:
# Load the data from the registered Azure ML Dataset named 'boston'
# and convert it to a pandas DataFrame.
boston_dataset = Dataset.get_by_name(ws, name='boston')
df = boston_dataset.to_pandas_dataframe()

In [None]:
# Preview the first rows of the loaded data.
df.head()

In [None]:
# The 'target' column becomes y; every remaining column is a feature in X.
y = df['target']
X = df.drop(columns=['target'])

In [None]:
# Split the data: 20% of the rows are held out for testing, with a fixed
# random_state so the split is repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split

## Model Training
LightGBM を用いて回帰モデルを作成します。

In [None]:
# Experiment name
exp = Experiment(ws, "lgb-aml-exp")

# Start metric tracking; `run` is used for logging in the cells below.
run = exp.start_logging()

In [None]:
# LightGBM hyperparameters, passed to LGBMRegressor as keyword arguments.
# Reference: https://lightgbm.readthedocs.io/en/latest/Parameters.html
params = {
    # Boosting setup
    'boosting_type': 'gbdt',
    'learning_rate': 0.15,
    'n_estimators': 1000,

    # Column subsampling ratio per tree
    'colsample_bytree': 1.0,

    # Regularisation terms
    'reg_alpha': 0.001,
    'reg_lambda': 0.001,

    # Random seed
    'seed': 1234,
}

In [None]:
# callback
def log_evaluation(run):
    """Build a LightGBM callback that forwards evaluation results to ``run``.

    Args:
        run: Object exposing ``log(name, value)``; in this notebook, the run
            returned by ``Experiment.start_logging()``.

    Returns:
        A callable taking the callback environment. Each entry of
        ``env.evaluation_result_list`` is logged under the metric name
        ``'<data_name>+<eval_name>'``. An empty or missing list logs nothing.
    """
    def callback(env):
        # Only the first three fields are needed. Entries may carry extra
        # trailing fields (e.g. is_higher_better, and a stdv value in
        # cross-validation results), so ignore everything after the value.
        for data_name, eval_name, result, *_ in env.evaluation_result_list or ():
            run.log(f'{data_name}+{eval_name}', result)
    return callback

In [None]:
# NOTE: despite the variable name `clf`, this is a regression model.
clf = lgb.LGBMRegressor(**params)

# Early stopping and console logging are configured through callbacks:
# the `early_stopping_rounds` and `verbose` keyword arguments of fit() were
# deprecated in LightGBM 3.3 and removed in 4.0.
# `result` is whatever fit() returns and is the object the later cells read
# metrics from and save as the model.
result = clf.fit(
    X_train, y_train,
    # categorical_feature=X_cat.columns.tolist(),
    eval_set=[(X_train, y_train), (X_test, y_test)],
    eval_names=['train', 'test'],
    callbacks=[
        # Stop when the validation score has not improved for 100 rounds.
        lgb.early_stopping(stopping_rounds=100),
        # Print evaluation results every 50 rounds.
        lgb.log_evaluation(period=50),
        # Forward every round's evaluation results to the Azure ML run.
        log_evaluation(run),
    ],
)


In [None]:
# Log the scores of the best iteration rather than of the last trained round:
# with early stopping, the last entry of evals_result_ belongs to a round past
# the best one, whereas best_score_ matches the iteration used for prediction.
best_scores = result.best_score_
train_l2 = best_scores["train"]["l2"]
test_l2 = best_scores["test"]["l2"]
run.log("train l2", train_l2)
run.log("test l2", test_l2)

## Model Save & Register

モデルを pickle ファイルとして保存し、Azure ML のモデルとして登録します。

In [None]:
# Local directory for the serialized model; created if it does not exist yet.
MODEL_DIR = 'model'
os.makedirs(MODEL_DIR, exist_ok=True)

In [None]:
# Serialize the trained model with joblib into MODEL_DIR.
model_file = 'lgb-aml.pkl'
model_path = os.path.join(MODEL_DIR, model_file)
with open(model_path, 'wb') as fh:
    joblib.dump(result, fh)

In [None]:
# The same relative path is used both as the name inside the run and as the
# local file to upload.
artifact = 'model/' + model_file
run.upload_files(names=[artifact], paths=[artifact])

# Register the uploaded 'model' path as the model 'lgb-aml', passing the
# training dataset along with it.
run.register_model(
    model_name='lgb-aml',
    model_path='model',
    datasets=[('training data', boston_dataset)],
)

In [None]:
# Finish metric tracking by marking the run as complete.
run.complete()

In [None]:
# Evaluate `run` as the cell's last expression so the notebook displays it.
run