In [None]:
import os
from typing import Callable

import pandas as pd
import lightgbm as lgb
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

import mlflow

from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

In [None]:
# Azure ML workspace coordinates. The literal values look like placeholders
# that must be replaced with real ones before running (TODO confirm).
subscription_id = "SUBSCRIPTION_ID"
resource_group = "RESOURCE_GROUP"
workspace = "AML_WORKSPACE_NAME"

# Client bound to the workspace, authenticated via DefaultAzureCredential.
ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

# Look up the workspace and send all subsequent MLflow calls to its
# tracking URI.
workspace_details = ml_client.workspaces.get(ml_client.workspace_name)
azureml_mlflow_uri = workspace_details.mlflow_tracking_uri
mlflow.set_tracking_uri(azureml_mlflow_uri)

In [None]:
# Select the experiment that the runs in this notebook are recorded under.
exp = mlflow.set_experiment(experiment_name="chapter5-lightgbm-notebook")

In [None]:
# Name of the label column in the spreadsheet.
TARGET_COLUMN = "default payment next month"
# Fixed seed so the train/valid/test membership is identical on every run.
SPLIT_SEED = 42

# Load the data as a pandas DataFrame.
# header=1 takes the column names from the second row of the sheet and
# index_col=0 uses the first column as the row index.
credit_df = pd.read_excel(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls",
    header=1,
    index_col=0,
)

# Split: hold out 20% as the test set, then 20% of the remainder as the
# validation set (64% / 16% / 20% overall). random_state makes both splits
# reproducible; without it every execution produced a different partition.
_df, test_df = train_test_split(
    credit_df,
    test_size=0.2,
    random_state=SPLIT_SEED,
)

train_df, valid_df = train_test_split(
    _df,
    test_size=0.2,
    random_state=SPLIT_SEED,
)

# Preprocess: pop() removes the label column from each frame in place, so
# the remaining columns are the features.
y_train = train_df.pop(TARGET_COLUMN)
X_train = train_df.values
train_dataset = lgb.Dataset(X_train, label=y_train)

y_valid = valid_df.pop(TARGET_COLUMN)
X_valid = valid_df.values
valid_dataset = lgb.Dataset(X_valid, label=y_valid)

y_test = test_df.pop(TARGET_COLUMN)
X_test = test_df.values

In [None]:
# Start an MLflow run. The returned object is kept in `run` because later
# cells read run.info.run_id to build the model URI; the run is closed by
# the separate mlflow.end_run() cell.
run = mlflow.start_run()

In [None]:
# LightGBM training parameters; the same mapping is recorded on the
# MLflow run as run parameters.
params = dict(
    objective="binary",
    boosting_type="gbdt",
    num_leaves=20,
    max_depth=10,
    learning_rate=0.1,
    device_type="cpu",
    seed=42,
    deterministic=True,
)
mlflow.log_params(params)

In [None]:
def log_metrics() -> Callable[["lgb.callback.CallbackEnv"], None]:
    """Build a LightGBM callback that logs the first evaluation result to MLflow.

    Returns:
        A callable taking the callback environment. On each invocation it
        logs the first entry of ``env.evaluation_result_list`` to MLflow
        under the metric name, using ``env.iteration`` as the step, and
        prints the same value. The callable's ``order`` attribute is set
        to 10.
    """
    def _callback(env: "lgb.callback.CallbackEnv") -> None:
        results = env.evaluation_result_list
        # Nothing to log when no evaluation results are present (empty or
        # None); indexing [0] would raise here.
        if not results:
            return
        # Entries are indexed as [1] = metric name, [2] = metric value.
        first = results[0]
        metric_name, metric_value = first[1], first[2]
        # Pass the iteration as the step so each value is recorded against
        # its boosting round rather than all at the default step.
        mlflow.log_metric(metric_name, metric_value, step=env.iteration)
        print(f"iteration {env.iteration} {metric_name}: {metric_value}")
    _callback.order = 10
    return _callback

In [None]:
# Callbacks run during training: MLflow metric logging, plus early stopping
# with stopping_rounds=10 evaluated on the validation set.
training_callbacks = [
    log_metrics(),
    lgb.early_stopping(stopping_rounds=10, verbose=True),
]

# Train the booster, evaluating on the validation set under the name "valid".
clf = lgb.train(
    params,
    train_set=train_dataset,
    valid_sets=[valid_dataset],
    valid_names=["valid"],
    callbacks=training_callbacks,
)

In [None]:
# Model scores for the held-out test rows (with objective "binary" these are
# presumably positive-class probabilities — confirm against LightGBM docs).
y_prob = clf.predict(X_test)
# Threshold at 0.5 to get hard 0/1 labels. Vectorized so the loop variable no
# longer reuses (and visually shadows) the name `y_prob`.
y_pred = (y_prob >= 0.5).astype(int)

# Per-class precision / recall / f1 / support as a nested dict.
result = classification_report(y_test, y_pred, output_dict=True)
print(result)
# NOTE(review): only the metrics of class "0" are logged to MLflow; class "1"
# and the aggregate entries in `result` are printed above but not recorded.
# Confirm this is intended.
mlflow.log_metrics(result["0"])

In [None]:
# Write the trained booster to a local file so it can be attached to the
# pyfunc model as an artifact.
model_path = "model.txt"
clf.save_model(filename=model_path)

# Artifact name -> local path; the wrapper model reads the
# "lgb_model_path" entry when it is loaded.
artifacts = dict(lgb_model_path=model_path)

# Infer the model's input/output schema from the test features and the
# predictions made on them.
signature = mlflow.models.signature.infer_signature(
    model_input=X_test,
    model_output=y_prob,
)

In [None]:
class LGBWrapper(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc model that delegates prediction to a LightGBM Booster."""

    def load_context(self, context):
        """Load the booster from the ``lgb_model_path`` artifact.

        Args:
            context: Object whose ``artifacts`` mapping holds the path of
                the saved LightGBM model file under ``"lgb_model_path"``.
        """
        # Imported inside the method rather than relying on a module-level
        # name — presumably so the import happens wherever the model is
        # loaded (confirm).
        import lightgbm as lgb

        booster_file = context.artifacts["lgb_model_path"]
        self.lgb_model = lgb.Booster(model_file=booster_file)

    def predict(self, context, model_input):
        """Return the booster's predictions for ``model_input``.

        Args:
            context: Unused by this method.
            model_input: Features passed straight to ``Booster.predict``.
        """
        booster = self.lgb_model
        return booster.predict(model_input)

In [None]:
# Artifact sub-directory of the run under which the pyfunc model is stored;
# later cells reuse this name to build the model URI.
mlflow_model_dir = "lgb_model"

# Log the wrapper together with the saved booster file, the inferred
# signature, and the conda environment definition.
mlflow.pyfunc.log_model(
    python_model=LGBWrapper(),
    artifact_path=mlflow_model_dir,
    artifacts=artifacts,
    signature=signature,
    conda_env="src/environment.yaml",
)


In [None]:
# Close the MLflow run that was opened with mlflow.start_run() earlier.
mlflow.end_run()

In [None]:
# Load the logged pyfunc model back from the run's artifacts.
loaded_model = mlflow.pyfunc.load_model(
    f"runs:/{run.info.run_id}/{mlflow_model_dir}/"
)

In [None]:
# Print the in-memory booster's predictions, then the reloaded pyfunc
# model's, so the two can be compared by eye.
for predictor in (clf, loaded_model):
    print(predictor.predict(X_test))

In [None]:
# Register the logged model from this run in the model registry.
mlflow.register_model(
    name="chapter5-pyfunc-model",
    model_uri=f"runs:/{run.info.run_id}/{mlflow_model_dir}/",
)