In [5]:
# imports
import mlflow

import pandas as pd
import lightgbm as lgbm

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [6]:
!wget -O iris.csv https://azuremlexamples.blob.core.windows.net/datasets/iris.csv

--2021-12-07 09:11:45--  https://azuremlexamples.blob.core.windows.net/datasets/iris.csv
Resolving azuremlexamples.blob.core.windows.net (azuremlexamples.blob.core.windows.net)... 20.60.128.132
Connecting to azuremlexamples.blob.core.windows.net (azuremlexamples.blob.core.windows.net)|20.60.128.132|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4466 (4.4K) [text/csv]
Saving to: ‘iris.csv’


2021-12-07 09:11:46 (298 MB/s) - ‘iris.csv’ saved [4466/4466]



In [7]:
# read the iris data from iris.csv in the working directory
df = pd.read_csv("iris.csv")

# features: every column except the "species" target
X = df.drop(columns=["species"])

# target: the "species" column encoded by LabelEncoder; `enc` is kept so the
# encoded ids can be mapped back to the original labels (enc.inverse_transform)
enc = LabelEncoder()
y = enc.fit_transform(df["species"])

# train/test split: hold out 20% of the rows; the fixed random_state keeps the
# split reproducible across re-runs
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# create lightgbm datasets
train_data = lgbm.Dataset(X_train, label=y_train)
# a validation Dataset should name the training Dataset as its reference so it
# is constructed consistently with the training data
test_data = lgbm.Dataset(X_test, label=y_test, reference=train_data)

In [8]:
# train the model
# turn on MLflow autologging before any training call is made
mlflow.autolog()

# setup parameters
# The number of boosting rounds is defined here only. "num_iterations" is
# deliberately NOT placed in `params`: it names the same setting as
# num_boost_round, and having both with different values makes it unclear how
# many rounds actually run. 16 matches the training log recorded for this cell.
num_boost_round = 16
params = {
    "objective": "multiclass",
    "num_class": 3,
    "boosting": "gbdt",
    "num_leaves": 31,
    "num_threads": 0,
    "learning_rate": 0.1,
    "metric": "multi_logloss",
    "seed": 42,
    "verbose": 0,
}

# train model inside an explicit MLflow run; the held-out set is passed as a
# validation set named "test", so its multi_logloss is reported each round
with mlflow.start_run() as run:
    model = lgbm.train(
        params,
        train_data,
        num_boost_round=num_boost_round,
        valid_sets=[test_data],
        valid_names=["test"],
    )


2021/12/07 09:11:46 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2021/12/07 09:11:46 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


You can set `force_col_wise=true` to remove the overhead.
[1]	test's multi_logloss: 0.930558
[2]	test's multi_logloss: 0.795536
[3]	test's multi_logloss: 0.68756
[4]	test's multi_logloss: 0.593833
[5]	test's multi_logloss: 0.51883
[6]	test's multi_logloss: 0.454422
[7]	test's multi_logloss: 0.401051
[8]	test's multi_logloss: 0.353053
[9]	test's multi_logloss: 0.313256
[10]	test's multi_logloss: 0.276926
[11]	test's multi_logloss: 0.247315
[12]	test's multi_logloss: 0.221442
[13]	test's multi_logloss: 0.199252
[14]	test's multi_logloss: 0.177485
[15]	test's multi_logloss: 0.160641
[16]	test's multi_logloss: 0.144921
