In [54]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

data = load_iris()
train_x, test_x = train_test_split(data.data, train_size=0.8, shuffle=False)
train_y, test_y = train_test_split(data.target, train_size=0.8, shuffle=False)
train_x = pd.DataFrame(train_x, columns=data['feature_names'])
test_x = pd.DataFrame(test_x, columns=data['feature_names'])
train_y = pd.DataFrame(train_y)
test_y = pd.DataFrame(test_y)

print(train_x.shape)
print(train_y.shape)

num_cols = train.shape[1]

(120, 4)
(120, 1)


## xgboost

In [1]:
%pip install xgboost

Collecting xgboost
  Downloading xgboost-1.7.1-py3-none-manylinux2014_aarch64.whl (2.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: xgboost
Successfully installed xgboost-1.7.1
[0mNote: you may need to restart the kernel to use updated packages.


In [56]:
import xgboost as xgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold

scores = []
kf = KFold(n_splits=4, shuffle=True, random_state=71)
for tr_idx, va_idx in kf.split(train_x):
    tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
    tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]
    
    dtrain = xgb.DMatrix(tr_x, label=tr_y)
    dvalid = xgb.DMatrix(va_x, label=va_y)
    dtest = xgb.DMatrix(test_x)
    
    params = {'objective': 'multi:softmax', 'num_class': 3, 'silent': 1, 'eval_metric': 'mlogloss', 'random_state': 71}
    num_round = 50
    
    watch_list = [(dtrain, 'train'), (dvalid, 'eval')]
    model = xgb.train(params, dtrain, num_round, evals=watch_list, early_stopping_rounds=20)
    
    va_xgb_pred = model.predict(dvalid)
    va_xgb_y = np.ravel(va_y.values)
    va_xgb_pred = np.array(va_xgb_pred, dtype='int64')
    print(va_xgb_y)
    print(va_xgb_pred)
    score = accuracy_score(va_xgb_y, va_xgb_pred)
    print(f'acc: {score:.4f}')
    
    pred = model.predict(dtest)

Parameters: { "silent" } are not used.

[0]	train-mlogloss:0.74247	eval-mlogloss:0.73154
[1]	train-mlogloss:0.53368	eval-mlogloss:0.51536
[2]	train-mlogloss:0.39528	eval-mlogloss:0.37386
[3]	train-mlogloss:0.30039	eval-mlogloss:0.27643
[4]	train-mlogloss:0.23324	eval-mlogloss:0.20810
[5]	train-mlogloss:0.18484	eval-mlogloss:0.15901
[6]	train-mlogloss:0.14944	eval-mlogloss:0.12323
[7]	train-mlogloss:0.12323	eval-mlogloss:0.09686
[8]	train-mlogloss:0.10145	eval-mlogloss:0.07771
[9]	train-mlogloss:0.08468	eval-mlogloss:0.06276
[10]	train-mlogloss:0.07171	eval-mlogloss:0.05102
[11]	train-mlogloss:0.06165	eval-mlogloss:0.04282
[12]	train-mlogloss:0.05450	eval-mlogloss:0.03683
[13]	train-mlogloss:0.04790	eval-mlogloss:0.03171
[14]	train-mlogloss:0.04367	eval-mlogloss:0.02854
[15]	train-mlogloss:0.04022	eval-mlogloss:0.02598
[16]	train-mlogloss:0.03658	eval-mlogloss:0.02345
[17]	train-mlogloss:0.03418	eval-mlogloss:0.02117
[18]	train-mlogloss:0.03323	eval-mlogloss:0.02022
[19]	train-mlogloss:

## lightgbm

In [44]:
%pip install lightgbm

Collecting lightgbm
  Downloading lightgbm-3.3.3-py3-none-manylinux2014_aarch64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: lightgbm
Successfully installed lightgbm-3.3.3
[0mNote: you may need to restart the kernel to use updated packages.


In [62]:
import lightgbm as lgb
from sklearn.metrics import log_loss
from sklearn.model_selection import KFold

scores = []
kf = KFold(n_splits=4, shuffle=True, random_state=71)
for tr_idx, va_idx in kf.split(train_x):
    tr_x, va_x = train_x.iloc[tr_idx], train_x.iloc[va_idx]
    tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]
    
    lgb_train = lgb.Dataset(tr_x, tr_y)
    lgb_eval = lgb.Dataset(va_x, va_y)
    
    params = {'objective': 'multiclass', 'num_class': 3, 'verbose': -1, 'metrics': 'multi_logloss', 'seed': 71}
    num_round = 100
    
    model = lgb.train(params, lgb_train, num_boost_round=num_round, valid_names=['train', 'valid'], 
                      valid_sets=[lgb_train, lgb_eval])
    
    va_lgb_pred = model.predict(va_x)
    va_lgb_y = np.ravel(va_y.values)
    va_lgb_pred = np.array(va_lgb_pred, dtype='int64')
    print(va_lgb_y.shape)
    print(va_lgb_pred.shape)
    score = log_loss(va_lgb_y, va_lgb_pred)
    print(f'log_loss: {score:.4f}')
    
    pred = model.predict(test_x)

[1]	train's multi_logloss: 0.897169	valid's multi_logloss: 0.884031
[2]	train's multi_logloss: 0.781904	valid's multi_logloss: 0.776614
[3]	train's multi_logloss: 0.68585	valid's multi_logloss: 0.686672
[4]	train's multi_logloss: 0.607823	valid's multi_logloss: 0.607202
[5]	train's multi_logloss: 0.54304	valid's multi_logloss: 0.540519
[6]	train's multi_logloss: 0.488563	valid's multi_logloss: 0.484401
[7]	train's multi_logloss: 0.44219	valid's multi_logloss: 0.435525
[8]	train's multi_logloss: 0.399796	valid's multi_logloss: 0.384996
[9]	train's multi_logloss: 0.363403	valid's multi_logloss: 0.344582
[10]	train's multi_logloss: 0.332188	valid's multi_logloss: 0.306834
[11]	train's multi_logloss: 0.303563	valid's multi_logloss: 0.275915
[12]	train's multi_logloss: 0.279462	valid's multi_logloss: 0.246297
[13]	train's multi_logloss: 0.257545	valid's multi_logloss: 0.219838
[14]	train's multi_logloss: 0.238376	valid's multi_logloss: 0.202666
[15]	train's multi_logloss: 0.223245	valid's m