In [1]:
import numpy as np
import pandas as pd

In [2]:
from lightautoml.reader.gpu.cudf_reader import CudfReader
from lightautoml.reader.base import PandasToPandasReader

from lightautoml.transformers.base import SequentialTransformer

from lightautoml.pipelines.utils import get_columns_by_role

from lightautoml.transformers.gpu import numeric_gpu, categorical_gpu, datetime_gpu
from lightautoml.transformers import numeric, categorical, datetime

from lightautoml.tasks import Task
from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML

import pandas as pd
import time
from sklearn.model_selection import train_test_split

from dask.distributed import Client
from dask_cuda import LocalCUDACluster
import cudf

from lightautoml.dataset.roles import TargetRole

import os
import joblib

In [3]:
# --- Experiment configuration -------------------------------------------------
key = 'airlines'      # entry of data_info.pkl to benchmark
adv_roles = True
args_fold = 2         # fold id held out for inference; all other folds are used for training

# Root directory of the benchmark assets; every path below is resolved against it.
data_root = "../../data/old_presets"

# NOTE(security): joblib.load unpickles its input and can execute arbitrary code.
# Only point these calls at trusted, locally produced files.
data_info = joblib.load(os.path.join(data_root, 'data_info.pkl'))[key]
folds = joblib.load(os.path.join(data_root, 'folds', '{0}.pkl'.format(key)))

# Dataset-specific keyword arguments for pd.read_csv (optional in the metadata).
read_csv_params = {}
if 'read_csv_params' in data_info:
    read_csv_params = {**read_csv_params, **data_info['read_csv_params']}

data = pd.read_csv(os.path.join(data_root, 'data', data_info['path']), **read_csv_params)

# `folds` is used further down as a row-wise mask over `data`, so the two must line up.
assert len(folds) == len(data), 'Fold assignment does not match the number of data rows'

# Remove the columns the metadata marks as unwanted (new frame instead of in-place mutation).
if 'drop' in data_info:
    data = data.drop(data_info['drop'], axis=1)

# Re-encode the target through the metadata's class map; unmapped labels become NaN,
# which the assertion below turns into a hard failure.
if 'class_map' in data_info:
    data[data_info['target']] = data[data_info['target']].map(data_info['class_map']).values
    assert data[data_info['target']].notnull().all(), 'Class mapping is set unproperly'

print(data.head())
print("task type:", data_info['task_type'])

# Role mapping passed to every fit_predict call below: the target column only.
roles = {TargetRole(): data_info['target']}

  Airline  Flight AirportFrom AirportTo  DayOfWeek  Time  Length  Delay
0      CO     269         SFO       IAH          3    15     205      1
1      US    1558         PHX       CLT          3    15     222      1
2      AA    2400         LAX       DFW          3    20     165      1
3      AA    2466         SFO       DFW          3    20     195      1
4      AS     108         ANC       SEA          3    30     202      0
task type: binary


## Imports (for potential use)

In [4]:
# Imports from our package
from lightautoml.automl.base import AutoML

from lightautoml.automl.presets.gpu.tabular_gpu_presets import TabularAutoMLGPU, TabularUtilizedAutoMLGPU
from lightautoml.tasks import Task

from lightautoml.pipelines.features.gpu.lgb_pipeline_gpu import LGBSimpleFeaturesGPU, LGBAdvancedPipelineGPU
from lightautoml.pipelines.features.gpu.linear_pipeline_gpu import LinearFeaturesGPU

from lightautoml.pipelines.features.lgb_pipeline import LGBSimpleFeatures, LGBAdvancedPipeline
from lightautoml.pipelines.features.linear_pipeline import LinearFeatures


from lightautoml.ml_algo.gpu.boost_cb_gpu import BoostCBGPU
from lightautoml.ml_algo.gpu.boost_xgb_gpu import BoostXGB
from lightautoml.ml_algo.gpu.linear_gpu import LinearLBFGSGPU

from lightautoml.ml_algo.boost_cb import BoostCB
from lightautoml.ml_algo.linear_sklearn import LinearLBFGS


from lightautoml.pipelines.ml.base import MLPipeline
from lightautoml.pipelines.selection.importance_based import ModelBasedImportanceEstimator, ImportanceCutoffSelector

## TabularAutoML

In [5]:
# Task for the CPU preset below; the task type string comes from the dataset metadata
# (data_info['task_type'], printed by the loading cell).
task = Task(data_info['task_type'])

In [6]:
# CPU preset: 3600 s time budget, 4 cores, 3-fold CV with a fixed seed,
# restricted to the linear, CatBoost and LightGBM algorithms.
automl = TabularAutoML(
    task=task,
    timeout=3600,
    cpu_limit=4,
    reader_params=dict(n_jobs=4, cv=3, random_state=42),
    general_params=dict(use_algos=['linear_l2', 'cb', 'lgbm']),
)

In [7]:
# Fit the CPU preset on every row outside the held-out fold.
# reset_index(drop=True) renumbers the rows without first turning the old index into a
# column, so it also works when the index is named or a feature is literally called 'index'.
cpu_fit_pred = automl.fit_predict(
    data[folds != args_fold].reset_index(drop=True),
    roles=roles,
    verbose=2,
)

[14:20:56] Stdout logging level is INFO2.
[14:20:56] Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer
[14:20:56] Task: binary

[14:20:56] Start automl preset with listed constraints:
[14:20:56] - time: 3600.00 seconds
[14:20:56] - CPU: 4 cores
[14:20:56] - memory: 16 GB

[14:20:56] [1mTrain data shape: (431506, 8)[0m

[14:21:00] Feats was rejected during automatic roles guess: []
[14:21:00] Layer [1m1[0m train process start. Time left 3595.81 secs
[14:21:11] Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...
[14:21:11] ===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m =====
[14:21:14] ===== Start working with [1mfold 1[0m for [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m =====
[14:21:16] ===== Start working with [1mfold 2[0m for [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m =====
[14:21:19] Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m0.7079384207630041[0m
[14:21:19] [1mLvl_0_Pipe_0_Mod_0_Line

  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
Default metric period is 5 because AUC is/are not implemented for GPU


[14:21:46] ===== Start working with [1mfold 1[0m for [1mLvl_1_Pipe_0_Mod_0_CatBoost[0m =====


  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
Default metric period is 5 because AUC is/are not implemented for GPU


[14:21:54] ===== Start working with [1mfold 2[0m for [1mLvl_1_Pipe_0_Mod_0_CatBoost[0m =====


  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
Default metric period is 5 because AUC is/are not implemented for GPU


[14:22:01] Fitting [1mLvl_1_Pipe_0_Mod_0_CatBoost[0m finished. score = [1m0.7167728338119556[0m
[14:22:01] [1mLvl_1_Pipe_0_Mod_0_CatBoost[0m fitting and predicting completed
[14:22:01] Time left 3535.22 secs

[14:22:01] [1mLayer 2 training completed.[0m

[14:22:01] Layer [1m3[0m train process start. Time left 3535.20 secs
[14:22:13] Start fitting [1mLvl_2_Pipe_0_Mod_0_LightGBM[0m ...
[14:22:13] ===== Start working with [1mfold 0[0m for [1mLvl_2_Pipe_0_Mod_0_LightGBM[0m =====
[14:22:18] ===== Start working with [1mfold 1[0m for [1mLvl_2_Pipe_0_Mod_0_LightGBM[0m =====
[14:22:24] ===== Start working with [1mfold 2[0m for [1mLvl_2_Pipe_0_Mod_0_LightGBM[0m =====
[14:22:32] Fitting [1mLvl_2_Pipe_0_Mod_0_LightGBM[0m finished. score = [1m0.7176078633171086[0m
[14:22:32] [1mLvl_2_Pipe_0_Mod_0_LightGBM[0m fitting and predicting completed
[14:22:32] Time left 3504.09 secs

[14:22:32] [1mLayer 3 training completed.[0m

[14:22:32] [1mAutoml preset training completed

In [8]:
# Inference with the CPU preset on the held-out fold.
# reset_index(drop=True) renumbers the rows without materialising the old index as a column.
cpu_pred = automl.predict(data[folds == args_fold].reset_index(drop=True))

  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,


In [9]:
# Rebinds `task` for the single-GPU preset below: same task type, device set to 'gpu'.
# The CPU `automl` object was built from the previous `task` value before this rebinding.
task = Task(data_info['task_type'], device='gpu')

In [10]:
# Single-GPU preset: 3600 s time budget, one CPU core, 3-fold CV with a fixed seed,
# restricted to the linear, XGBoost and CatBoost algorithms.
automl_gpu = TabularAutoMLGPU(
    task=task,
    timeout=3600,
    cpu_limit=1,
    reader_params=dict(n_jobs=1, cv=3, random_state=42),
    general_params=dict(use_algos=['linear_l2', 'xgb', 'cb']),
)

In [11]:
# Fit the single-GPU preset on every row outside the held-out fold.
# reset_index(drop=True) renumbers the rows without first turning the old index into a
# column, so it also works when the index is named or a feature is literally called 'index'.
gpu_fit_pred = automl_gpu.fit_predict(
    data[folds != args_fold].reset_index(drop=True),
    roles=roles,
    verbose=2,
)

[14:22:39] Stdout logging level is INFO2.
[14:22:39] Task: binary

[14:22:39] Start automl preset with listed constraints:
[14:22:39] - time: 3600.00 seconds
[14:22:39] - CPU: 1 cores
[14:22:39] - memory: 16 GB

[14:22:39] Train data shape: (431506, 8)
Feats was rejected during automatic roles guess: []
[14:22:40] Layer [1m1[0m train process start. Time left 3599.34 secs
[14:22:41] Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...
[14:22:41] ===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m (orig) =====
[14:22:47] ===== Start working with [1mfold 1[0m for [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m (orig) =====
[14:22:50] ===== Start working with [1mfold 2[0m for [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m (orig) =====
[14:22:54] Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m0.7083783149719238[0m
[14:22:54] [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting completed
[14:22:54] Time left 3584.54 secs

[14:22:54] [1mLayer 1 training 

  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
Default metric period is 5 because AUC is/are not implemented for GPU


[14:23:06] [1mSelector_CatBoostGPU[0m fitting and predicting completed
[14:23:07] Start fitting [1mLvl_1_Pipe_0_Mod_0_XGB[0m ...
[14:23:07] ===== Start working with [1mfold 0[0m for [1mLvl_1_Pipe_0_Mod_0_XGB[0m (orig) =====
[14:23:14] ===== Start working with [1mfold 1[0m for [1mLvl_1_Pipe_0_Mod_0_XGB[0m (orig) =====
[14:23:22] ===== Start working with [1mfold 2[0m for [1mLvl_1_Pipe_0_Mod_0_XGB[0m (orig) =====
[14:23:33] Fitting [1mLvl_1_Pipe_0_Mod_0_XGB[0m finished. score = [1m0.7159843444824219[0m
[14:23:33] [1mLvl_1_Pipe_0_Mod_0_XGB[0m fitting and predicting completed
[14:23:33] Time left 3546.40 secs

[14:23:33] [1mLayer 2 training completed.[0m

[14:23:33] Layer [1m3[0m train process start. Time left 3546.40 secs
[14:23:34] Start fitting [1mLvl_2_Pipe_0_Mod_0_CatBoostGPU[0m ...
[14:23:34] ===== Start working with [1mfold 0[0m for [1mLvl_2_Pipe_0_Mod_0_CatBoostGPU[0m (orig) =====


  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
Default metric period is 5 because AUC is/are not implemented for GPU


[14:23:39] ===== Start working with [1mfold 1[0m for [1mLvl_2_Pipe_0_Mod_0_CatBoostGPU[0m (orig) =====


  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
Default metric period is 5 because AUC is/are not implemented for GPU


[14:23:48] ===== Start working with [1mfold 2[0m for [1mLvl_2_Pipe_0_Mod_0_CatBoostGPU[0m (orig) =====


  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
Default metric period is 5 because AUC is/are not implemented for GPU


[14:23:56] Fitting [1mLvl_2_Pipe_0_Mod_0_CatBoostGPU[0m finished. score = [1m0.7170827984809875[0m
[14:23:56] [1mLvl_2_Pipe_0_Mod_0_CatBoostGPU[0m fitting and predicting completed
[14:23:56] Time left 3523.13 secs

[14:23:56] [1mLayer 3 training completed.[0m

[14:23:56] [1mAutoml preset training completed in 76.87 seconds[0m

[14:23:56] Model description:
Models on level 0:
	 3 averaged models Lvl_0_Pipe_0_Mod_0_LinearL2

Models on level 1:
	 3 averaged models Lvl_1_Pipe_0_Mod_0_XGB

Final prediction for new objects (level 2) = 
	 1.00000 * (3 averaged models Lvl_2_Pipe_0_Mod_0_CatBoostGPU) 



In [12]:
# Inference with the GPU-fitted preset (before to_cpu()) on the held-out fold.
# reset_index(drop=True) renumbers the rows without materialising the old index as a column.
gpu_inf = automl_gpu.predict(data[folds == args_fold].reset_index(drop=True))

  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,


In [13]:
# Convert the fitted GPU preset in place via to_cpu(); the next cell reuses `automl_gpu`
# for what it names CPU inference (`cpu_inf`).
# NOTE(review): presumably this moves every fitted model/transformer off the GPU — confirm
# against the TabularAutoMLGPU implementation. The call also prints a long parameter dump.
automl_gpu.to_cpu()

Models parameters: {'_init_params': {'task_type': 'GPU', 'devices': '0', 'thread_count': 1, 'random_seed': 42, 'learning_rate': 0.05, 'l2_leaf_reg': 0.01, 'bootstrap_type': 'Bernoulli', 'grow_policy': 'SymmetricTree', 'max_depth': 5, 'min_data_in_leaf': 1, 'one_hot_max_size': 10, 'fold_permutation_block': 1, 'boosting_type': 'Plain', 'od_type': 'Iter', 'max_bin': 32, 'feature_border_type': 'GreedyLogSum', 'nan_mode': 'Min', 'verbose': False, 'max_ctr_complexity': 1, 'num_trees': 3000, 'objective': 'Logloss', 'eval_metric': 'AUC', 'od_wait': 100}, '_object': <_catboost._CatBoost object at 0x7fcd68f53400>, '_is_fitted_': True, '_random_seed': 42, '_learning_rate': 0.05000000074505806, '_tree_count': 774, '_n_features_in': 20, '_prediction_values_change': [2.1750902691001297, 5.076168164338567, 3.220156124006821, 2.9674498896177623, 2.5282828208675676, 3.2260336098832862, 5.2225764808514645, 1.1118635684449916, 1.7029795599176003, 2.911652531558489, 3.6668095603794573, 5.877746382719845, 

In [14]:
# Inference on the held-out fold with the GPU-trained preset after to_cpu() was called on it.
# reset_index(drop=True) renumbers the rows without materialising the old index as a column.
cpu_inf = automl_gpu.predict(data[folds == args_fold].reset_index(drop=True))

  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,


In [15]:
# Local Dask CUDA cluster restricted to device "0", talking over UCX with NVLink enabled.
cluster = LocalCUDACluster(
    CUDA_VISIBLE_DEVICES="0",
    protocol="ucx",
    enable_nvlink=True,
    rmm_managed_memory=True,
    memory_limit="8GB",
)
print("dashboard:", cluster.dashboard_link)

# Client bound to the cluster above; it is handed to the multi-GPU preset further down.
client = Client(cluster)

# Run cudf.set_allocator("managed") on the cluster's workers; the cell displays the
# per-worker return values.
# NOTE(review): possibly redundant with rmm_managed_memory=True above — confirm.
client.run(cudf.set_allocator, "managed")

2022-12-07 14:24:01,301 - distributed.diskutils - INFO - Found stale lock file and directory '/tmp/dask-worker-space/worker-tzrfwv9q', purging
2022-12-07 14:24:01,306 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2022-12-07 14:24:01,306 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize


dashboard: http://127.0.0.1:8787/status


{'ucx://127.0.0.1:41589': None}

In [16]:
# Multi-GPU run: the task targets the 'mgpu' device and the preset receives the Dask
# client created in the previous cell; the reader additionally gets 'npartitions': 2.
#
# NOTE(review): in the recorded run this cell aborted. The XGB stage raised
# XGBoostError (cudaErrorIllegalAddress) inside dispatched_train and the pipeline then
# failed with "Pipeline finished with 0 models". The root cause is not established here;
# every later cell that depends on automl_mgpu was left unexecuted.
task = Task(data_info['task_type'], device='mgpu')

automl_mgpu = TabularAutoMLGPU(
    task=task,
    timeout=3600,
    cpu_limit=1,
    reader_params={'n_jobs': 1, 'cv': 3, 'random_state': 42, 'npartitions': 2},
    general_params={'use_algos': ['linear_l2', 'xgb', 'cb']},
    client=client,
)

# Fit on every row outside the held-out fold. reset_index(drop=True) renumbers the rows
# without first turning the old index into a column, so it also works when the index is
# named or a feature is literally called 'index'.
mgpu_fit_pred = automl_mgpu.fit_predict(
    data[folds != args_fold].reset_index(drop=True),
    roles=roles,
    verbose=2,
)

[14:24:01] Stdout logging level is INFO2.
[14:24:01] Task: binary

[14:24:01] Start automl preset with listed constraints:
[14:24:01] - time: 3600.00 seconds
[14:24:01] - CPU: 1 cores
[14:24:01] - memory: 16 GB

[14:24:01] Train data shape: (431506, 8)
Feats was rejected during automatic roles guess: []
[14:24:02] Layer [1m1[0m train process start. Time left 3599.41 secs
[14:24:03] Start fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m ...
[14:24:03] ===== Start working with [1mfold 0[0m for [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m (orig) =====
[14:24:07] ===== Start working with [1mfold 1[0m for [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m (orig) =====
[14:24:11] ===== Start working with [1mfold 2[0m for [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m (orig) =====
[14:24:14] Fitting [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m finished. score = [1m0.7083818316459656[0m
[14:24:14] [1mLvl_0_Pipe_0_Mod_0_LinearL2[0m fitting and predicting completed
[14:24:14] Time left 3587.18 secs

[14:24:14] [1mLayer 1 training 

  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
  self._init_pool(data, label, cat_features, text_features, embedding_features, embedding_features_data, pairs, weight,
Default metric period is 5 because AUC is/are not implemented for GPU


[14:24:26] [1mSelector_CatBoostGPU[0m fitting and predicting completed
[14:24:27] Start fitting [1mLvl_1_Pipe_0_Mod_0_XGB[0m ...
[14:24:27] ===== Start working with [1mfold 0[0m for [1mLvl_1_Pipe_0_Mod_0_XGB[0m (orig) =====
THIS IS MGPU


  client.wait_for_workers(n_workers)
[14:24:31] task [xgboost.dask-0]:ucx://127.0.0.1:41589 got new rank 0


[14:24:32] Model Lvl_1_Pipe_0_Mod_0_XGB failed during ml_algo.fit_predict call.

uninitialized_fill_n: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered


Key:       dispatched_train-2b58ca9a-4715-4aca-a900-fe05436406e5
Function:  dispatched_train
args:      ({'tree_method': 'gpu_hist', 'predictor': 'gpu_predictor', 'task': 'train', 'learning_rate': 0.05, 'max_leaves': 244, 'max_depth': 0, 'verbosity': 0, 'reg_alpha': 1, 'reg_lambda': 0.0, 'gamma': 0.0, 'max_bin': 255, 'random_state': 42, 'nthread': 1, 'objective': 'binary:logistic', 'metric': 'auc', 'num_class': 1}, [b'DMLC_NUM_WORKER=1', b'DMLC_TRACKER_URI=192.168.18.5', b'DMLC_TRACKER_PORT=60433', b'DMLC_TASK_ID=[xgboost.dask-0]:ucx://127.0.0.1:41589'], 140519969677216, ['train', 'valid'], [140519969677216, 140518548399920], < could not convert arg to str >, < could not convert arg to str >, < could not convert arg to str >)
kwargs:    {}
Exception: "XGBoostError('uninitialized_fill_n: failed to synchronize: cudaErrorIllegalAddress: an illegal memory access was encountered')"



AssertionError: Pipeline finished with 0 models for some reason.
Probably one or more models failed

In [None]:
# Inference with the multi-GPU preset on the held-out fold.
# reset_index(drop=True) renumbers the rows without materialising the old index as a column.
mgpu_pred = automl_mgpu.predict(data[folds == args_fold].reset_index(drop=True))

In [None]:
# Held-out-fold predictions side by side, separated by blank lines:
# GPU-trained model after to_cpu(), GPU model, CPU preset, multi-GPU preset.
print(
    cpu_inf.data.T,
    gpu_inf.data.T,
    cpu_pred.data.T,
    mgpu_pred.data.T,
    sep="\n\n",
)

In [None]:
# fit_predict outputs of the CPU, single-GPU and multi-GPU presets, separated by blank lines.
print(
    cpu_fit_pred.data.T,
    gpu_fit_pred.data.T,
    mgpu_fit_pred.data.T,
    sep="\n\n",
)

In [None]:
# Convert the fitted multi-GPU preset in place via to_cpu(); the next cell reuses
# `automl_mgpu` for inference.
# NOTE(review): presumably this moves every fitted model/transformer off the GPUs — confirm
# against the TabularAutoMLGPU implementation.
automl_mgpu.to_cpu()

In [None]:
# Inference on the held-out fold with the multi-GPU preset after to_cpu() was called on it.
# reset_index(drop=True) renumbers the rows without materialising the old index as a column.
mgpu_inf = automl_mgpu.predict(data[folds == args_fold].reset_index(drop=True))

In [None]:
# Show the held-out-fold predictions produced by the multi-GPU preset after to_cpu().
print(mgpu_inf.data.T)

terminate called after throwing an instance of 'thrust::system::system_error'
  what():  parallel_for failed: cudaErrorIllegalAddress: an illegal memory access was encountered
2022-12-07 14:24:57,983 - distributed.preloading - INFO - Creating preload: dask_cuda.initialize
2022-12-07 14:24:57,983 - distributed.preloading - INFO - Import preload module: dask_cuda.initialize
