# Review errors in the results files `metadataset_errors*.csv`

In [2]:
import pandas as pd

# absolute path of the collected error records
errors_csv_path = "/home/shared/tabzilla/TabSurvey/metadataset_errors.csv"
errors_df = pd.read_csv(errors_csv_path)

In [3]:
# preview the first five error records
errors_df.head(n=5)

Unnamed: 0,results_bucket_path,dataset_name,alg_name,hparam_source,trial_number,alg_hparam_id,exp_name,exception
0,results/openml__Census-Income__168340/CatBoost...,openml__Census-Income__168340,CatBoost,random_5_s0,5,CatBoost__seed_0__trial_5,algs-gpu-1-datasets-b_111222_140137_f97f.zip,"Traceback (most recent call last):\n File ""/h..."
1,results/openml__Census-Income__168340/KNN/algs...,openml__Census-Income__168340,KNN,random_1_s0,1,KNN__seed_0__trial_1,algs-cpu-1-datasets-b_111122_061539_9b61.zip,"Traceback (most recent call last):\n File ""/h..."
2,results/openml__Census-Income__168340/KNN/algs...,openml__Census-Income__168340,KNN,random_2_s0,2,KNN__seed_0__trial_2,algs-cpu-1-datasets-b_111122_061539_9b61.zip,"Traceback (most recent call last):\n File ""/h..."
3,results/openml__Census-Income__168340/KNN/algs...,openml__Census-Income__168340,KNN,random_4_s0,4,KNN__seed_0__trial_4,algs-cpu-1-datasets-b_111122_061539_9b61.zip,"Traceback (most recent call last):\n File ""/h..."
4,results/openml__Census-Income__168340/MLP/algs...,openml__Census-Income__168340,MLP,default,0,MLP__seed_0__trial_0,algs-gpu-1-datasets-b_111222_195204_6934.zip,"Traceback (most recent call last):\n File ""/h..."


In [4]:
# number of error rows recorded for each dataset
errors_per_dataset = errors_df["dataset_name"].value_counts()
print(f"errors by dataset:\n {errors_per_dataset}")

errors by dataset:
 openml__solar-flare__2068         90
openml__poker-hand__9890          73
openml__Census-Income__168340     72
openml__lung-cancer__146024       71
openml__walking-activity__9945    69
                                  ..
openml__hepatitis__54             60
openml__hill-valley__145847       60
openml__ionosphere__145984        60
openml__iris__59                  60
openml__yeast__145793             60
Name: dataset_name, Length: 82, dtype: int64


In [5]:
# number of error rows recorded for each algorithm
errors_per_alg = errors_df["alg_name"].value_counts()
print(f"errors by alg:\n {errors_per_alg}")

errors by alg:
 MLP         2460
VIME        2460
CatBoost      62
SVM           38
KNN           22
TabNet        22
XGBoost        7
Name: alg_name, dtype: int64


# Triage errors by alg

In [6]:
# Map each algorithm name to the array of exception strings recorded for it.
errors_by_alg = {}
alg_column = errors_df["alg_name"]
for alg_name in alg_column.unique():
    rows_for_alg = alg_column == alg_name
    errors_by_alg[alg_name] = errors_df.loc[rows_for_alg, "exception"].values

## MLP

In [7]:
alg_name = "MLP"

# every exception string recorded for the selected algorithm
alg_errors = errors_by_alg[alg_name]

print(f"number of errors for alg {alg_name}: {len(alg_errors)}")
distinct_errors = set(alg_errors)
print(f"number of unique errors: {len(distinct_errors)}")


number of errors for alg MLP: 2460
number of unique errors: 1


In [10]:
# show the first recorded exception for the selected algorithm
first_error = errors_by_alg[alg_name][0]
print(first_error)

Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 137, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 237, in cross_validation
    loss_history, val_loss_history = curr_model.fit(
  File "/home/shared/tabzilla/TabSurvey/models/mlp.py", line 31, in fit
    return super().fit(X, y, X_val, y_val)
  File "/home/shared/tabzilla/TabSurvey/models/basemodel_torch.py", line 94, in fit
    out = self.model(batch_X.to(self.device))
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
    return forward_call(*input, **kwargs)
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py", line 154, in forward
    raise RuntimeError("module must have its parameters and buffers "
RuntimeError: module must have its parameters and buffers on device cuda:0 (devi

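The single unique MLP error above is a `RuntimeError` raised by `DataParallel` (module parameters and buffers not on `cuda:0`), not a time-limit issue. Let's see which datasets it occurs on. (The note that was here — "All caught SVM errors are time-limit issues, for a single hyperparameter sample" — refers to SVM and appears to be left over from another section.)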

In [10]:
# Triage the non-timeout exceptions of the currently selected `alg_name`: for each
# distinct traceback, report how often it occurs and which datasets it affects.
alg_errors_df = errors_df.loc[errors_df["alg_name"] == alg_name]

# na=False keeps the mask strictly boolean when an exception value is missing
# (a NaN in the mask would break the `~` negation); regex=False matches "Timeout" literally.
is_timeout = alg_errors_df["exception"].str.contains("Timeout", regex=False, na=False)
# dropna: rows without exception text cannot be printed or compared, so skip them
non_timeout_errors = alg_errors_df.loc[~is_timeout, "exception"].dropna().unique()

for i, e in enumerate(non_timeout_errors):
    # select the matching rows once and reuse them for the count and the dataset list
    rows_with_error = alg_errors_df.loc[alg_errors_df["exception"] == e]

    print(f"----exception {i}----")
    print(f"occurrences: {len(rows_with_error)}")
    print(e + "\n")

    er_datasets = rows_with_error["dataset_name"].unique()
    print("this exception occurs on the following datasets:")
    for d in er_datasets:
        print(d)

    print("--------")

## VIME

In [11]:
alg_name = "VIME"

# every exception string recorded for the selected algorithm
alg_errors = errors_by_alg[alg_name]

print(f"number of errors for alg {alg_name}: {len(alg_errors)}")
distinct_errors = set(alg_errors)
print(f"number of unique errors: {len(distinct_errors)}")


number of errors for alg VIME: 154
number of unique errors: 12


In [12]:
# what are the unique errors?
# dict.fromkeys de-duplicates while keeping first-seen order, so any "error N"
# numbering derived from this list is the same on every kernel restart
# (iterating a set of strings gives an order that can change between runs).
unique_errors = list(dict.fromkeys(errors_by_alg[alg_name]))

In [13]:
# print each distinct traceback under a numbered heading
for error_idx, error_text in enumerate(unique_errors):
    print(f"error {error_idx}:\n{error_text}\n")

error 0:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 210, in cross_validation
    raise TimeoutException(f"time limit of {time_limit}s reached during fold {i}")
tabzilla_utils.TimeoutException: time limit of 7200s reached during fold 4


error 1:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 237, in cross_validation
    loss_history, val_loss_history = curr_model.fit(
  File "/home/shared/tabzilla/TabSurvey/models/vime.py", line 47, in fit
    self.fit_self(X_unlab, p_m=self.params["p_m"], alpha=self.params["alpha"])
  File "/home/shared/tabzilla/TabSurvey/models/vime.py", 

Three VIME errors are due to memory, not timeouts. Let's look at these.

In [14]:
# Triage the non-timeout exceptions of the currently selected `alg_name`: for each
# distinct traceback, report how often it occurs and which datasets it affects.
alg_errors_df = errors_df.loc[errors_df["alg_name"] == alg_name]

# na=False keeps the mask strictly boolean when an exception value is missing
# (a NaN in the mask would break the `~` negation); regex=False matches "Timeout" literally.
is_timeout = alg_errors_df["exception"].str.contains("Timeout", regex=False, na=False)
# dropna: rows without exception text cannot be printed or compared, so skip them
non_timeout_errors = alg_errors_df.loc[~is_timeout, "exception"].dropna().unique()

for i, e in enumerate(non_timeout_errors):
    # select the matching rows once and reuse them for the count and the dataset list
    rows_with_error = alg_errors_df.loc[alg_errors_df["exception"] == e]

    print(f"----exception {i}----")
    print(f"occurrences: {len(rows_with_error)}")
    print(e + "\n")

    er_datasets = rows_with_error["dataset_name"].unique()
    print("this exception occurs on the following datasets:")
    for d in er_datasets:
        print(d)

    print("--------")


----exception 0----
occurrences: 30
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 237, in cross_validation
    loss_history, val_loss_history = curr_model.fit(
  File "/home/shared/tabzilla/TabSurvey/models/vime.py", line 47, in fit
    self.fit_self(X_unlab, p_m=self.params["p_m"], alpha=self.params["alpha"])
  File "/home/shared/tabzilla/TabSurvey/models/vime.py", line 136, in fit_self
    x_tilde = torch.tensor(x_tilde).float()
RuntimeError: [enforce fail at alloc_cpu.cpp:73] . DefaultCPUAllocator: can't allocate memory: you tried to allocate 663552000 bytes. Error code 12 (Cannot allocate memory)


this exception occurs on the following datasets:
openml__CIFAR_10__167124
--------
----exception 1----
occurrences: 13
Traceback (most recent call last):
  File "/home/shared/tabzil

Some of the hparam sets seem pathological, like `random_2_s0`, but many other hparam sets also yield errors.

## TabNet

In [11]:
alg_name = "TabNet"

# every exception string recorded for the selected algorithm
alg_errors = errors_by_alg[alg_name]

print(f"number of errors for alg {alg_name}: {len(alg_errors)}")
distinct_errors = set(alg_errors)
print(f"number of unique errors: {len(distinct_errors)}")


number of errors for alg TabNet: 22
number of unique errors: 6


In [12]:
# what are the unique errors?
# dict.fromkeys de-duplicates while keeping first-seen order, so any "error N"
# numbering derived from this list is the same on every kernel restart
# (iterating a set of strings gives an order that can change between runs).
unique_errors = list(dict.fromkeys(errors_by_alg[alg_name]))

In [13]:
# print each distinct traceback under a numbered heading
for error_idx, error_text in enumerate(unique_errors):
    print(f"error {error_idx}:\n{error_text}\n")

error 0:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 137, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 210, in cross_validation
    raise TimeoutException(f"time limit of {time_limit}s reached during fold {i}")
tabzilla_utils.TimeoutException: time limit of 7200s reached during fold 3


error 1:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 137, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 210, in cross_validation
    raise TimeoutException(f"time limit of {time_limit}s reached during fold {i}")
tabzilla_utils.TimeoutException: time limit of 7200s reached during fold 2


error 2:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experimen

Only a couple of TabNet errors are not timeouts. Let's look at those:

In [18]:
# Triage the non-timeout exceptions of the currently selected `alg_name`: for each
# distinct traceback, report how often it occurs and which datasets it affects.
alg_errors_df = errors_df.loc[errors_df["alg_name"] == alg_name]

# na=False keeps the mask strictly boolean when an exception value is missing
# (a NaN in the mask would break the `~` negation); regex=False matches "Timeout" literally.
is_timeout = alg_errors_df["exception"].str.contains("Timeout", regex=False, na=False)
# dropna: rows without exception text cannot be printed or compared, so skip them
non_timeout_errors = alg_errors_df.loc[~is_timeout, "exception"].dropna().unique()

for i, e in enumerate(non_timeout_errors):
    # select the matching rows once and reuse them for the count and the dataset list
    rows_with_error = alg_errors_df.loc[alg_errors_df["exception"] == e]

    print(f"----exception {i}----")
    print(f"occurrences: {len(rows_with_error)}")
    print(e + "\n")

    er_datasets = rows_with_error["dataset_name"].unique()
    print("this exception occurs on the following datasets:")
    for d in er_datasets:
        print(d)

    print("--------")

----exception 0----
occurrences: 30
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 237, in cross_validation
    loss_history, val_loss_history = curr_model.fit(
  File "/home/shared/tabzilla/TabSurvey/models/tabnet.py", line 45, in fit
    self.model.fit(
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/pytorch_tabnet/abstract_model.py", line 223, in fit
    self._train_epoch(train_dataloader)
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/pytorch_tabnet/abstract_model.py", line 434, in _train_epoch
    batch_logs = self._train_batch(X, y)
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/pytorch_tabnet/abstract_model.py", line 469, in _train_batch
    output, M_loss = self.network(X)
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/torch/nn

In [75]:
# Disabled check: count the selected algorithm's error rows per `hparam_source`.
# errors_df.loc[errors_df["alg_name"] == alg_name, "hparam_source"].value_counts()

## KNN

In [19]:
alg_name = "KNN"

# every exception string recorded for the selected algorithm
alg_errors = errors_by_alg[alg_name]

print(f"number of errors for alg {alg_name}: {len(alg_errors)}")
distinct_errors = set(alg_errors)
print(f"number of unique errors: {len(distinct_errors)}")


number of errors for alg KNN: 99
number of unique errors: 9


In [20]:
# what are the unique errors?
# dict.fromkeys de-duplicates while keeping first-seen order, so any "error N"
# numbering derived from this list is the same on every kernel restart
# (iterating a set of strings gives an order that can change between runs).
unique_errors = list(dict.fromkeys(errors_by_alg[alg_name]))

In [21]:
# print each distinct traceback under a numbered heading
for error_idx, error_text in enumerate(unique_errors):
    print(f"error {error_idx}:\n{error_text}\n")

error 0:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 210, in cross_validation
    raise TimeoutException(f"time limit of {time_limit}s reached during fold {i}")
tabzilla_utils.TimeoutException: time limit of 7200s reached during fold 4


error 1:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 210, in cross_validation
    raise TimeoutException(f"time limit of {time_limit}s reached during fold {i}")
tabzilla_utils.TimeoutException: time limit of 7200s reached during fold 3


error 2:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experimen

All caught KNN errors are time-limit issues, apparently for a single hyperparameter sample. Let's check whether they are all due to the same hparam sample...

In [22]:
# Disabled check: count the KNN error rows per `hparam_source`.
# errors_df.loc[errors_df["alg_name"] == "KNN", "hparam_source"].value_counts()

In [23]:
# Triage the non-timeout exceptions of the currently selected `alg_name`: for each
# distinct traceback, report how often it occurs and which datasets it affects.
alg_errors_df = errors_df.loc[errors_df["alg_name"] == alg_name]

# na=False keeps the mask strictly boolean when an exception value is missing
# (a NaN in the mask would break the `~` negation); regex=False matches "Timeout" literally.
is_timeout = alg_errors_df["exception"].str.contains("Timeout", regex=False, na=False)
# dropna: rows without exception text cannot be printed or compared, so skip them
non_timeout_errors = alg_errors_df.loc[~is_timeout, "exception"].dropna().unique()

for i, e in enumerate(non_timeout_errors):
    # select the matching rows once and reuse them for the count and the dataset list
    rows_with_error = alg_errors_df.loc[alg_errors_df["exception"] == e]

    print(f"----exception {i}----")
    print(f"occurrences: {len(rows_with_error)}")
    print(e + "\n")

    er_datasets = rows_with_error["dataset_name"].unique()
    print("this exception occurs on the following datasets:")
    for d in er_datasets:
        print(d)

    print("--------")

## CatBoost

In [14]:
alg_name = "CatBoost"

# every exception string recorded for the selected algorithm
alg_errors = errors_by_alg[alg_name]

print(f"number of errors for alg {alg_name}: {len(alg_errors)}")
distinct_errors = set(alg_errors)
print(f"number of unique errors: {len(distinct_errors)}")


number of errors for alg CatBoost: 62
number of unique errors: 10


In [15]:
# what are the unique errors?
# dict.fromkeys de-duplicates while keeping first-seen order, so any "error N"
# numbering derived from this list is the same on every kernel restart
# (iterating a set of strings gives an order that can change between runs).
unique_errors = list(dict.fromkeys(errors_by_alg[alg_name]))

In [16]:
# print each distinct traceback under a numbered heading
for error_idx, error_text in enumerate(unique_errors):
    print(f"error {error_idx}:\n{error_text}\n")

error 0:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 137, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 210, in cross_validation
    raise TimeoutException(f"time limit of {time_limit}s reached during fold {i}")
tabzilla_utils.TimeoutException: time limit of 7200s reached during fold 2


error 1:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 137, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 210, in cross_validation
    raise TimeoutException(f"time limit of {time_limit}s reached during fold {i}")
tabzilla_utils.TimeoutException: time limit of 7200s reached during fold 3


error 2:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experimen

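Let's check whether any CatBoost errors are not timeout errors, and look at those more closely. (This note originally read "The MLP errors are not timeout errors", and the saved output below shows an MLP traceback, so both appear to be left over from an earlier run; re-run this cell to refresh the output.)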

In [27]:
# Triage the non-timeout exceptions of the currently selected `alg_name`: for each
# distinct traceback, report how often it occurs and which datasets it affects.
alg_errors_df = errors_df.loc[errors_df["alg_name"] == alg_name]

# na=False keeps the mask strictly boolean when an exception value is missing
# (a NaN in the mask would break the `~` negation); regex=False matches "Timeout" literally.
is_timeout = alg_errors_df["exception"].str.contains("Timeout", regex=False, na=False)
# dropna: rows without exception text cannot be printed or compared, so skip them
non_timeout_errors = alg_errors_df.loc[~is_timeout, "exception"].dropna().unique()

for i, e in enumerate(non_timeout_errors):
    # select the matching rows once and reuse them for the count and the dataset list
    rows_with_error = alg_errors_df.loc[alg_errors_df["exception"] == e]

    print(f"----exception {i}----")
    print(f"occurrences: {len(rows_with_error)}")
    print(e + "\n")

    er_datasets = rows_with_error["dataset_name"].unique()
    print("this exception occurs on the following datasets:")
    for d in er_datasets:
        print(d)

    print("--------")

----exception 0----
occurrences: 30
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 237, in cross_validation
    loss_history, val_loss_history = curr_model.fit(
  File "/home/shared/tabzilla/TabSurvey/models/mlp.py", line 31, in fit
    return super().fit(X, y, X_val, y_val)
  File "/home/shared/tabzilla/TabSurvey/models/basemodel_torch.py", line 92, in fit
    for i, (batch_X, batch_y) in enumerate(train_loader):
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 368, in __iter__
    return self._get_iterator()
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 314, in _get_iterator
    return _MultiProcessingDataLoaderIter(self)
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/to

## XGBoost

In [28]:
alg_name = "XGBoost"

# every exception string recorded for the selected algorithm
alg_errors = errors_by_alg[alg_name]

print(f"number of errors for alg {alg_name}: {len(alg_errors)}")
distinct_errors = set(alg_errors)
print(f"number of unique errors: {len(distinct_errors)}")


number of errors for alg XGBoost: 6
number of unique errors: 3


In [29]:
# what are the unique errors?
# dict.fromkeys de-duplicates while keeping first-seen order, so any "error N"
# numbering derived from this list is the same on every kernel restart
# (iterating a set of strings gives an order that can change between runs).
unique_errors = list(dict.fromkeys(errors_by_alg[alg_name]))

In [30]:
# print each distinct traceback under a numbered heading
for error_idx, error_text in enumerate(unique_errors):
    print(f"error {error_idx}:\n{error_text}\n")

error 0:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 210, in cross_validation
    raise TimeoutException(f"time limit of {time_limit}s reached during fold {i}")
tabzilla_utils.TimeoutException: time limit of 7200s reached during fold 8


error 1:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 210, in cross_validation
    raise TimeoutException(f"time limit of {time_limit}s reached during fold {i}")
tabzilla_utils.TimeoutException: time limit of 7200s reached during fold 6


error 2:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experimen

All caught XGBoost errors are time-limit issues.

In [31]:
# Triage the non-timeout exceptions of the currently selected `alg_name`: for each
# distinct traceback, report how often it occurs and which datasets it affects.
alg_errors_df = errors_df.loc[errors_df["alg_name"] == alg_name]

# na=False keeps the mask strictly boolean when an exception value is missing
# (a NaN in the mask would break the `~` negation); regex=False matches "Timeout" literally.
is_timeout = alg_errors_df["exception"].str.contains("Timeout", regex=False, na=False)
# dropna: rows without exception text cannot be printed or compared, so skip them
non_timeout_errors = alg_errors_df.loc[~is_timeout, "exception"].dropna().unique()

for i, e in enumerate(non_timeout_errors):
    # select the matching rows once and reuse them for the count and the dataset list
    rows_with_error = alg_errors_df.loc[alg_errors_df["exception"] == e]

    print(f"----exception {i}----")
    print(f"occurrences: {len(rows_with_error)}")
    print(e + "\n")

    er_datasets = rows_with_error["dataset_name"].unique()
    print("this exception occurs on the following datasets:")
    for d in er_datasets:
        print(d)

    print("--------")

In [None]:
# NOTE(review): this cell repeats the non-timeout triage of the preceding cell for the
# same `alg_name`; consider removing one of the two.
#
# Triage the non-timeout exceptions of the currently selected `alg_name`: for each
# distinct traceback, report how often it occurs and which datasets it affects.
alg_errors_df = errors_df.loc[errors_df["alg_name"] == alg_name]

# na=False keeps the mask strictly boolean when an exception value is missing
# (a NaN in the mask would break the `~` negation); regex=False matches "Timeout" literally.
is_timeout = alg_errors_df["exception"].str.contains("Timeout", regex=False, na=False)
# dropna: rows without exception text cannot be printed or compared, so skip them
non_timeout_errors = alg_errors_df.loc[~is_timeout, "exception"].dropna().unique()

for i, e in enumerate(non_timeout_errors):
    # select the matching rows once and reuse them for the count and the dataset list
    rows_with_error = alg_errors_df.loc[alg_errors_df["exception"] == e]

    print(f"----exception {i}----")
    print(f"occurrences: {len(rows_with_error)}")
    print(e + "\n")

    er_datasets = rows_with_error["dataset_name"].unique()
    print("this exception occurs on the following datasets:")
    for d in er_datasets:
        print(d)

    print("--------")

----exception 0----
occurrences: 30
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 237, in cross_validation
    loss_history, val_loss_history = curr_model.fit(
  File "/home/shared/tabzilla/TabSurvey/models/mlp.py", line 31, in fit
    return super().fit(X, y, X_val, y_val)
  File "/home/shared/tabzilla/TabSurvey/models/basemodel_torch.py", line 92, in fit
    for i, (batch_X, batch_y) in enumerate(train_loader):
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 368, in __iter__
    return self._get_iterator()
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 314, in _get_iterator
    return _MultiProcessingDataLoaderIter(self)
  File "/opt/conda/envs/torch/lib/python3.10/site-packages/to

## LightGBM

In [32]:
alg_name = "LightGBM"

# every exception string recorded for the selected algorithm
alg_errors = errors_by_alg[alg_name]

print(f"number of errors for alg {alg_name}: {len(alg_errors)}")
distinct_errors = set(alg_errors)
print(f"number of unique errors: {len(distinct_errors)}")


number of errors for alg LightGBM: 91
number of unique errors: 8


In [33]:
# what are the unique errors?
# dict.fromkeys de-duplicates while keeping first-seen order, so any "error N"
# numbering derived from this list is the same on every kernel restart
# (iterating a set of strings gives an order that can change between runs).
unique_errors = list(dict.fromkeys(errors_by_alg[alg_name]))

In [34]:
# print each distinct traceback under a numbered heading
for error_idx, error_text in enumerate(unique_errors):
    print(f"error {error_idx}:\n{error_text}\n")

error 0:
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 210, in cross_validation
    raise TimeoutException(f"time limit of {time_limit}s reached during fold {i}")
tabzilla_utils.TimeoutException: time limit of 7200s reached during fold 1


error 1:
Traceback (most recent call last):
  File "/opt/conda/envs/gbdt/lib/python3.9/site-packages/joblib/parallel.py", line 822, in dispatch_one_batch
    tasks = self._ready_batches.get(block=False)
  File "/opt/conda/envs/gbdt/lib/python3.9/queue.py", line 168, in get
    raise Empty
_queue.Empty

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.

Some LightGBM errors are not due to time limit...

In [35]:
# Triage the non-timeout exceptions of the currently selected `alg_name`: for each
# distinct traceback, report how often it occurs and which datasets it affects.
alg_errors_df = errors_df.loc[errors_df["alg_name"] == alg_name]

# na=False keeps the mask strictly boolean when an exception value is missing
# (a NaN in the mask would break the `~` negation); regex=False matches "Timeout" literally.
is_timeout = alg_errors_df["exception"].str.contains("Timeout", regex=False, na=False)
# dropna: rows without exception text cannot be printed or compared, so skip them
non_timeout_errors = alg_errors_df.loc[~is_timeout, "exception"].dropna().unique()

for i, e in enumerate(non_timeout_errors):
    # select the matching rows once and reuse them for the count and the dataset list
    rows_with_error = alg_errors_df.loc[alg_errors_df["exception"] == e]

    print(f"----exception {i}----")
    print(f"occurrences: {len(rows_with_error)}")
    print(e + "\n")

    er_datasets = rows_with_error["dataset_name"].unique()
    print("this exception occurs on the following datasets:")
    for d in er_datasets:
        print(d)

    print("--------")

----exception 0----
occurrences: 1
Traceback (most recent call last):
  File "/home/shared/tabzilla/TabSurvey/tabzilla_experiment.py", line 136, in __call__
    result = cross_validation(model, self.dataset, self.time_limit)
  File "/home/shared/tabzilla/TabSurvey/tabzilla_utils.py", line 247, in cross_validation
    train_predictions, train_probs = curr_model.predict_wrapper(X_train)
  File "/home/shared/tabzilla/TabSurvey/models/basemodel.py", line 104, in predict_wrapper
    self.predictions, self.prediction_probabilities = self.predict(X)
  File "/home/shared/tabzilla/TabSurvey/models/basemodel.py", line 144, in predict
    self.prediction_probabilities = self.predict_proba(X)
  File "/home/shared/tabzilla/TabSurvey/models/tree_models.py", line 237, in predict_proba
    probabilities = self.model.predict(X)
  File "/opt/conda/envs/gbdt/lib/python3.9/site-packages/lightgbm/basic.py", line 3538, in predict
    return predictor.predict(data, start_iteration, num_iteration,
  File "/op

# Errors by Dataset