In [1]:
# !pip install papermill

In [2]:
from datetime import datetime
import json

def now():
    now = datetime.now()
    yyyymmdd_hhmmss_part = now.strftime('%Y-%m-%d %H:%M:%S')
    ms_part = f'{int(now.microsecond / 1000):03d}'
    return f'{yyyymmdd_hhmmss_part},{ms_part}'

def load_results(results_filename):
    with open(results_filename, 'r', encoding='utf-8') as f:
        content = json.load(f)
        f1_weighted = content["leaderboard"][0]["score_val"]
        fit_time = content["leaderboard"][0]["fit_time"]
    return f'{f1_weighted:.6f}', f'{fit_time:.3f}'

In [3]:
from pathlib import Path
from time import sleep
from tqdm.notebook import tqdm
import os
import papermill as pm

errors = {}

base_time = 60

# for kind in tqdm(["Binary", "Multiclass"], desc='Kind', leave='False'):
for kind in tqdm(["Multiclass", "Binary"], desc='Kind', leave='False'):

    base_folder = Path(f"2025-06-13/{kind}")
    
    # Find all .parquet files recursively
    parquet_files = list(base_folder.rglob("*.parquet"))
    
    # Sort by file size (ascending)
    parquet_files_sorted = sorted(parquet_files, key=lambda p: os.path.getsize(p))
    
    # Iterate
    for file in tqdm(parquet_files_sorted, desc='File', leave='False'):
        
        base_filename = os.path.join(file.parent, file.stem)
        results_filename = f'{base_filename}_results.json'
        size_mb = f'{(os.path.getsize(file) / (1024 * 1024)):.3f} MB'
        input_notebook = 'ag_code.ipynb'
        output_notebook = f'{base_filename}.ipynb'
        
        max_time = base_time
        success = False

        while not success and max_time <= 3840:
        
            try:

                # if 'NIDS_NF-UQ-NIDS-v2' in str(file) or 'IoT_23' in str(file) or 'KDD_Cup_1999' in str(file):
                #     break
                # if 'ToN_IoT' not in str(file):
                #     break
        
                tqdm.write(f'[{now()}] Processing | FILE = {str(file.stem):<64} | FILE_SIZE   = {size_mb:<12} | MAX_TIME = {max_time:<6}')
            
                if not Path(results_filename).exists():
    
                    pm.execute_notebook(
                        input_notebook,
                        output_notebook,
                        parameters=dict(
                            BASE_FILENAME=base_filename,
                            TARGET_COL='label',
                            TEST_SIZE=0.2,
                            TIME_LIMIT=max_time,
                            EVAL_METRIC='f1_weighted',
                            PRESET='medium_quality',
                            N_FOLDS=5,
                            RANDOM_STATE=42
                        )
                    )
            
                f1_weighted, fit_time = load_results(results_filename)
                tqdm.write(f'[{now()}] Processed  | FILE = {str(file.stem):<64} | F1_WEIGHTED = {f1_weighted:<12} | FIT_TIME = {fit_time:<6}')
                success = True
                
            except Exception as e:
                # if 'Consider specifying a larger time_limit.' in str(e):
                if 'raise_on_no_models_fitted' in str(e) or 'time_limit' in str(e):
                    max_time = 2 * max_time
                else:
                    tqdm.write(f'[{now()}] ERROR      | FILE = {str(file.stem):<64} | EXCEPTION = {str(e)}')
                    errors[str(file.stem)] = e
                    break

Kind:   0%|          | 0/2 [00:00<?, ?it/s]

File:   0%|          | 0/61 [00:00<?, ?it/s]

[2025-06-21 02:12:59,127] Processing | FILE = ToN_IoT_IoT_Garage_Door_Multiclass                               | FILE_SIZE   = 0.003 MB     | MAX_TIME = 60    


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-21 02:13:02,001] ERROR      | FILE = ToN_IoT_IoT_Garage_Door_Multiclass                               | EXCEPTION = 
---------------------------------------------------------------------------
Exception encountered at "In [3]":
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[3], line 21
     18 from autogluon.tabular import TabularDataset
     19 from sklearn.model_selection import train_test_split
---> 21 train_df, test_df = train_test_split(df, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=df[TARGET_COL])
     23 train_data, test_data = TabularDataset(train_df), TabularDataset(test_df)
     25 # Step 4: Print dtypes

File ~/miniforge3/envs/ag/lib/python3.11/site-packages/sklearn/utils/_param_validation.py:216, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    210 try:
    211     with config_context(
    212         skip_paramet

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-21 02:13:04,313] ERROR      | FILE = ToN_IoT_IoT_Motion_Light_Multiclass                              | EXCEPTION = 
---------------------------------------------------------------------------
Exception encountered at "In [3]":
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[3], line 21
     18 from autogluon.tabular import TabularDataset
     19 from sklearn.model_selection import train_test_split
---> 21 train_df, test_df = train_test_split(df, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=df[TARGET_COL])
     23 train_data, test_data = TabularDataset(train_df), TabularDataset(test_df)
     25 # Step 4: Print dtypes

File ~/miniforge3/envs/ag/lib/python3.11/site-packages/sklearn/utils/_param_validation.py:216, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    210 try:
    211     with config_context(
    212         skip_paramet

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750482887 on cpu 22 ***
PC: @     0x7f419ed25e2e  (unknown)  epoll_wait
    @     0x7f3fe0bac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x7f419ec42520  (unknown)  (unknown)
[2025-06-21 02:14:47,569 E 23958 23958] logging.cc:440: *** SIGTERM received at time=1750482887 on cpu 22 ***
[2025-06-21 02:14:47,569 E 23958 23958] logging.cc:440: PC: @     0x7f419ed25e2e  (unknown)  epoll_wait
[2025-06-21 02:14:47,571 E 23958 23958] logging.cc:440:     @     0x7f3fe0bac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-21 02:14:47,571 E 23958 23958] logging.cc:440:     @     0x7f419ec42520  (unknown)  (unknown)


[2025-06-21 02:14:50,285] ERROR      | FILE = KDD_Cup_1999_Multiclass                                          | EXCEPTION = 
---------------------------------------------------------------------------
Exception encountered at "In [7]":
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[7], line 136
    134     kwargs['pos_label'] = use_pos_label
    135 try:
--> 136     score = average_precision_score(y_true, y_proba, **kwargs)
    137 except ValueError:
    138     score = np.nan

File ~/miniforge3/envs/ag/lib/python3.11/site-packages/sklearn/utils/_param_validation.py:216, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    210 try:
    211     with config_context(
    212         skip_parameter_validation=(
    213             prefer_skip_nested_validation or global_skip_validation
    214         )
    215     ):
--> 216         return func(*args, **

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750482970 on cpu 22 ***
PC: @     0x7566ad725e2e  (unknown)  epoll_wait
    @     0x7564e9dac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x7566ad642520  (unknown)  (unknown)
[2025-06-21 02:16:10,625 E 27125 27125] logging.cc:440: *** SIGTERM received at time=1750482970 on cpu 22 ***
[2025-06-21 02:16:10,625 E 27125 27125] logging.cc:440: PC: @     0x7566ad725e2e  (unknown)  epoll_wait
[2025-06-21 02:16:10,628 E 27125 27125] logging.cc:440:     @     0x7564e9dac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-21 02:16:10,628 E 27125 27125] logging.cc:440:     @     0x7566ad642520  (unknown)  (unknown)


[2025-06-21 02:16:13,348] Processed  | FILE = ToN_IoT_Windows_7_Multiclass                                     | F1_WEIGHTED = 0.995368     | FIT_TIME = 7.884 
[2025-06-21 02:16:13,349] Processing | FILE = BCCC_CIC-BCCC-NRC-IoT-2022_Multiclass                            | FILE_SIZE   = 8.331 MB     | MAX_TIME = 60    
[2025-06-21 02:16:13,350] Processed  | FILE = BCCC_CIC-BCCC-NRC-IoT-2022_Multiclass                            | F1_WEIGHTED = 0.999971     | FIT_TIME = 2.700 
[2025-06-21 02:16:13,350] Processing | FILE = NIDS_NF-ToN-IoT_Multiclass                                       | FILE_SIZE   = 9.031 MB     | MAX_TIME = 60    
[2025-06-21 02:16:13,351] Processed  | FILE = NIDS_NF-ToN-IoT_Multiclass                                       | F1_WEIGHTED = 0.587780     | FIT_TIME = 40.395
[2025-06-21 02:16:13,351] Processing | FILE = ToN_IoT_Windows_10_Multiclass                                    | FILE_SIZE   = 11.665 MB    | MAX_TIME = 60    


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750483055 on cpu 23 ***
PC: @     0x74e4b9125e2e  (unknown)  epoll_wait
    @     0x74e2ee7ac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x74e4b9042520  (unknown)  (unknown)
[2025-06-21 02:17:35,516 E 31859 31859] logging.cc:440: *** SIGTERM received at time=1750483055 on cpu 23 ***
[2025-06-21 02:17:35,516 E 31859 31859] logging.cc:440: PC: @     0x74e4b9125e2e  (unknown)  epoll_wait
[2025-06-21 02:17:35,518 E 31859 31859] logging.cc:440:     @     0x74e2ee7ac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-21 02:17:35,519 E 31859 31859] logging.cc:440:     @     0x74e4b9042520  (unknown)  (unknown)


[2025-06-21 02:17:38,237] Processed  | FILE = ToN_IoT_Windows_10_Multiclass                                    | F1_WEIGHTED = 0.984968     | FIT_TIME = 49.010
[2025-06-21 02:17:38,238] Processing | FILE = EDGE-IIOTSET_DNN-EdgeIIoT_Multiclass                             | FILE_SIZE   = 16.021 MB    | MAX_TIME = 60    
[2025-06-21 02:17:38,239] Processed  | FILE = EDGE-IIOTSET_DNN-EdgeIIoT_Multiclass                             | F1_WEIGHTED = 0.944284     | FIT_TIME = 20.333
[2025-06-21 02:17:38,239] Processing | FILE = NIDS_NF-UNSW-NB15_Multiclass                                     | FILE_SIZE   = 16.890 MB    | MAX_TIME = 60    
[2025-06-21 02:17:38,240] Processed  | FILE = NIDS_NF-UNSW-NB15_Multiclass                                     | F1_WEIGHTED = 0.971884     | FIT_TIME = 32.036
[2025-06-21 02:17:38,241] Processing | FILE = IoT_Network_Intrusion_Macro_Multiclass                           | FILE_SIZE   = 17.502 MB    | MAX_TIME = 60    
[2025-06-21 02:17:38,241] Processed  | F

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750483179 on cpu 3 ***
PC: @     0x761732b25e2e  (unknown)  epoll_wait
    @     0x76155d7ac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x761732a42520  (unknown)  (unknown)
[2025-06-21 02:19:39,911 E 2729 2729] logging.cc:440: *** SIGTERM received at time=1750483179 on cpu 3 ***
[2025-06-21 02:19:39,912 E 2729 2729] logging.cc:440: PC: @     0x761732b25e2e  (unknown)  epoll_wait
[2025-06-21 02:19:39,914 E 2729 2729] logging.cc:440:     @     0x76155d7ac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-21 02:19:39,914 E 2729 2729] logging.cc:440:     @     0x761732a42520  (unknown)  (unknown)


[2025-06-21 02:19:42,528] ERROR      | FILE = IoT_23_Multiclass                                                | EXCEPTION = 
---------------------------------------------------------------------------
Exception encountered at "In [7]":
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
Cell In[7], line 136
    134     kwargs['pos_label'] = use_pos_label
    135 try:
--> 136     score = average_precision_score(y_true, y_proba, **kwargs)
    137 except ValueError:
    138     score = np.nan

File ~/miniforge3/envs/ag/lib/python3.11/site-packages/sklearn/utils/_param_validation.py:216, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    210 try:
    211     with config_context(
    212         skip_parameter_validation=(
    213             prefer_skip_nested_validation or global_skip_validation
    214         )
    215     ):
--> 216         return func(*args, **

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-21 02:23:15,723] Processing | FILE = NIDS_NF-UQ-NIDS-v2_Multiclass                                    | FILE_SIZE   = 1750.775 MB  | MAX_TIME = 120   


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-21 02:27:14,027] Processing | FILE = NIDS_NF-UQ-NIDS-v2_Multiclass                                    | FILE_SIZE   = 1750.775 MB  | MAX_TIME = 240   


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-21 02:32:41,044] Processing | FILE = NIDS_NF-UQ-NIDS-v2_Multiclass                                    | FILE_SIZE   = 1750.775 MB  | MAX_TIME = 480   


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750484778 on cpu 5 ***
PC: @     0x726c34925e2e  (unknown)  epoll_wait
    @     0x726a6d1ac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x726c34842520  (unknown)  (unknown)
[2025-06-21 02:46:18,688 E 6833 6833] logging.cc:440: *** SIGTERM received at time=1750484778 on cpu 5 ***
[2025-06-21 02:46:18,689 E 6833 6833] logging.cc:440: PC: @     0x726c34925e2e  (unknown)  epoll_wait
[2025-06-21 02:46:18,691 E 6833 6833] logging.cc:440:     @     0x726a6d1ac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-21 02:46:18,691 E 6833 6833] logging.cc:440:     @     0x726c34842520  (unknown)  (unknown)


[2025-06-21 02:46:21,507] Processing | FILE = NIDS_NF-UQ-NIDS-v2_Multiclass                                    | FILE_SIZE   = 1750.775 MB  | MAX_TIME = 960   


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750485588 on cpu 3 ***
PC: @     0x7e9d26f25e2e  (unknown)  epoll_wait
    @     0x7e9b709ac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x7e9d26e42520  (unknown)  (unknown)
[2025-06-21 02:59:48,754 E 9761 9761] logging.cc:440: *** SIGTERM received at time=1750485588 on cpu 3 ***
[2025-06-21 02:59:48,754 E 9761 9761] logging.cc:440: PC: @     0x7e9d26f25e2e  (unknown)  epoll_wait
[2025-06-21 02:59:48,756 E 9761 9761] logging.cc:440:     @     0x7e9b709ac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-21 02:59:48,757 E 9761 9761] logging.cc:440:     @     0x7e9d26e42520  (unknown)  (unknown)


[2025-06-21 02:59:51,573] Processing | FILE = NIDS_NF-UQ-NIDS-v2_Multiclass                                    | FILE_SIZE   = 1750.775 MB  | MAX_TIME = 1920  


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750486418 on cpu 22 ***
PC: @     0x7b13fe525e2e  (unknown)  epoll_wait
    @     0x7b12489ac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x7b13fe442520  (unknown)  (unknown)
[2025-06-21 03:13:38,465 E 12615 12615] logging.cc:440: *** SIGTERM received at time=1750486418 on cpu 22 ***
[2025-06-21 03:13:38,465 E 12615 12615] logging.cc:440: PC: @     0x7b13fe525e2e  (unknown)  epoll_wait
[2025-06-21 03:13:38,468 E 12615 12615] logging.cc:440:     @     0x7b12489ac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-21 03:13:38,468 E 12615 12615] logging.cc:440:     @     0x7b13fe442520  (unknown)  (unknown)


[2025-06-21 03:13:41,284] Processing | FILE = NIDS_NF-UQ-NIDS-v2_Multiclass                                    | FILE_SIZE   = 1750.775 MB  | MAX_TIME = 3840  


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750487235 on cpu 5 ***
PC: @     0x7a0fbb925e2e  (unknown)  epoll_wait
    @     0x7a0e04fac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x7a0fbb842520  (unknown)  (unknown)
[2025-06-21 03:27:15,674 E 15568 15568] logging.cc:440: *** SIGTERM received at time=1750487235 on cpu 5 ***
[2025-06-21 03:27:15,674 E 15568 15568] logging.cc:440: PC: @     0x7a0fbb925e2e  (unknown)  epoll_wait
[2025-06-21 03:27:15,677 E 15568 15568] logging.cc:440:     @     0x7a0e04fac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-21 03:27:15,677 E 15568 15568] logging.cc:440:     @     0x7a0fbb842520  (unknown)  (unknown)


File:   0%|          | 0/61 [00:00<?, ?it/s]

[2025-06-21 03:27:18,520] Processing | FILE = ToN_IoT_IoT_Garage_Door_Binary                                   | FILE_SIZE   = 0.002 MB     | MAX_TIME = 60    
[2025-06-21 03:27:18,521] Processed  | FILE = ToN_IoT_IoT_Garage_Door_Binary                                   | F1_WEIGHTED = 0.642857     | FIT_TIME = 0.836 
[2025-06-21 03:27:18,522] Processing | FILE = ToN_IoT_IoT_Motion_Light_Binary                                  | FILE_SIZE   = 0.003 MB     | MAX_TIME = 60    


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-21 03:27:21,052] ERROR      | FILE = ToN_IoT_IoT_Motion_Light_Binary                                  | EXCEPTION = 
---------------------------------------------------------------------------
Exception encountered at "In [3]":
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[3], line 21
     18 from autogluon.tabular import TabularDataset
     19 from sklearn.model_selection import train_test_split
---> 21 train_df, test_df = train_test_split(df, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=df[TARGET_COL])
     23 train_data, test_data = TabularDataset(train_df), TabularDataset(test_df)
     25 # Step 4: Print dtypes

File ~/miniforge3/envs/ag/lib/python3.11/site-packages/sklearn/utils/_param_validation.py:216, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    210 try:
    211     with config_context(
    212         skip_paramet

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750487325 on cpu 0 ***
PC: @     0x79d8bd925e2e  (unknown)  epoll_wait
    @     0x79d7095ac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x79d8bd842520  (unknown)  (unknown)
[2025-06-21 03:28:45,082 E 18519 18519] logging.cc:440: *** SIGTERM received at time=1750487325 on cpu 0 ***
[2025-06-21 03:28:45,082 E 18519 18519] logging.cc:440: PC: @     0x79d8bd925e2e  (unknown)  epoll_wait
[2025-06-21 03:28:45,084 E 18519 18519] logging.cc:440:     @     0x79d7095ac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-21 03:28:45,084 E 18519 18519] logging.cc:440:     @     0x79d8bd842520  (unknown)  (unknown)


[2025-06-21 03:28:47,707] Processed  | FILE = ToN_IoT_Windows_7_Binary                                         | F1_WEIGHTED = 0.995557     | FIT_TIME = 32.222
[2025-06-21 03:28:47,708] Processing | FILE = BCCC_CIC-BCCC-NRC-IoT-2022_Binary                                | FILE_SIZE   = 8.331 MB     | MAX_TIME = 60    
[2025-06-21 03:28:47,709] Processed  | FILE = BCCC_CIC-BCCC-NRC-IoT-2022_Binary                                | F1_WEIGHTED = 0.999971     | FIT_TIME = 2.705 
[2025-06-21 03:28:47,709] Processing | FILE = ToN_IoT_Windows_10_Binary                                        | FILE_SIZE   = 11.665 MB    | MAX_TIME = 60    


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750487415 on cpu 22 ***
PC: @     0x7bfdb2b25e2e  (unknown)  epoll_wait
    @     0x7bfbc77ac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x7bfdb2a42520  (unknown)  (unknown)
[2025-06-21 03:30:15,677 E 26339 26339] logging.cc:440: *** SIGTERM received at time=1750487415 on cpu 22 ***
[2025-06-21 03:30:15,677 E 26339 26339] logging.cc:440: PC: @     0x7bfdb2b25e2e  (unknown)  epoll_wait
[2025-06-21 03:30:15,679 E 26339 26339] logging.cc:440:     @     0x7bfbc77ac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-21 03:30:15,679 E 26339 26339] logging.cc:440:     @     0x7bfdb2a42520  (unknown)  (unknown)


[2025-06-21 03:30:18,402] Processed  | FILE = ToN_IoT_Windows_10_Binary                                        | F1_WEIGHTED = 0.984419     | FIT_TIME = 35.083
[2025-06-21 03:30:18,403] Processing | FILE = EDGE-IIOTSET_DNN-EdgeIIoT_Binary                                 | FILE_SIZE   = 16.024 MB    | MAX_TIME = 60    
[2025-06-21 03:30:18,404] Processed  | FILE = EDGE-IIOTSET_DNN-EdgeIIoT_Binary                                 | F1_WEIGHTED = 1.000000     | FIT_TIME = 4.377 
[2025-06-21 03:30:18,404] Processing | FILE = NIDS_NF-UNSW-NB15_Binary                                         | FILE_SIZE   = 16.813 MB    | MAX_TIME = 60    
[2025-06-21 03:30:18,405] Processed  | FILE = NIDS_NF-UNSW-NB15_Binary                                         | F1_WEIGHTED = 0.987532     | FIT_TIME = 42.897
[2025-06-21 03:30:18,405] Processing | FILE = IoT_Network_Intrusion_Macro_Binary                               | FILE_SIZE   = 17.502 MB    | MAX_TIME = 60    
[2025-06-21 03:30:18,406] Processed  | F

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-21 03:30:21,597] ERROR      | FILE = BCCC_CIC-BCCC-NRC-TONIoT-2021_Binary                             | EXCEPTION = 
---------------------------------------------------------------------------
Exception encountered at "In [3]":
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[3], line 21
     18 from autogluon.tabular import TabularDataset
     19 from sklearn.model_selection import train_test_split
---> 21 train_df, test_df = train_test_split(df, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=df[TARGET_COL])
     23 train_data, test_data = TabularDataset(train_df), TabularDataset(test_df)
     25 # Step 4: Print dtypes

File ~/miniforge3/envs/ag/lib/python3.11/site-packages/sklearn/utils/_param_validation.py:216, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    210 try:
    211     with config_context(
    212         skip_paramet

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-21 03:30:26,559] ERROR      | FILE = BCCC_CIC-BCCC-NRC-UQ-IOT-2022_Binary                             | EXCEPTION = 
---------------------------------------------------------------------------
Exception encountered at "In [3]":
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[3], line 21
     18 from autogluon.tabular import TabularDataset
     19 from sklearn.model_selection import train_test_split
---> 21 train_df, test_df = train_test_split(df, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=df[TARGET_COL])
     23 train_data, test_data = TabularDataset(train_df), TabularDataset(test_df)
     25 # Step 4: Print dtypes

File ~/miniforge3/envs/ag/lib/python3.11/site-packages/sklearn/utils/_param_validation.py:216, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    210 try:
    211     with config_context(
    212         skip_paramet

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-21 03:30:32,360] ERROR      | FILE = BCCC_CIC-BCCC-NRC-IoT-2023-Original_Training_and_Testing_Binary  | EXCEPTION = 
---------------------------------------------------------------------------
Exception encountered at "In [3]":
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[3], line 21
     18 from autogluon.tabular import TabularDataset
     19 from sklearn.model_selection import train_test_split
---> 21 train_df, test_df = train_test_split(df, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=df[TARGET_COL])
     23 train_data, test_data = TabularDataset(train_df), TabularDataset(test_df)
     25 # Step 4: Print dtypes

File ~/miniforge3/envs/ag/lib/python3.11/site-packages/sklearn/utils/_param_validation.py:216, in validate_params.<locals>.decorator.<locals>.wrapper(*args, **kwargs)
    210 try:
    211     with config_context(
    212         skip_paramet

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-21 03:33:55,660] Processing | FILE = NIDS_NF-UQ-NIDS-v2_Binary                                        | FILE_SIZE   = 1816.803 MB  | MAX_TIME = 120   


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-21 03:37:46,808] Processing | FILE = NIDS_NF-UQ-NIDS-v2_Binary                                        | FILE_SIZE   = 1816.803 MB  | MAX_TIME = 240   


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750490818 on cpu 2 ***
PC: @     0x71dd4a925e2e  (unknown)  epoll_wait
    @     0x71db90bac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x71dd4a842520  (unknown)  (unknown)
[2025-06-21 04:26:58,687 E 2455 2455] logging.cc:440: *** SIGTERM received at time=1750490818 on cpu 2 ***
[2025-06-21 04:26:58,687 E 2455 2455] logging.cc:440: PC: @     0x71dd4a925e2e  (unknown)  epoll_wait
[2025-06-21 04:26:58,689 E 2455 2455] logging.cc:440:     @     0x71db90bac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-21 04:26:58,689 E 2455 2455] logging.cc:440:     @     0x71dd4a842520  (unknown)  (unknown)


[2025-06-21 04:27:01,504] Processed  | FILE = NIDS_NF-UQ-NIDS-v2_Binary                                        | F1_WEIGHTED = 0.551050     | FIT_TIME = 179.600


In [4]:
from pprint import pprint

pprint(errors, indent=4)

    'IoT_23_Multiclass': PapermillExecutionError(6, 7, 'from sklearn.metrics import (\n    f1_score, precision_score, recall_score, roc_auc_score,\n    average_precision_score, accuracy_score, matthews_corrcoef, log_loss,\n    confusion_matrix\n)\nimport numpy as np\n\n# ------------------------------\n# Setup\n# ------------------------------\nmodel_names = predictor.model_names()\ny_true = test_data[TARGET_COL]\n\n# Determine valid averaging strategies and ROC modes\nif problem_type == \'binary\':\n    averages = [\'binary\', \'micro\', \'macro\', \'weighted\']\n    multi_class_args = [None]\n    use_pos_label = \'Malign\' if \'Malign\' in y_true.unique() else None\nelse:\n    averages = [\'micro\', \'macro\', \'weighted\']\n    multi_class_args = [\'ovo\', \'ovr\']\n    use_pos_label = None  # not applicable in multiclass\n\n# Fix key names: use \'binary\' instead of \'None\' for binary mode\nroc_auc_keys = [\n    (f\'{mode}_{avg}\' if mode else f\'binary_{avg}\')\n    for mode in m