In [1]:
# !pip install papermill

In [2]:
from datetime import datetime
import json

def now():
    """Return the current local time as 'YYYY-MM-DD HH:MM:SS,mmm'.

    The part after the comma is the millisecond component, zero-padded to
    three digits and truncated (not rounded) from the microsecond field.
    """
    current = datetime.now()
    millis = current.microsecond // 1000
    return f'{current:%Y-%m-%d %H:%M:%S},{millis:03d}'

def load_results(results_filename):
    """Read a results JSON file and return its first leaderboard entry's metrics.

    Args:
        results_filename: Path to a UTF-8 JSON file containing a
            "leaderboard" list whose first element has "score_val" and
            "fit_time" keys.

    Returns:
        A 2-tuple of strings: ("score_val" formatted with 6 decimals,
        "fit_time" formatted with 3 decimals).
    """
    with open(results_filename, 'r', encoding='utf-8') as handle:
        top_entry = json.load(handle)["leaderboard"][0]
    score, seconds = top_entry["score_val"], top_entry["fit_time"]
    return f'{score:.6f}', f'{seconds:.3f}'

In [None]:
from pathlib import Path
from time import sleep
from tqdm.notebook import tqdm
import os
import papermill as pm

# Driver loop: for every .parquet dataset under 2025-06-13/<kind>/, execute the
# parameterised notebook 'ag_code.ipynb' with papermill (unless a
# '<dataset>_results.json' file already exists next to the dataset) and log the
# metrics read from that results file.

base_time = 60        # initial TIME_LIMIT handed to the notebook for each dataset
max_time_cap = 3840   # stop retrying once the doubled limit exceeds this (60 * 2**6)

# Datasets excluded from the run; matched as substrings of the full file path.
skipped_datasets = ('NIDS_NF-UQ-NIDS-v2', 'IoT_23', 'KDD_Cup_1999')

# NOTE: `leave` must be the boolean False. The string 'False' is truthy, so the
# finished progress bars were never cleared.
for kind in tqdm(["Multiclass", "Binary"], desc='Kind', leave=False):

    base_folder = Path(f"2025-06-13/{kind}")

    # All .parquet files (recursive), smallest first.
    parquet_files_sorted = sorted(base_folder.rglob("*.parquet"), key=os.path.getsize)

    for file in tqdm(parquet_files_sorted, desc='File', leave=False):

        # The exclusion test does not depend on the retry state, so decide once
        # per file instead of inside the retry loop.
        if any(name in str(file) for name in skipped_datasets):
            continue

        base_filename = os.path.join(file.parent, file.stem)
        results_filename = f'{base_filename}_results.json'
        size_mb = f'{(os.path.getsize(file) / (1024 * 1024)):.3f} MB'
        input_notebook = 'ag_code.ipynb'
        output_notebook = f'{base_filename}.ipynb'

        max_time = base_time
        success = False

        # Retry with a doubled time limit while the failure looks time-related.
        while not success and max_time <= max_time_cap:

            try:

                tqdm.write(f'[{now()}] Processing | FILE = {file.stem:<64} | FILE_SIZE   = {size_mb:<12} | MAX_TIME = {max_time:<6}')

                # An existing results file means this dataset was already
                # processed; skip execution and just report its metrics.
                if not Path(results_filename).exists():

                    pm.execute_notebook(
                        input_notebook,
                        output_notebook,
                        parameters=dict(
                            BASE_FILENAME=base_filename,
                            TARGET_COL='label',
                            TEST_SIZE=0.2,
                            TIME_LIMIT=max_time,
                            EVAL_METRIC='f1_weighted',
                            PRESET='medium_quality',
                            N_FOLDS=5,
                            RANDOM_STATE=42
                        )
                    )

                f1_weighted, fit_time = load_results(results_filename)
                tqdm.write(f'[{now()}] Processed  | FILE = {file.stem:<64} | F1_WEIGHTED = {f1_weighted:<12} | FIT_TIME = {fit_time:<6}')
                success = True

            except Exception as e:
                # Errors mentioning these markers are treated as "time limit too
                # small": double the limit and try again. Anything else is
                # logged and the file is abandoned.
                if 'raise_on_no_models_fitted' in str(e) or 'time_limit' in str(e):
                    max_time = 2 * max_time
                else:
                    tqdm.write(f'[{now()}] ERROR      | FILE = {file.stem:<64} | EXCEPTION = {str(e)}')
                    break

        else:
            # Reached only when the loop condition went false (not via `break`).
            # Without success, that means the retry budget was exhausted.
            if not success:
                tqdm.write(f'[{now()}] GAVE UP    | FILE = {file.stem:<64} | MAX_TIME would exceed {max_time_cap}')

Kind:   0%|          | 0/2 [00:00<?, ?it/s]

File:   0%|          | 0/46 [00:00<?, ?it/s]

[2025-06-19 02:26:05,310] Processing | FILE = EDGE-IIOTSET_ML-EdgeIIoT_Multiclass                              | FILE_SIZE   = 2.063 MB     | MAX_TIME = 60    
[2025-06-19 02:26:05,311] Processed  | FILE = EDGE-IIOTSET_ML-EdgeIIoT_Multiclass                              | F1_WEIGHTED = 0.936288     | FIT_TIME = 49.432
[2025-06-19 02:26:05,311] Processing | FILE = NIDS_NF-BoT-IoT_Multiclass                                       | FILE_SIZE   = 2.134 MB     | MAX_TIME = 60    
[2025-06-19 02:26:05,311] Processed  | FILE = NIDS_NF-BoT-IoT_Multiclass                                       | F1_WEIGHTED = 0.955481     | FIT_TIME = 48.966
[2025-06-19 02:26:05,311] Processing | FILE = MQTT_IoT_IDS2020_BiflowFeatures_Multiclass                       | FILE_SIZE   = 3.081 MB     | MAX_TIME = 60    
[2025-06-19 02:26:05,312] Processed  | FILE = MQTT_IoT_IDS2020_BiflowFeatures_Multiclass                       | F1_WEIGHTED = 0.998471     | FIT_TIME = 43.100
[2025-06-19 02:26:05,312] Processing | F

File:   0%|          | 0/46 [00:00<?, ?it/s]

[2025-06-19 02:26:05,333] Processing | FILE = NIDS_NF-BoT-IoT_Binary                                           | FILE_SIZE   = 1.825 MB     | MAX_TIME = 60    


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750310930 on cpu 6 ***
PC: @     0x72c8ea725e2e  (unknown)  epoll_wait
    @     0x72c7757ac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x72c8ea642520  (unknown)  (unknown)
[2025-06-19 02:28:50,655 E 25466 25466] logging.cc:440: *** SIGTERM received at time=1750310930 on cpu 6 ***
[2025-06-19 02:28:50,655 E 25466 25466] logging.cc:440: PC: @     0x72c8ea725e2e  (unknown)  epoll_wait
[2025-06-19 02:28:50,657 E 25466 25466] logging.cc:440:     @     0x72c7757ac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-19 02:28:50,657 E 25466 25466] logging.cc:440:     @     0x72c8ea642520  (unknown)  (unknown)


[2025-06-19 02:28:53,278] Processed  | FILE = NIDS_NF-BoT-IoT_Binary                                           | F1_WEIGHTED = 0.993885     | FIT_TIME = 33.135
[2025-06-19 02:28:53,279] Processing | FILE = EDGE-IIOTSET_ML-EdgeIIoT_Binary                                  | FILE_SIZE   = 2.073 MB     | MAX_TIME = 60    
[2025-06-19 02:28:53,280] Processed  | FILE = EDGE-IIOTSET_ML-EdgeIIoT_Binary                                  | F1_WEIGHTED = 1.000000     | FIT_TIME = 1.165 
[2025-06-19 02:28:53,280] Processing | FILE = MQTT_IoT_IDS2020_BiflowFeatures_Binary                           | FILE_SIZE   = 3.079 MB     | MAX_TIME = 60    
[2025-06-19 02:28:53,282] Processed  | FILE = MQTT_IoT_IDS2020_BiflowFeatures_Binary                           | F1_WEIGHTED = 0.998966     | FIT_TIME = 24.216
[2025-06-19 02:28:53,282] Processing | FILE = MQTT_IoT_IDS2020_UniflowFeatures_Binary                          | FILE_SIZE   = 3.864 MB     | MAX_TIME = 60    
[2025-06-19 02:28:53,282] Processed  | F

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750311117 on cpu 0 ***
PC: @     0x702e2b325e2e  (unknown)  epoll_wait
    @     0x702c99bac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x702e2b242520  (unknown)  (unknown)
[2025-06-19 02:31:57,053 E 29758 29758] logging.cc:440: *** SIGTERM received at time=1750311117 on cpu 0 ***
[2025-06-19 02:31:57,053 E 29758 29758] logging.cc:440: PC: @     0x702e2b325e2e  (unknown)  epoll_wait
[2025-06-19 02:31:57,055 E 29758 29758] logging.cc:440:     @     0x702c99bac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-19 02:31:57,055 E 29758 29758] logging.cc:440:     @     0x702e2b242520  (unknown)  (unknown)


[2025-06-19 02:31:59,677] Processed  | FILE = NIDS_NF-ToN-IoT_Binary                                           | F1_WEIGHTED = 0.998503     | FIT_TIME = 45.190
[2025-06-19 02:31:59,678] Processing | FILE = BCCC_CIC-BCCC-NRC-IoT-2022_Binary                                | FILE_SIZE   = 8.331 MB     | MAX_TIME = 60    
[2025-06-19 02:31:59,679] Processed  | FILE = BCCC_CIC-BCCC-NRC-IoT-2022_Binary                                | F1_WEIGHTED = 0.999971     | FIT_TIME = 2.705 
[2025-06-19 02:31:59,679] Processing | FILE = EDGE-IIOTSET_DNN-EdgeIIoT_Binary                                 | FILE_SIZE   = 16.024 MB    | MAX_TIME = 60    
[2025-06-19 02:31:59,679] Processed  | FILE = EDGE-IIOTSET_DNN-EdgeIIoT_Binary                                 | F1_WEIGHTED = 1.000000     | FIT_TIME = 4.377 
[2025-06-19 02:31:59,680] Processing | FILE = NIDS_NF-UNSW-NB15_Binary                                         | FILE_SIZE   = 16.813 MB    | MAX_TIME = 60    


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750311346 on cpu 11 ***
PC: @     0x7f51b2725e2e  (unknown)  epoll_wait
    @     0x7f50101ac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x7f51b2642520  (unknown)  (unknown)
[2025-06-19 02:35:46,519 E 516 516] logging.cc:440: *** SIGTERM received at time=1750311346 on cpu 11 ***
[2025-06-19 02:35:46,520 E 516 516] logging.cc:440: PC: @     0x7f51b2725e2e  (unknown)  epoll_wait
[2025-06-19 02:35:46,522 E 516 516] logging.cc:440:     @     0x7f50101ac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-19 02:35:46,522 E 516 516] logging.cc:440:     @     0x7f51b2642520  (unknown)  (unknown)


[2025-06-19 02:35:49,141] Processed  | FILE = NIDS_NF-UNSW-NB15_Binary                                         | F1_WEIGHTED = 0.987532     | FIT_TIME = 42.897
[2025-06-19 02:35:49,143] Processing | FILE = IoT_Network_Intrusion_Macro_Binary                               | FILE_SIZE   = 17.502 MB    | MAX_TIME = 60    
[2025-06-19 02:35:49,143] Processed  | FILE = IoT_Network_Intrusion_Macro_Binary                               | F1_WEIGHTED = 0.985255     | FIT_TIME = 39.021
[2025-06-19 02:35:49,143] Processing | FILE = IoT_Network_Intrusion_Micro_Binary                               | FILE_SIZE   = 17.502 MB    | MAX_TIME = 60    
[2025-06-19 02:35:49,144] Processed  | FILE = IoT_Network_Intrusion_Micro_Binary                               | F1_WEIGHTED = 0.988501     | FIT_TIME = 39.584
[2025-06-19 02:35:49,144] Processing | FILE = N_BaIoT_Ennio_Doorbell_Binary                                    | FILE_SIZE   = 25.492 MB    | MAX_TIME = 60    
[2025-06-19 02:35:49,145] Processed  | F

Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750311564 on cpu 23 ***
PC: @     0x743ad2325e2e  (unknown)  epoll_wait
    @     0x7438d77ac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x743ad2242520  (unknown)  (unknown)
[2025-06-19 02:39:24,791 E 3892 3892] logging.cc:440: *** SIGTERM received at time=1750311564 on cpu 23 ***
[2025-06-19 02:39:24,791 E 3892 3892] logging.cc:440: PC: @     0x743ad2325e2e  (unknown)  epoll_wait
[2025-06-19 02:39:24,794 E 3892 3892] logging.cc:440:     @     0x7438d77ac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-19 02:39:24,794 E 3892 3892] logging.cc:440:     @     0x743ad2242520  (unknown)  (unknown)


[2025-06-19 02:39:27,515] Processed  | FILE = NIDS_NF-UNSW-NB15-v2_Binary                                      | F1_WEIGHTED = 0.996850     | FIT_TIME = 40.011
[2025-06-19 02:39:27,517] Processing | FILE = NIDS_NF-CSE-CIC-IDS2018_Binary                                   | FILE_SIZE   = 59.959 MB    | MAX_TIME = 60    


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

*** SIGTERM received at time=1750311938 on cpu 18 ***
PC: @     0x7061ef325e2e  (unknown)  epoll_wait
    @     0x706016fac0ed         64  absl::lts_20230802::AbslFailureSignalHandler()
    @     0x7061ef242520  (unknown)  (unknown)
[2025-06-19 02:45:38,477 E 7215 7215] logging.cc:440: *** SIGTERM received at time=1750311938 on cpu 18 ***
[2025-06-19 02:45:38,477 E 7215 7215] logging.cc:440: PC: @     0x7061ef325e2e  (unknown)  epoll_wait
[2025-06-19 02:45:38,480 E 7215 7215] logging.cc:440:     @     0x706016fac119         64  absl::lts_20230802::AbslFailureSignalHandler()
[2025-06-19 02:45:38,480 E 7215 7215] logging.cc:440:     @     0x7061ef242520  (unknown)  (unknown)


[2025-06-19 02:45:41,203] Processed  | FILE = NIDS_NF-CSE-CIC-IDS2018_Binary                                   | F1_WEIGHTED = 0.986126     | FIT_TIME = 19.351
[2025-06-19 02:45:41,205] Processing | FILE = NIDS_NF-BoT-IoT-v3_Binary                                        | FILE_SIZE   = 60.596 MB    | MAX_TIME = 60    


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

[2025-06-19 02:47:08,995] Processing | FILE = NIDS_NF-BoT-IoT-v3_Binary                                        | FILE_SIZE   = 60.596 MB    | MAX_TIME = 120   


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]