In [None]:
!pip install category_encoders
!pip install nnetsauce
!pip install openml

In [3]:
import category_encoders as ce
import joblib
import nnetsauce as ns
import numpy as np
import openml
import pandas as pd
import warnings

from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
from time import time
from functools import lru_cache
from tqdm import tqdm

# 1. Utility functions and global variables
NROWS = 1000
NCOLS = 10

def select_NROWS_NCOLS(X, y):

    def replace_nan_with_median(arr):
        # Calculate the median of each column ignoring NaN values
        median_vals = np.nanmedian(arr, axis=0)

        # Iterate over each column index and replace NaN with the corresponding median value
        for col_idx in range(arr.shape[1]):
            col_values = arr[:, col_idx]
            nan_indices = np.isnan(col_values)
            if np.any(nan_indices):
                col_values[nan_indices] = median_vals[col_idx]

        return arr

    print(f"X.shape (initial): {X.shape}")
    print(f"y.shape (initial): {y.shape}")
    print("Encoding features and response...")

    # Label encoding and Hashing encoding
    encoder = ce.HashingEncoder(return_df=False)
    X = np.asarray(encoder.fit_transform(X, y)).astype(np.float32)
    X = replace_nan_with_median(X)

    print("...Done.")
    print(f"Finding top {NCOLS} features if necessary...")

    # Feature selection based on Random Forest feature importances
    if X.shape[1] > NCOLS:
        rf = RandomForestRegressor(n_estimators=50, random_state=42)
        rf.fit(X, y)
        indices = np.argsort(rf.feature_importances_)[::-1]
        top_cols = indices[:NCOLS]
        print(f"  top {NCOLS} indices: {top_cols}")
        X = X[:,top_cols]
        print(f"  X reduced shape: {X.shape}")
    print("...Done.")

    # Subsampling rows if necessary
    if X.shape[0] > NROWS:
        print(f"Subsampling to {NROWS} if necessary...")
        start = time()
        sub = ns.SubSampler(y=y.ravel(), n_samples=NROWS, seed=123, n_jobs=-1)
        idx_rows = sub.subsample().ravel()
        print(f"... Elapsed time for subsampling: {time() - start}")
        print("Number of rows in the subsample: ", len(idx_rows))
        return_X = replace_nan_with_median(X[idx_rows,:])
        return_y = y[idx_rows].ravel()
        print("Done.")
        return return_X, return_y
    else:
        return X, y

# 2. Get OpenML CC18 regression data
warnings.filterwarnings("ignore")

# Fetch the OpenML benchmark suite for regression tasks
benchmark_suite = openml.study.get_suite('OpenML-CC18')  # Obtain the benchmark suite
n_tasks = len(benchmark_suite.tasks)

Xys = {}

start = time()
for idx, task_id in tqdm(enumerate(benchmark_suite.tasks)):  # Iterate over all tasks
    try:
        print(f"\n\n Task #{idx}/{n_tasks} --------------------")
        warnings.filterwarnings("ignore")

        # Get the task information
        task = openml.tasks.get_task(task_id)  # Download the OpenML task
        # Get the data from OpenML
        task_name = task.get_dataset().name
        X_temp, y_temp = task.get_X_and_y()

        # Apply feature selection and subsampling
        Xys[task_name] = {"task_id": task_id,
                          "dataset": select_NROWS_NCOLS(X_temp, y_temp)}
        print(f"Elapsed time for task #{idx}: {time()-start}")
    except:
        continue

print(f"Total elapsed time: {time()-start}")


0it [00:00, ?it/s]



 Task #0/72 --------------------
X.shape (initial): (3196, 36)
y.shape (initial): (3196,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
  top 10 indices: [ 9 32 20 31 14  0  5 34 16 15]
  X reduced shape: (3196, 10)
...Done.
Subsampling to 1000 if necessary...


1it [00:05,  5.06s/it]

... Elapsed time for subsampling: 1.0208461284637451
Number of rows in the subsample:  999
Done.
Elapsed time for task #0: 5.063868284225464


 Task #1/72 --------------------
X.shape (initial): (20000, 16)
y.shape (initial): (20000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


2it [00:13,  6.99s/it]

  top 10 indices: [10  8 11 14  7 13 12 15  9  5]
  X reduced shape: (20000, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.06104731559753418
Number of rows in the subsample:  988


 Task #2/72 --------------------


3it [00:16,  5.28s/it]

X.shape (initial): (625, 4)
y.shape (initial): (625,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Elapsed time for task #2: 16.645115852355957


 Task #3/72 --------------------
X.shape (initial): (2000, 216)
y.shape (initial): (2000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


4it [00:32,  9.31s/it]

  top 10 indices: [211  96  36  93  23  85 185   7 151 157]
  X reduced shape: (2000, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.03722691535949707
Number of rows in the subsample:  1000
Done.
Elapsed time for task #3: 32.136184215545654


 Task #4/72 --------------------
X.shape (initial): (2000, 76)
y.shape (initial): (2000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


5it [00:49, 12.08s/it]

  top 10 indices: [72  1  2  0  6 71 73  8  4  5]
  X reduced shape: (2000, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.03704547882080078
Number of rows in the subsample:  1000
Done.
Elapsed time for task #4: 49.1170814037323


 Task #5/72 --------------------


6it [01:13, 16.22s/it]

X.shape (initial): (699, 9)
y.shape (initial): (699,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Elapsed time for task #5: 73.36651730537415


 Task #6/72 --------------------
X.shape (initial): (2000, 64)
y.shape (initial): (2000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


7it [01:26, 15.20s/it]

  top 10 indices: [ 0  9  5  4  2  6 24  1 26  3]
  X reduced shape: (2000, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.03626728057861328
Number of rows in the subsample:  1000
Done.
Elapsed time for task #6: 86.46943211555481


 Task #7/72 --------------------


8it [01:41, 15.10s/it]

X.shape (initial): (2000, 6)
y.shape (initial): (2000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.04811453819274902
Number of rows in the subsample:  1000
Done.
Elapsed time for task #7: 101.356121301651


 Task #8/72 --------------------
X.shape (initial): (2000, 47)
y.shape (initial): (2000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


9it [01:51, 13.46s/it]

  top 10 indices: [18 36 27  5 44 42 35 31 26  9]
  X reduced shape: (2000, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.03478240966796875
Number of rows in the subsample:  1000
Done.
Elapsed time for task #8: 111.2005307674408


 Task #9/72 --------------------


10it [02:03, 13.10s/it]

X.shape (initial): (1473, 9)
y.shape (initial): (1473,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.022933006286621094
Number of rows in the subsample:  1000
Done.
Elapsed time for task #9: 123.51276230812073


 Task #10/72 --------------------
X.shape (initial): (5620, 64)
y.shape (initial): (5620,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


11it [02:10, 11.30s/it]

  top 10 indices: [52 36 42 20 21 28 29 30 18 62]
  X reduced shape: (5620, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.038779258728027344
Number of rows in the subsample:  996
Done.
Elapsed time for task #10: 130.73208689689636


 Task #11/72 --------------------


12it [02:14,  9.09s/it]

X.shape (initial): (690, 15)
y.shape (initial): (690,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
  top 10 indices: [ 8 13  2  1 14 10  7  5  6  4]
  X reduced shape: (690, 10)
...Done.
Elapsed time for task #11: 134.76689743995667


 Task #12/72 --------------------
X.shape (initial): (1000, 20)
y.shape (initial): (1000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


13it [02:18,  7.42s/it]

  top 10 indices: [ 4  0 12  1  3  6  2  5 11  7]
  X reduced shape: (1000, 10)
...Done.
Elapsed time for task #12: 138.3535487651825


 Task #13/72 --------------------
X.shape (initial): (10992, 16)
y.shape (initial): (10992,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


14it [02:25,  7.24s/it]

  top 10 indices: [13  0  8  7  1 15  4 14  3  9]
  X reduced shape: (10992, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.03492474555969238
Number of rows in the subsample:  993
Done.
Elapsed time for task #13: 145.16286492347717


 Task #14/72 --------------------


15it [02:28,  6.13s/it]

X.shape (initial): (768, 8)
y.shape (initial): (768,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Elapsed time for task #14: 148.71383047103882


 Task #15/72 --------------------
X.shape (initial): (4601, 57)
y.shape (initial): (4601,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


16it [02:37,  6.78s/it]

  top 10 indices: [51 52  6 54 24 55 15 56 45  4]
  X reduced shape: (4601, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023231029510498047
Number of rows in the subsample:  999
Done.
Elapsed time for task #15: 157.0140483379364


 Task #16/72 --------------------
X.shape (initial): (3190, 60)
y.shape (initial): (3190,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


17it [02:41,  6.19s/it]

  top 10 indices: [30 31 29 28 34 27 32 33 25 24]
  X reduced shape: (3190, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.02331852912902832
Number of rows in the subsample:  999
Done.
Elapsed time for task #16: 161.82724332809448


 Task #17/72 --------------------


18it [02:45,  5.43s/it]

X.shape (initial): (958, 9)
y.shape (initial): (958,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Elapsed time for task #17: 165.47929692268372


 Task #18/72 --------------------
X.shape (initial): (846, 18)
y.shape (initial): (846,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


19it [02:49,  4.88s/it]

  top 10 indices: [ 5 11 10  9  7  0  6 13  4 14]
  X reduced shape: (846, 10)
...Done.
Elapsed time for task #18: 169.084308385849


 Task #19/72 --------------------


20it [02:53,  4.62s/it]

X.shape (initial): (45312, 8)
y.shape (initial): (45312,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023288249969482422
Number of rows in the subsample:  1000
Done.
Elapsed time for task #19: 173.100492477417


 Task #20/72 --------------------
X.shape (initial): (6430, 36)
y.shape (initial): (6430,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


21it [03:01,  5.61s/it]

  top 10 indices: [19 16 20 17 15 12 21 32  9  5]
  X reduced shape: (6430, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.03363156318664551
Number of rows in the subsample:  997
Done.
Elapsed time for task #20: 181.03424048423767


 Task #21/72 --------------------
X.shape (initial): (736, 19)
y.shape (initial): (736,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


22it [03:04,  5.10s/it]

  top 10 indices: [14 17 13 12 16 11 10  0  8  9]
  X reduced shape: (736, 10)
...Done.
Elapsed time for task #21: 184.9428722858429


 Task #22/72 --------------------
X.shape (initial): (3772, 29)
y.shape (initial): (3772,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


23it [03:08,  4.70s/it]

  top 10 indices: [19 21 25 17 28  0 23  2 18  5]
  X reduced shape: (3772, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023493051528930664
Number of rows in the subsample:  999
Done.
Elapsed time for task #22: 188.71552515029907


 Task #23/72 --------------------
X.shape (initial): (990, 12)
y.shape (initial): (990,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


24it [03:12,  4.49s/it]

  top 10 indices: [ 3  2  5 10  7  6  4 11  9  1]
  X reduced shape: (990, 10)
...Done.
Elapsed time for task #23: 192.6925287246704


 Task #24/72 --------------------
X.shape (initial): (7797, 617)
y.shape (initial): (7797,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
  top 10 indices: [479 201 418 327 432   2 411 296 426 580]
  X reduced shape: (7797, 10)
...Done.
Subsampling to 1000 if necessary...


25it [12:15, 165.87s/it]

... Elapsed time for subsampling: 0.4092292785644531
Number of rows in the subsample:  988
Done.
Elapsed time for task #24: 735.0601677894592


 Task #25/72 --------------------
X.shape (initial): (841, 70)
y.shape (initial): (841,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


26it [12:19, 117.49s/it]

  top 10 indices: [59  9 56 69 21 23 50 34 41 29]
  X reduced shape: (841, 10)
...Done.
Elapsed time for task #25: 739.6899161338806


 Task #26/72 --------------------


27it [12:25, 83.99s/it] 

X.shape (initial): (797, 4)
y.shape (initial): (797,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Elapsed time for task #26: 745.5071656703949


 Task #27/72 --------------------
X.shape (initial): (70000, 784)
y.shape (initial): (70000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
  top 10 indices: [409 263 464 353 239 567 155 269 316 407]
  X reduced shape: (70000, 10)
...Done.
Subsampling to 1000 if necessary...


28it [28:17, 344.26s/it]

... Elapsed time for subsampling: 0.8575036525726318
Number of rows in the subsample:  996
Done.
Elapsed time for task #27: 1697.016865491867


 Task #28/72 --------------------
X.shape (initial): (1458, 37)
y.shape (initial): (1458,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


29it [28:21, 242.45s/it]

  top 10 indices: [ 3  4  7 35  0 17 33 18 34 30]
  X reduced shape: (1458, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023412466049194336
Number of rows in the subsample:  999
Done.
Elapsed time for task #28: 1701.9438734054565


 Task #29/72 --------------------
X.shape (initial): (1563, 37)
y.shape (initial): (1563,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


30it [28:26, 171.10s/it]

  top 10 indices: [ 0 17 35 34  7  9  3 32  4 18]
  X reduced shape: (1563, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023400068283081055
Number of rows in the subsample:  999
Done.
Elapsed time for task #29: 1706.5688726902008


 Task #30/72 --------------------
X.shape (initial): (10885, 21)
y.shape (initial): (10885,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


31it [28:36, 122.83s/it]

  top 10 indices: [ 0  8  7  3 12 18 14 19  5 17]
  X reduced shape: (10885, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.02301621437072754
Number of rows in the subsample:  999
Done.
Elapsed time for task #30: 1716.7661230564117


 Task #31/72 --------------------
X.shape (initial): (522, 21)
y.shape (initial): (522,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


32it [28:40, 87.06s/it] 

  top 10 indices: [19  0 14  2 17  8  7 16 12 13]
  X reduced shape: (522, 10)
...Done.
Elapsed time for task #31: 1720.3602149486542


 Task #32/72 --------------------
X.shape (initial): (2109, 21)
y.shape (initial): (2109,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


33it [28:44, 62.19s/it]

  top 10 indices: [ 7  8 12  0  5 19 18  9 16 14]
  X reduced shape: (2109, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023459434509277344
Number of rows in the subsample:  999
Done.
Elapsed time for task #32: 1724.5266337394714


 Task #33/72 --------------------
X.shape (initial): (1109, 21)
y.shape (initial): (1109,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


34it [28:48, 44.69s/it]

  top 10 indices: [13 15 14  8 17 16  0  5  7 12]
  X reduced shape: (1109, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023421287536621094
Number of rows in the subsample:  999
Done.
Elapsed time for task #33: 1728.3637170791626


 Task #34/72 --------------------
X.shape (initial): (48842, 14)
y.shape (initial): (48842,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


35it [29:05, 36.31s/it]

  top 10 indices: [ 5  2  0  4 10 12  6 11  1 13]
  X reduced shape: (48842, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.02415776252746582
Number of rows in the subsample:  999
Done.
Elapsed time for task #34: 1745.1314389705658


 Task #35/72 --------------------
X.shape (initial): (3751, 1776)
y.shape (initial): (3751,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


36it [30:08, 44.41s/it]

  top 10 indices: [ 26  77  84  14   9   8 118  83   5 105]
  X reduced shape: (3751, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023758411407470703
Number of rows in the subsample:  999
Done.
Elapsed time for task #35: 1808.4323029518127


 Task #36/72 --------------------
X.shape (initial): (569, 30)
y.shape (initial): (569,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


37it [30:13, 32.59s/it]

  top 10 indices: [22 27  7 20 23 21  1 26 13 28]
  X reduced shape: (569, 10)
...Done.
Elapsed time for task #36: 1813.439383506775


 Task #37/72 --------------------


38it [30:17, 24.09s/it]

X.shape (initial): (5404, 5)
y.shape (initial): (5404,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023346662521362305
Number of rows in the subsample:  999
Done.
Elapsed time for task #37: 1817.6882419586182


 Task #38/72 --------------------
X.shape (initial): (1055, 41)
y.shape (initial): (1055,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


39it [30:21, 18.11s/it]

  top 10 indices: [35  0 11 33 38 21 17 36 13 29]
  X reduced shape: (1055, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023588180541992188
Number of rows in the subsample:  999
Done.
Elapsed time for task #38: 1821.8529534339905


 Task #39/72 --------------------
X.shape (initial): (5456, 24)
y.shape (initial): (5456,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


40it [30:29, 14.87s/it]

  top 10 indices: [18 14 19 17 11 23 13 12 22 16]
  X reduced shape: (5456, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023916244506835938
Number of rows in the subsample:  999
Done.
Elapsed time for task #39: 1829.1573448181152


 Task #40/72 --------------------
X.shape (initial): (1593, 256)
y.shape (initial): (1593,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


41it [30:34, 12.02s/it]

  top 10 indices: [110  78  94 145 161 126 146 177  93 138]
  X reduced shape: (1593, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.03761768341064453
Number of rows in the subsample:  996
Done.
Elapsed time for task #40: 1834.5196244716644


 Task #41/72 --------------------


42it [30:37,  9.38s/it]

X.shape (initial): (583, 10)
y.shape (initial): (583,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Elapsed time for task #41: 1837.7354457378387


 Task #42/72 --------------------
X.shape (initial): (2600, 500)
y.shape (initial): (2600,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


43it [31:24, 20.55s/it]

  top 10 indices: [338 475 105 378  48 153 318 442  28 241]
  X reduced shape: (2600, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.02363896369934082
Number of rows in the subsample:  1000
Done.
Elapsed time for task #42: 1884.355892419815


 Task #43/72 --------------------
X.shape (initial): (34465, 118)
y.shape (initial): (34465,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


44it [32:10, 28.21s/it]

  top 10 indices: [ 5 89  0 96 99 59 60 58 29 97]
  X reduced shape: (34465, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.02376866340637207
Number of rows in the subsample:  999
Done.
Elapsed time for task #43: 1930.4541244506836


 Task #44/72 --------------------
X.shape (initial): (2534, 72)
y.shape (initial): (2534,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


45it [32:40, 28.70s/it]

  top 10 indices: [41 42 55 40 12 10 54  0 53 63]
  X reduced shape: (2534, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023291587829589844
Number of rows in the subsample:  999
Done.
Elapsed time for task #44: 1960.2973058223724


 Task #45/72 --------------------
X.shape (initial): (1080, 856)
y.shape (initial): (1080,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


46it [33:12, 29.62s/it]

  top 10 indices: [201 206 518 813 337 606 822 271 420 475]
  X reduced shape: (1080, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.03653526306152344
Number of rows in the subsample:  999
Done.
Elapsed time for task #45: 1992.041718006134


 Task #46/72 --------------------
X.shape (initial): (6118, 51)
y.shape (initial): (6118,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


47it [33:45, 30.71s/it]

  top 10 indices: [14  2 28  9 24  5 16 11 12 26]
  X reduced shape: (6118, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.037429094314575195
Number of rows in the subsample:  996
Done.
Elapsed time for task #46: 2025.3210847377777


 Task #47/72 --------------------


48it [33:49, 22.64s/it]

X.shape (initial): (1372, 4)
y.shape (initial): (1372,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023273468017578125
Number of rows in the subsample:  999
Done.
Elapsed time for task #47: 2029.1299777030945


 Task #48/72 --------------------


49it [33:52, 16.86s/it]

X.shape (initial): (748, 4)
y.shape (initial): (748,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Elapsed time for task #48: 2032.4881732463837


 Task #49/72 --------------------
X.shape (initial): (11055, 30)
y.shape (initial): (11055,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


50it [33:57, 13.23s/it]

  top 10 indices: [13  7 14 25  5  6 28 23 12 24]
  X reduced shape: (11055, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.02366471290588379
Number of rows in the subsample:  999
Done.
Elapsed time for task #49: 2037.2439432144165


 Task #50/72 --------------------
X.shape (initial): (540, 37)
y.shape (initial): (540,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


51it [34:00, 10.37s/it]

  top 10 indices: [19 27 25 33 26 22  0 13  1 21]
  X reduced shape: (540, 10)
...Done.
Elapsed time for task #50: 2040.9458093643188


 Task #51/72 --------------------
X.shape (initial): (45211, 16)
y.shape (initial): (45211,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


52it [34:16, 11.99s/it]

  top 10 indices: [11  5  0 10  9 13 15  1 12  3]
  X reduced shape: (45211, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.02412271499633789
Number of rows in the subsample:  999
Done.
Elapsed time for task #51: 2056.7247178554535


 Task #52/72 --------------------
X.shape (initial): (9873, 32)
y.shape (initial): (9873,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


53it [34:48, 17.86s/it]

  top 10 indices: [27 25 10 24  1  5  2 28  4  7]
  X reduced shape: (9873, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.033788204193115234
Number of rows in the subsample:  998
Done.
Elapsed time for task #52: 2088.2646021842957


 Task #53/72 --------------------
X.shape (initial): (10299, 561)
y.shape (initial): (10299,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
  top 10 indices: [389 381  52 503 393  86  65 366  83  96]
  X reduced shape: (10299, 10)
...Done.
Subsampling to 1000 if necessary...


54it [41:17, 129.21s/it]

... Elapsed time for subsampling: 0.722761869430542
Number of rows in the subsample:  997
Done.
Elapsed time for task #53: 2477.31458568573


 Task #54/72 --------------------


55it [41:21, 91.72s/it] 

X.shape (initial): (500, 12)
y.shape (initial): (500,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
  top 10 indices: [ 2  4  0  8  6  1  3 10 11  5]
  X reduced shape: (500, 10)
...Done.
Elapsed time for task #54: 2481.545212507248


 Task #55/72 --------------------
X.shape (initial): (5500, 40)
y.shape (initial): (5500,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


56it [41:34, 68.16s/it]

  top 10 indices: [25  9 19 39 24 29 23 22 26  1]
  X reduced shape: (5500, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.0359344482421875
Number of rows in the subsample:  634


 Task #56/72 --------------------
X.shape (initial): (67557, 42)
y.shape (initial): (67557,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


57it [41:52, 53.02s/it]

  top 10 indices: [24 30 13 12 18  6 14 36 20 19]
  X reduced shape: (67557, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.03445911407470703
Number of rows in the subsample:  999
Done.
Elapsed time for task #56: 2512.4383025169373


 Task #57/72 --------------------
X.shape (initial): (1080, 77)
y.shape (initial): (1080,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


58it [42:00, 39.49s/it]

  top 10 indices: [32 30 46 33 56 34 12 48 50 73]
  X reduced shape: (1080, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.03473544120788574
Number of rows in the subsample:  998
Done.
Elapsed time for task #57: 2520.351277112961


 Task #58/72 --------------------
X.shape (initial): (1941, 27)
y.shape (initial): (1941,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


59it [42:08, 30.08s/it]

  top 10 indices: [13 10  1 12 16 24 14 11  3  2]
  X reduced shape: (1941, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.0355525016784668
Number of rows in the subsample:  997
Done.
Elapsed time for task #58: 2528.4619262218475


 Task #59/72 --------------------
X.shape (initial): (540, 18)
y.shape (initial): (540,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


60it [42:12, 22.38s/it]

  top 10 indices: [ 1  0 12 13 15  3  8 14 17  7]
  X reduced shape: (540, 10)
...Done.
Elapsed time for task #59: 2532.8775355815887


 Task #60/72 --------------------


61it [42:16, 16.73s/it]

X.shape (initial): (4839, 5)
y.shape (initial): (4839,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.02371811866760254
Number of rows in the subsample:  999
Done.
Elapsed time for task #60: 2536.4236969947815


 Task #61/72 --------------------


62it [42:19, 12.63s/it]

X.shape (initial): (1728, 6)
y.shape (initial): (1728,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023296594619750977
Number of rows in the subsample:  998
Done.
Elapsed time for task #61: 2539.5020673274994


 Task #62/72 --------------------
X.shape (initial): (2310, 16)
y.shape (initial): (2310,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


63it [42:24, 10.30s/it]

  top 10 indices: [15  9 14 12  4  7  6  5  2 10]
  X reduced shape: (2310, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.035253047943115234
Number of rows in the subsample:  994
Done.
Elapsed time for task #62: 2544.3665347099304


 Task #63/72 --------------------
X.shape (initial): (2000, 240)
y.shape (initial): (2000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


64it [42:58, 17.38s/it]

  top 10 indices: [ 53 113  56 112  72 168 146 238 221  57]
  X reduced shape: (2000, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.03625607490539551
Number of rows in the subsample:  1000
Done.
Elapsed time for task #63: 2578.2586295604706


 Task #64/72 --------------------
X.shape (initial): (70000, 784)
y.shape (initial): (70000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
  top 10 indices: [ 38 417 416 209 426 379 454 228  39 388]
  X reduced shape: (70000, 10)
...Done.
Subsampling to 1000 if necessary...


65it [1:22:17, 719.82s/it]

... Elapsed time for subsampling: 0.5229713916778564
Number of rows in the subsample:  1000
Done.
Elapsed time for task #64: 4937.090500593185


 Task #65/72 --------------------


66it [1:22:21, 505.14s/it]

X.shape (initial): (44819, 6)
y.shape (initial): (44819,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.02419424057006836
Number of rows in the subsample:  998
Done.
Elapsed time for task #65: 4941.31676530838


 Task #66/72 --------------------
X.shape (initial): (96320, 21)
y.shape (initial): (96320,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


67it [1:25:34, 411.43s/it]

  top 10 indices: [ 1  4 15 12  5 19 14 18 16 11]
  X reduced shape: (96320, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.0349574089050293
Number of rows in the subsample:  999
Done.
Elapsed time for task #66: 5134.096231222153


 Task #67/72 --------------------
X.shape (initial): (92000, 1024)
y.shape (initial): (92000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...
  top 10 indices: [957 379 184 764 536 356 828 915 183 125]
  X reduced shape: (92000, 10)
...Done.
Subsampling to 1000 if necessary...


68it [2:10:09, 1090.60s/it]

... Elapsed time for subsampling: 0.6362159252166748
Number of rows in the subsample:  967
Done.
Elapsed time for task #67: 7809.424352169037


 Task #68/72 --------------------


69it [2:10:24, 767.91s/it] 



 Task #69/72 --------------------
X.shape (initial): (3279, 1558)
y.shape (initial): (3279,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


70it [2:11:17, 553.49s/it]

  top 10 indices: [   1 1243  351 1399    2  398    0 1229  508  246]
  X reduced shape: (3279, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.023624897003173828
Number of rows in the subsample:  999
Done.
Elapsed time for task #69: 7877.581933021545


 Task #70/72 --------------------
X.shape (initial): (3186, 180)
y.shape (initial): (3186,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


71it [2:11:44, 395.55s/it]

  top 10 indices: [ 92 104  89  84  93  95  94  96  99  83]
  X reduced shape: (3186, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.02333545684814453
Number of rows in the subsample:  999
Done.
Elapsed time for task #70: 7904.604424238205


 Task #71/72 --------------------
X.shape (initial): (5000, 20)
y.shape (initial): (5000,)
Encoding features and response...
...Done.
Finding top 10 features if necessary...


72it [2:11:53, 109.91s/it]

  top 10 indices: [ 7  9 19 17 10  4 12 18 16  6]
  X reduced shape: (5000, 10)
...Done.
Subsampling to 1000 if necessary...
... Elapsed time for subsampling: 0.02367687225341797
Number of rows in the subsample:  999
Done.
Elapsed time for task #71: 7913.815386295319
Total elapsed time: 7913.817942380905





In [4]:
import joblib

joblib.dump(Xys, "openml-Xys-2025-11-07.pkl")

['openml-Xys-2025-11-07.pkl']