## <p style="background-color:#d8ecff; color: #009dff;margin:0; display:inline-block;padding:.4rem;border-radius:.25rem;border:1px solid #009dff">Importing Libraries</p>

In [1]:
import pandas as pd
import numpy as np, os
import matplotlib.pyplot as plt
# import seaborn as sns|
from sklearn.preprocessing import LabelEncoder,StandardScaler,OrdinalEncoder
from sklearn.model_selection import train_test_split, GridSearchCV,StratifiedShuffleSplit
from autogluon.tabular import TabularPredictor
# import optuna
from sklearn.metrics import roc_auc_score, roc_curve, auc, classification_report, confusion_matrix, accuracy_score,matthews_corrcoef
import scipy
import warnings


from sklearn.metrics import matthews_corrcoef
from autogluon.core.metrics import make_scorer
warnings.filterwarnings('ignore')
# Custom metric function
def custom_mcc_metric(y_true, y_pred):
    """Return the Matthews correlation coefficient of `y_pred` against `y_true`.

    Thin pass-through to `sklearn.metrics.matthews_corrcoef`, kept as a named
    function so it can be handed to a scorer factory.
    """
    return matthews_corrcoef(y_true, y_pred)
# Wrap the MCC function as an AutoGluon scorer registered under the name 'mcc';
# larger scores are treated as better.
custom_mcc_scorer = make_scorer(
    name='mcc',
    score_func=custom_mcc_metric,
    greater_is_better=True,
)

  from .autonotebook import tqdm as notebook_tqdm


  ### <p style="background-color: #fdefff;color:#c12eff;display: inline-block;padding:.6rem;border-radius:.5rem;border: 1px solid #c059ff">Loading data</p>

In [2]:
cd /workspace/data/

/workspace/data


In [3]:
# Read the three competition CSV files from the Kaggle folder that sits under
# the current working directory.
cwd = os.getcwd()
train_data = pd.read_csv(os.path.join(cwd, "kaggle/playground-series-s4e8/train.csv"))
test_data = pd.read_csv(os.path.join(cwd, "kaggle/playground-series-s4e8/test.csv"))
sample_submission_data = pd.read_csv(os.path.join(cwd, "kaggle/playground-series-s4e8/sample_submission.csv"))

# Print each frame's shape as a quick sanity check.
for label, frame in (
    ("train_data :", train_data),
    ("test_data :", test_data),
    ("sample_submission_data :", sample_submission_data),
):
    print(label, frame.shape)

train_data : (3116945, 22)
test_data : (2077964, 21)
sample_submission_data : (2077964, 2)


## <p style="background-color:#d8ecff; color: #009dff;margin:0; display:inline-block;padding:.4rem;border-radius:.25rem;border:1px solid #009dff">Handling NaN Values And Less Frequent Categories</p> 

In [4]:
# Index(['id', 'class', 'cap-diameter', 'cap-shape', 'cap-surface', 'cap-color',
#        'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color',
#        'stem-height', 'stem-width', 'stem-root', 'stem-surface', 'stem-color',
#        'veil-type', 'veil-color', 'has-ring', 'ring-type', 'spore-print-color',
#        'habitat', 'season', 'veil-info'],
#       dtype='object')

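# Handles missing values and feature presence at the same time: the leading 0/1 encodes whether veil-type is present, and the trailing part covers missing veil-color values via 'unknown', so the two pieces of information are combined effectively.
# Provides useful information for model training: instead of simply ignoring missing values or feature presence, combining them helps the model learn more patterns.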
# Index(['w', 'y', 'n', 'u', 'k', 'e', 'g', 'p', 'r', 'o', 's', 'a', 't', 'd',
#        'i', 'h', 'c', 'f', 'l', 'b', 'z', '8.25', '2.49', '3.32'],
#       dtype='object', name='veil-color')
# Index(['u', 'w', 'a', 'f', 'e', 'b', 'c', 'y', 'k', 'g', 'n', 's', 'r', 'd',
#        'p', 'h', 'i', 'l', 'is None', 't', '21.11', '5.94'],
#       dtype='object', name='veil-type')

# Index(['f10', 'tnone', 't1', 't7', 't10', 't4', 't2', 't5', 't3', 't9',
#        'fnone', 'f1', 'f2', 'f7', 'f4', 't8', 'f5', 'r2', 't6', 'f3', 'l4',
#        't0', 'p5', 'z7', 'c10', 'x10', 'f9', 'f6', 's10', 'm9', 'hnone', 's1',
#        'g3', 'g5', 'h7', 'e1', 'f0', 'r4', 'dnone', 's5', 'cnone', 'h1', 'p1',
#        'h5', 'h10', 'w3', 'y2', 'a10', 'ynone', 'e10', 'p7', '10.310', 's2',
#        'o2', 'g10', 'h2', 'g1', 's3', 'p3', 'knone', 'inone', 'nnone', 'rnone',
#        'l5', 'c1', 'n10', 'c3', 'o10', 'e4', 'd10', 'f has-ring10', 'lnone',
#        'c7', 'e3', 'y1', 'k10'],
#       dtype='object', name='has-ring-type')

# Integer codes for the raw category letters. Values absent from a mapping
# come out of `.map` as NaN.
cap_shape_mapping = {'b': 0, 'c': 1, 'x': 2, 'f': 3, 's': 4, 'p': 5, 'o': 6}
cap_color_mapping = {'n': 0, 'b': 1, 'g': 2, 'r': 3, 'p': 4, 'u': 5, 'e': 6, 'w': 7, 'y': 8, 'l': 9, 'o': 10, 'k': 11}
ring_type_mapping = {'c': 0, 'e': 1, 'r': 2, 'g': 3, 'l': 4, 'p': 5, 's': 6, 'z': 7, 'y': 8, 'm': 9, 'f': 10}


def _ring_type_code(code):
    """Render a mapped ring-type code as a digit string, or 'none' when it is missing."""
    return f"{code:.0f}" if pd.notna(code) else 'none'


def _combine_veil_and_ring_features(frame):
    """Build 'veil-info' and 'has-ring-type', encode cap shape/colour, drop the raw columns.

    The new and re-encoded columns are written onto `frame` itself; the value
    returned is the result of `drop`, i.e. a frame without 'id', 'veil-color',
    'veil-type', 'has-ring' and 'ring-type'.
    """
    # '0'/'1' flag for veil-type presence, followed by the veil colour or 'unknown'.
    veil_present = frame['veil-type'].notna().astype(int).astype(str)
    frame['veil-info'] = veil_present + frame['veil-color'].fillna('unknown')

    frame['cap-shape'] = frame['cap-shape'].map(cap_shape_mapping)
    frame['cap-color'] = frame['cap-color'].map(cap_color_mapping)

    # has-ring value followed by the ring-type code ('none' when the ring type
    # is missing or not part of the mapping).
    ring_code = (
        frame['ring-type']
        .fillna('none')
        .map(ring_type_mapping)
        .apply(_ring_type_code)
        .str.replace('.0', '', regex=False)
    )
    frame['has-ring-type'] = frame['has-ring'] + ring_code

    return frame.drop(['id', 'veil-color', 'veil-type', 'has-ring', 'ring-type'], axis=1)


train_data = _combine_veil_and_ring_features(train_data)
test_data = _combine_veil_and_ring_features(test_data)

In [5]:
def cleaning(df, threshold=100, cat_feats=None):
    """Normalise the categorical feature columns of `df`.

    Each column listed in `cat_feats` is cast to ``category`` dtype (with the
    sentinel categories 'missing' and 'noise' always declared), missing values
    are replaced by 'missing', and every category that occurs fewer than
    `threshold` times in `df` is collapsed into 'noise'.

    Changes relative to the previous implementation:
    * rare categories now really become 'noise'; they used to be relabelled
      'missing', which left the 'noise' category unused and made rare values
      indistinguishable from NaN;
    * the replacement is vectorised instead of a per-element ``apply``, and
      the column keeps its ``category`` dtype.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is modified in place.
    threshold : int, default 100
        Minimum number of occurrences a category needs to be kept.
    cat_feats : list of str, optional
        Columns to clean. Defaults to the notebook's categorical features.

    Returns
    -------
    pandas.DataFrame
        The same `df` object, for call chaining.
    """
    if cat_feats is None:
        cat_feats = ['cap-shape', 'cap-surface', 'cap-color',
           'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color',
           'stem-root', 'stem-surface', 'stem-color', 'spore-print-color',
           'habitat', 'season', 'veil-info', 'has-ring-type']

    sentinels = ('missing', 'noise')

    for feat in cat_feats:
        col = df[feat]
        if col.dtype.name != 'category':
            col = col.astype('category')

        # Declare whichever sentinel categories the column does not have yet.
        new_categories = [name for name in sentinels if name not in col.cat.categories]
        if new_categories:
            col = col.cat.add_categories(new_categories)

        # Fill missing values with 'missing'
        col = col.fillna('missing')

        # Replace infrequent categories with 'noise'. The sentinels themselves
        # are never relabelled, so a handful of NaNs stays 'missing'.
        counts = col.value_counts(dropna=False)
        rare = [cat for cat in counts[counts < threshold].index if cat not in sentinels]
        df[feat] = col.mask(col.isin(rare), 'noise')

    return df

# Run the categorical clean-up on both splits; each call works from the
# category counts of the frame it is given.
train_data, test_data = cleaning(train_data), cleaning(test_data)

In [6]:
group_by_features = ['stem-width', 'stem-height']
# Mean cap diameter for every (stem-width, stem-height) pair in the training data.
group_means_train = train_data.groupby(group_by_features)['cap-diameter'].mean()


def fill_na_with_group_mean(row):
    """Return the row's cap diameter, falling back to the training group mean.

    When 'cap-diameter' is missing, the row's (stem-width, stem-height) pair is
    looked up in the module-level `group_means_train`; NaN is returned when the
    pair has no entry there.
    """
    if pd.isna(row['cap-diameter']):
        group = tuple(row[group_by_features])
        return group_means_train.get(group, np.nan)
    return row['cap-diameter']


def _impute_cap_diameter(frame):
    """Return `frame['cap-diameter']` with missing entries filled by group mean.

    Only rows whose cap diameter is missing are sent through the row-wise
    `fill_na_with_group_mean`; every other row would be returned unchanged by
    it, so skipping them yields the same values without a Python call per row.
    """
    cap_diameter = frame['cap-diameter'].copy()
    missing = cap_diameter.isna()
    # Guard: apply(axis=1) on a frame with zero rows does not give back a Series.
    if missing.any():
        filled = frame.loc[missing].apply(fill_na_with_group_mean, axis=1)
        # Positional assignment; `filled` is in the same row order as the mask.
        cap_diameter.loc[missing] = filled.to_numpy(dtype=float)
    return cap_diameter


train_data['cap-diameter'] = _impute_cap_diameter(train_data)
test_data['cap-diameter'] = _impute_cap_diameter(test_data)

In [7]:
group_by_features = ['stem-width', 'stem-height']

# Calculate group means for the train data
group_means_train = train_data.groupby(group_by_features)['cap-diameter'].mean()


def fill_na_with_group_mean(row, group_means):
    """Return the row's cap diameter, falling back to a group mean when missing.

    Parameters
    ----------
    row : pandas.Series
        One data row; must contain 'cap-diameter' and the `group_by_features` columns.
    group_means : pandas.Series
        Lookup of mean cap diameter keyed by the `group_by_features` values.

    Returns
    -------
    The existing cap diameter, the looked-up group mean, or NaN when the row's
    group has no entry in `group_means`.
    """
    if pd.isna(row['cap-diameter']):
        group = tuple(row[group_by_features])
        return group_means.get(group, np.nan)
    return row['cap-diameter']


def _impute_cap_diameter_from(frame, group_means):
    """Return `frame['cap-diameter']` with missing entries filled from `group_means`.

    Only rows whose cap diameter is missing are sent through the row-wise
    `fill_na_with_group_mean`; every other row would be returned unchanged by
    it, so skipping them yields the same values without a Python call per row.
    """
    cap_diameter = frame['cap-diameter'].copy()
    missing = cap_diameter.isna()
    # Guard: apply(axis=1) on a frame with zero rows does not give back a Series.
    if missing.any():
        filled = frame.loc[missing].apply(fill_na_with_group_mean, axis=1, group_means=group_means)
        # Positional assignment; `filled` is in the same row order as the mask.
        cap_diameter.loc[missing] = filled.to_numpy(dtype=float)
    return cap_diameter


# Apply to train_data using train group means
train_data['cap-diameter'] = _impute_cap_diameter_from(train_data, group_means_train)

# Apply the same group means from train_data to test_data
test_data['cap-diameter'] = _impute_cap_diameter_from(test_data, group_means_train)
###
###
# Most frequent training value of each column; used as the fallback fill value.
cap_diameter_mode = train_data['cap-diameter'].mode()[0]
stem_height_mode = train_data['stem-height'].mode()[0]

# Fill the remaining gaps in both splits with the training-set modes, so the
# test data is filled with statistics taken from the training data only.
mode_fill_values = {'cap-diameter': cap_diameter_mode, 'stem-height': stem_height_mode}
for column, fill_value in mode_fill_values.items():
    train_data[column] = train_data[column].fillna(fill_value)
    test_data[column] = test_data[column].fillna(fill_value)

# Feature columns to store with pandas' category dtype.
cat_feats = [
    'cap-shape', 'cap-surface', 'cap-color',
    'does-bruise-or-bleed', 'gill-attachment', 'gill-spacing', 'gill-color',
    'stem-root', 'stem-surface', 'stem-color', 'spore-print-color',
    'habitat', 'season', 'veil-info', 'has-ring-type',
]

# Cast the listed columns of both splits (train first, then test).
for frame in (train_data, test_data):
    for feat in cat_feats:
        frame[feat] = frame[feat].astype('category')

## <p style="background-color:#d8ecff; color: #009dff;margin:0; display:inline-block;padding:.4rem;border-radius:.25rem;border:1px solid #009dff">Training the AutoGluon Model</p> 

In [None]:

# Set up the AutoGluon predictor for the binary 'class' label.
# The metric in use is 'f1_weighted'; alternatives noted by the author:
# custom_mcc_scorer, 'log_loss'.
predictor = TabularPredictor(
    label='class',
    problem_type='binary',
    eval_metric='f1_weighted',
    path='./kaggle/autogluon_models/',
    verbosity=2,
)

# Options handed to `fit`, in the order they were originally passed.
fit_options = dict(
    presets='best_quality',  # alternative: 'high_quality'
    refit_full=True,  # Refit the best model on the full dataset
    hyperparameter_tune_kwargs={
        'num_trials': 10,  # Set this based on resource availability
        'scheduler': 'local',
        'searcher': 'auto',
    },
    excluded_model_types=['KNN', 'XT', 'RF', 'LR'],  # Skip the KNN, XT, RF and LR model types
    save_space=True,  # Save space by removing intermediate models
)

# Options that are currently disabled, kept for reference:
#   time_limit=3600          -> overall time limit
#   num_bag_folds=10         -> number of bagging folds
#   num_stack_levels=2       -> levels of stacking
#   hyperparameters={
#       'GBM': {'num_boost_round': 100, 'early_stopping_rounds': 10},
#       'CAT': {'iterations': 1000, 'early_stopping_rounds': 50},
#       'XGB': {'n_estimators': 100, 'early_stopping_rounds': 10},
#       'FASTAI': {'epochs': 100, 'early_stopping_patience': 10}
#   }
# NOTE(review): no time_limit is passed, and the captured run log shows each
# model being tuned "for up to 5.96s" - consider setting an explicit
# time_limit or revisiting hyperparameter_tune_kwargs; confirm intent.
predictor.fit(train_data, **fit_options)

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.9.19
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #213-Ubuntu SMP Fri Aug 2 19:14:16 UTC 2024
CPU Count:          32
Memory Avail:       122.72 GB / 125.58 GB (97.7%)
Disk Space Avail:   986.09 GB / 1829.69 GB (53.9%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3

[36m(_dystack pid=436)[0m ╭───────────────────────────────────────────────────────────╮
[36m(_dystack pid=436)[0m │ Configuration for experiment     NeuralNetFastAI_BAG_L1   │
[36m(_dystack pid=436)[0m ├───────────────────────────────────────────────────────────┤
[36m(_dystack pid=436)[0m │ Search algorithm                 SearchGenerator          │
[36m(_dystack pid=436)[0m │ Scheduler                        FIFOScheduler            │
[36m(_dystack pid=436)[0m │ Number of trials                 10                       │
[36m(_dystack pid=436)[0m ╰───────────────────────────────────────────────────────────╯
[36m(_dystack pid=436)[0m 
[36m(_dystack pid=436)[0m View detailed results here: /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L1


[36m(_dystack pid=436)[0m Reached timeout of 5.956292346239089 seconds. Stopping all trials.
[36m(_dystack pid=436)[0m Wrote the latest version of all result files and experiment state to '/workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetFastAI_BAG_L1' in 0.0133s.
[36m(model_trial pid=5916)[0m Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): The worker died unexpectedly while executing this task. Check python-core-worker-*.log files for more information.
[36m(model_trial pid=5916)[0m Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): The worker died unexpectedly while executing this task. Check python-core-worker-*.log files for more information.
[36m(model_trial pid=5916)[0m Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): The worker died unexpectedly while executing this task. Check python-core-worker-*.log files for more information.
[36m(model_trial pid=5916)[0m Unhandled error (suppress with 'RA

[36m(_dystack pid=436)[0m 


  0%|          | 0/10 [00:00<?, ?it/s]
[36m(_dystack pid=436)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=4, gpus=0, memory=0.88%)
[36m(_dystack pid=436)[0m 	Stopping HPO to satisfy time limit...
  0%|          | 0/10 [02:56<?, ?it/s]
[36m(_dystack pid=436)[0m Fitted model: XGBoost_BAG_L1/T1 ...
[36m(_dystack pid=436)[0m 	0.6779	 = Validation score   (f1_weighted)
[36m(_dystack pid=436)[0m 	175.66s	 = Training   runtime
[36m(_dystack pid=436)[0m 	5.61s	 = Validation runtime
[36m(_dystack pid=436)[0m Hyperparameter tuning model: NeuralNetTorch_BAG_L1 ... Tuning model for up to 5.96s of the 671.42s of remaining time.
[36m(model_trial pid=6204)[0m Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): The worker died unexpectedly while executing this task. Check python-core-worker-*.log files for more information.[32m [repeated 7x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_L

[36m(_dystack pid=436)[0m ╭──────────────────────────────────────────────────────────╮
[36m(_dystack pid=436)[0m │ Configuration for experiment     NeuralNetTorch_BAG_L1   │
[36m(_dystack pid=436)[0m ├──────────────────────────────────────────────────────────┤
[36m(_dystack pid=436)[0m │ Search algorithm                 SearchGenerator         │
[36m(_dystack pid=436)[0m │ Scheduler                        FIFOScheduler           │
[36m(_dystack pid=436)[0m │ Number of trials                 10                      │
[36m(_dystack pid=436)[0m ╰──────────────────────────────────────────────────────────╯
[36m(_dystack pid=436)[0m 
[36m(_dystack pid=436)[0m View detailed results here: /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_BAG_L1


[36m(_dystack pid=436)[0m Reached timeout of 5.956292346239089 seconds. Stopping all trials.
[36m(_dystack pid=436)[0m Wrote the latest version of all result files and experiment state to '/workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_BAG_L1' in 0.0880s.
[36m(_dystack pid=436)[0m Failed to fetch metrics for 4 trial(s):
[36m(_dystack pid=436)[0m - 4317b1c1: FileNotFoundError('Could not fetch metrics for 4317b1c1: both result.json and progress.csv were not found at /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_BAG_L1/4317b1c1')
[36m(_dystack pid=436)[0m - 0cf20c47: FileNotFoundError('Could not fetch metrics for 0cf20c47: both result.json and progress.csv were not found at /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_BAG_L1/0cf20c47')
[36m(_dystack pid=436)[0m - 2dd49251: FileNotFoundError('Could not fetch metrics for 2dd49251: both result.json and progress.csv we

[36m(_dystack pid=436)[0m 


[36m(_dystack pid=436)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=4, gpus=0, memory=0.74%)
[36m(_dystack pid=436)[0m 	0.9819	 = Validation score   (f1_weighted)
[36m(_dystack pid=436)[0m 	5.87s	 = Training   runtime
[36m(_dystack pid=436)[0m 	0.62s	 = Validation runtime
[36m(_dystack pid=436)[0m Hyperparameter tuning model: CatBoost_r177_BAG_L1 ... Tuning model for up to 5.96s of the 644.45s of remaining time.
[36m(_dystack pid=436)[0m 	No hyperparameter search space specified for CatBoost_r177_BAG_L1. Skipping HPO. Will train one model based on the provided hyperparameters.
[36m(_dystack pid=436)[0m 	Fitting 5 child models (S1F1 - S1F5) | Fitting with ParallelLocalFoldFittingStrategy (5 workers, per: cpus=6, gpus=0, memory=0.68%)
[36m(_dystack pid=436)[0m Fitted model: CatBoost_r177_BAG_L1 ...
[36m(_dystack pid=436)[0m 	0.7347	 = Validation score   (f1_weighted)
[36m(_dystack pid=436)[0m 	5.04s	 = 

[36m(_dystack pid=436)[0m ╭──────────────────────────────────────────────────────────────╮
[36m(_dystack pid=436)[0m │ Configuration for experiment     NeuralNetTorch_r79_BAG_L1   │
[36m(_dystack pid=436)[0m ├──────────────────────────────────────────────────────────────┤
[36m(_dystack pid=436)[0m │ Search algorithm                 SearchGenerator             │
[36m(_dystack pid=436)[0m │ Scheduler                        FIFOScheduler               │
[36m(_dystack pid=436)[0m │ Number of trials                 10                          │
[36m(_dystack pid=436)[0m ╰──────────────────────────────────────────────────────────────╯
[36m(_dystack pid=436)[0m 
[36m(_dystack pid=436)[0m View detailed results here: /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r79_BAG_L1


[36m(_dystack pid=436)[0m Reached timeout of 5.956292346239089 seconds. Stopping all trials.
[36m(_dystack pid=436)[0m Wrote the latest version of all result files and experiment state to '/workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r79_BAG_L1' in 0.0060s.
[36m(_dystack pid=436)[0m Failed to fetch metrics for 4 trial(s):
[36m(_dystack pid=436)[0m - 30d3af7f: FileNotFoundError('Could not fetch metrics for 30d3af7f: both result.json and progress.csv were not found at /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r79_BAG_L1/30d3af7f')
[36m(_dystack pid=436)[0m - 2dde55ce: FileNotFoundError('Could not fetch metrics for 2dde55ce: both result.json and progress.csv were not found at /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r79_BAG_L1/2dde55ce')
[36m(_dystack pid=436)[0m - 8f5dc9d3: FileNotFoundError('Could not fetch metrics for 8f5dc9d3: both result.json and pro

[36m(_dystack pid=436)[0m 


[36m(_dystack pid=436)[0m 	No hyperparameter search space specified for LightGBM_r131_BAG_L1. Skipping HPO. Will train one model based on the provided hyperparameters.
[36m(_dystack pid=436)[0m 	Fitting 5 child models (S1F1 - S1F5) | Fitting with ParallelLocalFoldFittingStrategy (5 workers, per: cpus=6, gpus=0, memory=0.70%)
[36m(_dystack pid=436)[0m Fitted model: LightGBM_r131_BAG_L1 ...
[36m(_dystack pid=436)[0m 	0.9359	 = Validation score   (f1_weighted)
[36m(_dystack pid=436)[0m 	6.89s	 = Training   runtime
[36m(_dystack pid=436)[0m 	0.61s	 = Validation runtime
[36m(_dystack pid=436)[0m Hyperparameter tuning model: NeuralNetFastAI_r191_BAG_L1 ... Tuning model for up to 5.96s of the 610.61s of remaining time.
[36m(_dystack pid=436)[0m 	No hyperparameter search space specified for NeuralNetFastAI_r191_BAG_L1. Skipping HPO. Will train one model based on the provided hyperparameters.
[36m(_dystack pid=436)[0m 	Fitting 5 child models (S1F1 - S1F5) | Fitting with Parall

[36m(_dystack pid=436)[0m ╭──────────────────────────────────────────────────────────────╮
[36m(_dystack pid=436)[0m │ Configuration for experiment     NeuralNetTorch_r22_BAG_L1   │
[36m(_dystack pid=436)[0m ├──────────────────────────────────────────────────────────────┤
[36m(_dystack pid=436)[0m │ Search algorithm                 SearchGenerator             │
[36m(_dystack pid=436)[0m │ Scheduler                        FIFOScheduler               │
[36m(_dystack pid=436)[0m │ Number of trials                 10                          │
[36m(_dystack pid=436)[0m ╰──────────────────────────────────────────────────────────────╯
[36m(_dystack pid=436)[0m 
[36m(_dystack pid=436)[0m View detailed results here: /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r22_BAG_L1


[36m(_dystack pid=436)[0m Reached timeout of 5.956292346239089 seconds. Stopping all trials.
[36m(_dystack pid=436)[0m Wrote the latest version of all result files and experiment state to '/workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r22_BAG_L1' in 0.0054s.
[36m(_dystack pid=436)[0m Failed to fetch metrics for 3 trial(s):
[36m(_dystack pid=436)[0m - 2aec5b08: FileNotFoundError('Could not fetch metrics for 2aec5b08: both result.json and progress.csv were not found at /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r22_BAG_L1/2aec5b08')
[36m(_dystack pid=436)[0m - 508638fe: FileNotFoundError('Could not fetch metrics for 508638fe: both result.json and progress.csv were not found at /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r22_BAG_L1/508638fe')
[36m(_dystack pid=436)[0m - e6a492e9: FileNotFoundError('Could not fetch metrics for e6a492e9: both result.json and pro

[36m(_dystack pid=436)[0m 


[36m(_dystack pid=436)[0m 	No hyperparameter search space specified for XGBoost_r33_BAG_L1. Skipping HPO. Will train one model based on the provided hyperparameters.
[36m(_dystack pid=436)[0m 	Fitting 5 child models (S1F1 - S1F5) | Fitting with ParallelLocalFoldFittingStrategy (5 workers, per: cpus=6, gpus=0, memory=0.92%)
[36m(_dystack pid=436)[0m Fitted model: XGBoost_r33_BAG_L1 ...
[36m(_dystack pid=436)[0m 	0.387	 = Validation score   (f1_weighted)
[36m(_dystack pid=436)[0m 	218.45s	 = Training   runtime
[36m(_dystack pid=436)[0m 	6.53s	 = Validation runtime
[36m(_dystack pid=436)[0m Hyperparameter tuning model: CatBoost_r137_BAG_L1 ... Tuning model for up to 5.96s of the 348.52s of remaining time.
[36m(_dystack pid=436)[0m 	No hyperparameter search space specified for CatBoost_r137_BAG_L1. Skipping HPO. Will train one model based on the provided hyperparameters.
[36m(_dystack pid=436)[0m 	Fitting 5 child models (S1F1 - S1F5) | Fitting with ParallelLocalFoldFittin

[36m(_dystack pid=436)[0m ╭──────────────────────────────────────────────────────────────╮
[36m(_dystack pid=436)[0m │ Configuration for experiment     NeuralNetTorch_r30_BAG_L1   │
[36m(_dystack pid=436)[0m ├──────────────────────────────────────────────────────────────┤
[36m(_dystack pid=436)[0m │ Search algorithm                 SearchGenerator             │
[36m(_dystack pid=436)[0m │ Scheduler                        FIFOScheduler               │
[36m(_dystack pid=436)[0m │ Number of trials                 10                          │
[36m(_dystack pid=436)[0m ╰──────────────────────────────────────────────────────────────╯
[36m(_dystack pid=436)[0m 
[36m(_dystack pid=436)[0m View detailed results here: /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r30_BAG_L1


[36m(_dystack pid=436)[0m Reached timeout of 5.956292346239089 seconds. Stopping all trials.
[36m(_dystack pid=436)[0m Wrote the latest version of all result files and experiment state to '/workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r30_BAG_L1' in 0.0075s.
[36m(_dystack pid=436)[0m Failed to fetch metrics for 4 trial(s):
[36m(_dystack pid=436)[0m - 06bab833: FileNotFoundError('Could not fetch metrics for 06bab833: both result.json and progress.csv were not found at /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r30_BAG_L1/06bab833')
[36m(_dystack pid=436)[0m - dc09502d: FileNotFoundError('Could not fetch metrics for dc09502d: both result.json and progress.csv were not found at /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r30_BAG_L1/dc09502d')
[36m(_dystack pid=436)[0m - 3904529c: FileNotFoundError('Could not fetch metrics for 3904529c: both result.json and pro

[36m(_dystack pid=436)[0m 


[36m(_dystack pid=436)[0m 	No hyperparameter search space specified for LightGBM_r130_BAG_L1. Skipping HPO. Will train one model based on the provided hyperparameters.
[36m(_dystack pid=436)[0m 	Fitting 5 child models (S1F1 - S1F5) | Fitting with ParallelLocalFoldFittingStrategy (5 workers, per: cpus=6, gpus=0, memory=0.68%)
[36m(_dystack pid=436)[0m Fitted model: LightGBM_r130_BAG_L1 ...
[36m(_dystack pid=436)[0m 	0.9876	 = Validation score   (f1_weighted)
[36m(_dystack pid=436)[0m 	6.61s	 = Training   runtime
[36m(_dystack pid=436)[0m 	0.62s	 = Validation runtime
[36m(_dystack pid=436)[0m Hyperparameter tuning model: NeuralNetTorch_r86_BAG_L1 ... Tuning model for up to 5.96s of the 30.71s of remaining time.
[36m(_dystack pid=436)[0m [output] This will use the new output engine with verbosity 0. To disable the new output and use the legacy output engine, set the environment variable RAY_AIR_NEW_OUTPUT=0. For more information, please see https://github.com/ray-project/r

[36m(_dystack pid=436)[0m ╭──────────────────────────────────────────────────────────────╮
[36m(_dystack pid=436)[0m │ Configuration for experiment     NeuralNetTorch_r86_BAG_L1   │
[36m(_dystack pid=436)[0m ├──────────────────────────────────────────────────────────────┤
[36m(_dystack pid=436)[0m │ Search algorithm                 SearchGenerator             │
[36m(_dystack pid=436)[0m │ Scheduler                        FIFOScheduler               │
[36m(_dystack pid=436)[0m │ Number of trials                 10                          │
[36m(_dystack pid=436)[0m ╰──────────────────────────────────────────────────────────────╯
[36m(_dystack pid=436)[0m 
[36m(_dystack pid=436)[0m View detailed results here: /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r86_BAG_L1


[36m(_dystack pid=436)[0m Reached timeout of 5.956292346239089 seconds. Stopping all trials.
[36m(_dystack pid=436)[0m Wrote the latest version of all result files and experiment state to '/workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r86_BAG_L1' in 0.0054s.
[36m(_dystack pid=436)[0m Failed to fetch metrics for 3 trial(s):
[36m(_dystack pid=436)[0m - bbda1ffc: FileNotFoundError('Could not fetch metrics for bbda1ffc: both result.json and progress.csv were not found at /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r86_BAG_L1/bbda1ffc')
[36m(_dystack pid=436)[0m - 17843940: FileNotFoundError('Could not fetch metrics for 17843940: both result.json and progress.csv were not found at /workspace/data/kaggle/autogluon_models/ds_sub_fit/sub_fit_ho/models/NeuralNetTorch_r86_BAG_L1/17843940')
[36m(_dystack pid=436)[0m - 875313f3: FileNotFoundError('Could not fetch metrics for 875313f3: both result.json and pro

[36m(_dystack pid=436)[0m 


[36m(_dystack pid=436)[0m 	No hyperparameter search space specified for CatBoost_r50_BAG_L1. Skipping HPO. Will train one model based on the provided hyperparameters.
[36m(_dystack pid=436)[0m 	Fitting 5 child models (S1F1 - S1F5) | Fitting with ParallelLocalFoldFittingStrategy (5 workers, per: cpus=6, gpus=0, memory=0.70%)
[36m(_dystack pid=436)[0m Fitted model: CatBoost_r50_BAG_L1 ...
[36m(_dystack pid=436)[0m 	0.7596	 = Validation score   (f1_weighted)
[36m(_dystack pid=436)[0m 	4.49s	 = Training   runtime
[36m(_dystack pid=436)[0m 	0.27s	 = Validation runtime
[36m(_dystack pid=436)[0m Hyperparameter tuning model: NeuralNetFastAI_r11_BAG_L1 ... Tuning model for up to 5.96s of the 5.52s of remaining time.
[36m(_dystack pid=436)[0m 	No hyperparameter search space specified for NeuralNetFastAI_r11_BAG_L1. Skipping HPO. Will train one model based on the provided hyperparameters.
[36m(_dystack pid=436)[0m 	Fitting 5 child models (S1F1 - S1F5) | Fitting with ParallelLoca

In [None]:
# Read the ids without mutating sample_submission_data. The previous
# `pop('id')` removed the column, so running this cell a second time raised
# KeyError.
id_column = sample_submission_data['id']
y_test_pred = predictor.predict(test_data)

# The two Series are combined by index below, so they must describe the same rows.
assert len(id_column) == len(y_test_pred), "prediction count does not match the number of submission ids"

# Create the submission DataFrame
submission_df = pd.DataFrame({
    'id': id_column,
    'class': y_test_pred
})

# Save the submission DataFrame to a CSV file and report the path that was
# actually written (the old message named a different file).
submission_path = './kaggle/submission_autog_0827_1.csv'
submission_df.to_csv(submission_path, index=False)
print(f"Submission file created: {submission_path}")