In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier, Pool
from scipy.optimize import minimize
from datetime import datetime
import gc
import warnings

warnings.filterwarnings("ignore")

In [2]:
# Create a log filename with the notebook name and current datetime
current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
log_filename = f'kaggle_submission_{current_time}.log'

# Configure logging to save to a file
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(log_filename),
        logging.StreamHandler()  # This ensures logs are also output to the console
    ]
)

In [3]:
import pandas as pd
import numpy as np
import logging
import gc

def reduce_mem_usage(df, verbose=True):
    """Iterate through all the columns of a dataframe and modify the data type to reduce memory usage."""
    start_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        logging.info(f'Start memory usage of dataframe: {start_mem:.2f} MB')


    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        logging.info(f'End memory usage of dataframe: {end_mem:.2f} MB')
        logging.info(f'Decreased by {(100 * (start_mem - end_mem) / start_mem):.1f}%')

    return df

def safe_map(df, column, mapping):
    """Map categorical values to numerical values and log any unknown categories."""
    unknown_categories = set(df[column]) - set(mapping.keys())
    if unknown_categories:
        logging.warning(f"Unknown categories in column {column}: {unknown_categories}")
    df[column] = df[column].map(mapping)
    return df

def import_data(file, **kwargs):
    """Create a dataframe and optimize its memory usage."""
    df = pd.read_csv(file, parse_dates=True, keep_date_col=True, **kwargs)
    df = reduce_mem_usage(df)
    return df

def preprocess_data(df):
    """Preprocess the dataset."""
    gender_mapping = {'Male': 1, 'Female': 0}
    vehicle_damage_mapping = {'Yes': 1, 'No': 0}
    vehicle_age_mapping = {'< 1 Year': 0, '1-2 Year': 1, '> 2 Years': 2}
    
    df = safe_map(df, 'Gender', gender_mapping)
    df = safe_map(df, 'Vehicle_Damage', vehicle_damage_mapping)
    df = safe_map(df, 'Vehicle_Age', vehicle_age_mapping)
    
    df.drop(['Driving_License'], axis=1, inplace=True)
    return df

def feature_engineering(df):
    """Feature engineering on the dataset."""
    df['Previously_Insured_Annual_Premium'] = pd.factorize((df['Previously_Insured'].astype(str) + df['Annual_Premium'].astype(str)))[0]
    df['Previously_Insured_Vehicle_Age'] = pd.factorize((df['Previously_Insured'].astype(str) + df['Vehicle_Age'].astype(str)))[0]
    df['Previously_Insured_Vehicle_Damage'] = pd.factorize((df['Previously_Insured'].astype(str) + df['Vehicle_Damage'].astype(str)))[0]
    df['Previously_Insured_Vintage'] = pd.factorize((df['Previously_Insured'].astype(str) + df['Vintage'].astype(str)))[0]
    return df



In [4]:
# Paths to datasets
train_path = r"C:\Users\paulo\OneDrive\Documents\kaggle_competition_2_datasets\train.csv"
test_path = r"C:\Users\paulo\OneDrive\Documents\kaggle_competition_2_datasets\test.csv"

# Load and optimize data
train_df = import_data(train_path, index_col='id')
test_df = import_data(test_path, index_col='id')

gc.collect()
print(f"DataFrame after import: {type(train_df)}")
logging.info("Data loaded successfully.")

# Apply preprocessing
train_df = preprocess_data(train_df)
test_df = preprocess_data(test_df)
print(f"DataFrame after preprocessing: {type(train_df)}")
logging.info("Data preprocessed successfully.")

# Apply feature engineering
train_df = feature_engineering(train_df)
test_df = feature_engineering(test_df)

gc.collect()
print(f"DataFrame after feature engineering: {type(train_df)}")
logging.info("Feature engineering completed successfully.")

2024-07-23 23:13:56,190 - INFO - Start memory usage of dataframe: 1053.30 MB
2024-07-23 23:13:57,831 - INFO - End memory usage of dataframe: 318.18 MB
2024-07-23 23:13:57,831 - INFO - Decreased by 69.8%
2024-07-23 23:14:07,967 - INFO - Start memory usage of dataframe: 643.68 MB
2024-07-23 23:14:09,076 - INFO - End memory usage of dataframe: 204.81 MB
2024-07-23 23:14:09,077 - INFO - Decreased by 68.2%
2024-07-23 23:14:09,144 - INFO - Data loaded successfully.


DataFrame after import: <class 'pandas.core.frame.DataFrame'>


2024-07-23 23:14:11,051 - INFO - Data preprocessed successfully.


DataFrame after preprocessing: <class 'pandas.core.frame.DataFrame'>


2024-07-23 23:14:45,424 - INFO - Feature engineering completed successfully.


DataFrame after feature engineering: <class 'pandas.core.frame.DataFrame'>


In [5]:
# Separate features and target variable
X = train_df.drop('Response', axis=1)
y = train_df['Response']

num_cols = ['Age', 'Region_Code', 'Annual_Premium', 'Policy_Sales_Channel', 'Vintage']
scaler = StandardScaler()
train_df[num_cols] = scaler.fit_transform(train_df[num_cols])
test_df[num_cols] = scaler.transform(test_df[num_cols])

# Create Stratified K-Folds
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [12]:
# Define CatBoost parameters
cat_params = {
    'loss_function': 'Logloss',
    'eval_metric': 'AUC',
    'class_names': [0, 1],
    'learning_rate': 0.075,
    'iterations': 5500,
    'depth': 9,
    'random_strength': 0,
    'l2_leaf_reg': 0.5,
    'max_leaves': 512,
    'fold_permutation_block': 64,
    'task_type': 'GPU',  # Ensure your environment supports GPU
    'random_seed': 42,
    'allow_writing_files': False,
    'verbose': 100,  # Display log every 100 iterations
    # 'thread_count': -1
}

# Initialize lists to store out-of-fold predictions and AUC scores
cat_preds = []
cat_aucs = []

test_pool = Pool(test_df.astype(str), cat_features=X.columns.values)

# CatBoost Model
for fold, (train_idx, test_idx) in enumerate(skfold.split(X, y)):
    print(f"\n---- CatBoost Fold {fold + 1} ----\n")
    
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_valid, y_valid = X.iloc[test_idx], y.iloc[test_idx]
    
    train_pool = Pool(X_train.astype(str), y_train, cat_features=X.columns.values)
    valid_pool = Pool(X_valid.astype(str), y_valid, cat_features=X.columns.values)
    
    model = CatBoostClassifier(**cat_params)
    model.fit(train_pool, eval_set=valid_pool, early_stopping_rounds=50, verbose=100)
    
    valid_preds = model.predict_proba(X_valid.astype(str))[:, 1]
    auc_score = roc_auc_score(y_valid, valid_preds)
    cat_aucs.append(auc_score)
    print(f"Validation AUC score for fold {fold + 1}: {auc_score:.6f}")
    
    test_pred = model.predict_proba(test_pool)[:, 1]
    cat_preds.append(test_pred)
    
    # Clear memory
    del X_train, y_train, X_valid, y_valid, train_pool, valid_pool, model
    gc.collect()

# Calculate overall AUC score for CatBoost
auc_mean_cat = np.mean(cat_aucs)
auc_std_cat = np.std(cat_aucs)
print(f"\n---> Overall ROC-AUC Score for CatBoost: {auc_mean_cat:.6f} ± {auc_std_cat:.6f}\n")

# Average the predictions from each fold for CatBoost
test_pred_cat = np.mean(cat_preds, axis=0)



---- CatBoost Fold 1 ----



Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8757758	best: 0.8757758 (0)	total: 1.28s	remaining: 1h 57m 40s
100:	test: 0.8919066	best: 0.8919066 (100)	total: 2m 21s	remaining: 2h 6m 22s
200:	test: 0.8934487	best: 0.8934487 (200)	total: 4m 33s	remaining: 2h 1s
300:	test: 0.8940179	best: 0.8940179 (300)	total: 6m 45s	remaining: 1h 56m 41s
400:	test: 0.8943058	best: 0.8943058 (400)	total: 9m 2s	remaining: 1h 55m 1s
500:	test: 0.8945133	best: 0.8945133 (500)	total: 11m 14s	remaining: 1h 52m 13s
600:	test: 0.8946729	best: 0.8946729 (600)	total: 13m 31s	remaining: 1h 50m 13s
700:	test: 0.8947868	best: 0.8947868 (700)	total: 15m 47s	remaining: 1h 48m 3s
800:	test: 0.8948612	best: 0.8948612 (800)	total: 18m 3s	remaining: 1h 45m 55s
900:	test: 0.8949273	best: 0.8949273 (900)	total: 20m 20s	remaining: 1h 43m 48s
1000:	test: 0.8949838	best: 0.8949838 (1000)	total: 22m 34s	remaining: 1h 41m 29s
1100:	test: 0.8950353	best: 0.8950353 (1100)	total: 24m 49s	remaining: 1h 39m 9s
1200:	test: 0.8950908	best: 0.8950908 (1200)	total: 27m 4

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8751968	best: 0.8751968 (0)	total: 1.26s	remaining: 1h 55m 46s
100:	test: 0.8916085	best: 0.8916085 (100)	total: 2m 17s	remaining: 2h 2m 52s
200:	test: 0.8930614	best: 0.8930614 (200)	total: 4m 28s	remaining: 1h 57m 53s
300:	test: 0.8936133	best: 0.8936133 (300)	total: 6m 41s	remaining: 1h 55m 27s
400:	test: 0.8939499	best: 0.8939499 (400)	total: 8m 53s	remaining: 1h 53m 3s
500:	test: 0.8941349	best: 0.8941349 (500)	total: 11m 6s	remaining: 1h 50m 52s
600:	test: 0.8942729	best: 0.8942729 (600)	total: 13m 18s	remaining: 1h 48m 28s
700:	test: 0.8943772	best: 0.8943772 (700)	total: 15m 31s	remaining: 1h 46m 14s
800:	test: 0.8944550	best: 0.8944550 (800)	total: 17m 43s	remaining: 1h 43m 58s
900:	test: 0.8945208	best: 0.8945208 (900)	total: 19m 54s	remaining: 1h 41m 35s
1000:	test: 0.8945849	best: 0.8945849 (1000)	total: 22m 3s	remaining: 1h 39m 10s
1100:	test: 0.8946387	best: 0.8946387 (1100)	total: 24m 14s	remaining: 1h 36m 52s
1200:	test: 0.8946714	best: 0.8946714 (1200)	total

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8754984	best: 0.8754984 (0)	total: 1.18s	remaining: 1h 48m 1s
100:	test: 0.8919761	best: 0.8919761 (100)	total: 2m 16s	remaining: 2h 1m 51s
200:	test: 0.8934428	best: 0.8934428 (200)	total: 4m 28s	remaining: 1h 58m 2s
300:	test: 0.8939869	best: 0.8939869 (300)	total: 6m 39s	remaining: 1h 55m 3s
400:	test: 0.8942636	best: 0.8942636 (400)	total: 8m 52s	remaining: 1h 52m 51s
500:	test: 0.8944498	best: 0.8944498 (500)	total: 11m 5s	remaining: 1h 50m 35s
600:	test: 0.8945998	best: 0.8945998 (600)	total: 13m 16s	remaining: 1h 48m 13s
700:	test: 0.8946897	best: 0.8946897 (700)	total: 15m 31s	remaining: 1h 46m 19s
800:	test: 0.8947679	best: 0.8947679 (800)	total: 17m 45s	remaining: 1h 44m 8s
900:	test: 0.8948267	best: 0.8948271 (899)	total: 19m 56s	remaining: 1h 41m 49s
1000:	test: 0.8948674	best: 0.8948674 (1000)	total: 22m 7s	remaining: 1h 39m 25s
1100:	test: 0.8949169	best: 0.8949169 (1100)	total: 24m 19s	remaining: 1h 37m 11s
1200:	test: 0.8949580	best: 0.8949580 (1200)	total: 2

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8751477	best: 0.8751477 (0)	total: 1.45s	remaining: 2h 13m
100:	test: 0.8917821	best: 0.8917821 (100)	total: 2m 19s	remaining: 2h 4m 10s
200:	test: 0.8931704	best: 0.8931704 (200)	total: 4m 28s	remaining: 1h 57m 49s
300:	test: 0.8937134	best: 0.8937134 (300)	total: 6m 41s	remaining: 1h 55m 40s
400:	test: 0.8940150	best: 0.8940150 (400)	total: 8m 54s	remaining: 1h 53m 20s
500:	test: 0.8942310	best: 0.8942310 (500)	total: 11m 8s	remaining: 1h 51m 11s
600:	test: 0.8943666	best: 0.8943666 (600)	total: 13m 20s	remaining: 1h 48m 48s
700:	test: 0.8944755	best: 0.8944755 (700)	total: 15m 30s	remaining: 1h 46m 12s
800:	test: 0.8945644	best: 0.8945644 (800)	total: 17m 46s	remaining: 1h 44m 13s
900:	test: 0.8946195	best: 0.8946195 (900)	total: 20m	remaining: 1h 42m 6s
1000:	test: 0.8946787	best: 0.8946787 (1000)	total: 22m 11s	remaining: 1h 39m 43s
1100:	test: 0.8947118	best: 0.8947120 (1098)	total: 24m 25s	remaining: 1h 37m 34s
1200:	test: 0.8947526	best: 0.8947528 (1199)	total: 26m 3

Default metric period is 5 because AUC is/are not implemented for GPU


0:	test: 0.8762399	best: 0.8762399 (0)	total: 1.2s	remaining: 1h 50m 7s
100:	test: 0.8922962	best: 0.8922962 (100)	total: 2m 20s	remaining: 2h 5m 12s
200:	test: 0.8938102	best: 0.8938102 (200)	total: 4m 32s	remaining: 1h 59m 35s
300:	test: 0.8943968	best: 0.8943968 (300)	total: 6m 46s	remaining: 1h 57m 8s
400:	test: 0.8947190	best: 0.8947190 (400)	total: 9m	remaining: 1h 54m 28s
500:	test: 0.8948994	best: 0.8948994 (500)	total: 11m 13s	remaining: 1h 52m 4s
600:	test: 0.8950514	best: 0.8950514 (600)	total: 13m 27s	remaining: 1h 49m 43s
700:	test: 0.8951722	best: 0.8951722 (700)	total: 15m 42s	remaining: 1h 47m 31s
800:	test: 0.8952501	best: 0.8952501 (800)	total: 17m 58s	remaining: 1h 45m 25s
900:	test: 0.8953233	best: 0.8953233 (900)	total: 20m 14s	remaining: 1h 43m 21s
1000:	test: 0.8953665	best: 0.8953665 (1000)	total: 22m 30s	remaining: 1h 41m 9s
1100:	test: 0.8954027	best: 0.8954027 (1100)	total: 24m 45s	remaining: 1h 38m 56s
1200:	test: 0.8954355	best: 0.8954355 (1200)	total: 27m 

In [None]:
# Define LightGBM parameters
lgb_params = {
    'objective': 'binary',
    'metric': 'auc',
    'reg_alpha': 0.03432385172267505,
    'reg_lambda': 0.2998279059616829,
    'colsample_bytree': 0.790292183596673,
    'subsample': 0.9046878168822107,
    'learning_rate': 0.05035039561309864,
    'max_depth': 10,  # Further reduced max depth
    'num_leaves': 31,  # Standard number of leaves
    'min_child_samples': 100,  # Increased min child samples
    'min_child_weight': 1,  # Adjusted min child weight
    'min_split_gain': 0.09978597066868167,
    'max_bin': 255,
    'device': 'gpu',
    'early_stopping_rounds': 50,
    'verbose': 1  # Enable verbose mode
}

# Initialize lists to store out-of-fold predictions and AUC scores
lgb_preds = []
lgb_aucs = []

# Initialize StratifiedKFold
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Train LightGBM model with cross-validation
for fold, (train_idx, test_idx) in enumerate(skfold.split(X, y)):
    print(f"\n---- Fold {fold + 1} ----\n")
    
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_valid, y_valid = X.iloc[test_idx], y.iloc[test_idx]

    print(f"Fold {fold + 1} class distribution in training set: {np.bincount(y_train)}")
    print(f"Fold {fold + 1} class distribution in validation set: {np.bincount(y_valid)}")
    
    train_data = lgb.Dataset(X_train, label=y_train)
    valid_data = lgb.Dataset(X_valid, label=y_valid, reference=train_data)
    
    model = None  # Initialize model as None
    try:
        model = lgb.train(
            lgb_params,
            train_data,
            num_boost_round=3000,
            valid_sets=[train_data, valid_data],
        )
        
        valid_preds = model.predict(X_valid, num_iteration=model.best_iteration)
        auc_score = roc_auc_score(y_valid, valid_preds)
        lgb_aucs.append(auc_score)
        print(f"Validation AUC score for fold {fold + 1}: {auc_score:.6f}")
        
        test_pred = model.predict(test_df, num_iteration=model.best_iteration)  # Use test_df for the test set
        lgb_preds.append(test_pred)
        
    except lgb.basic.LightGBMError as e:
        print(f"LightGBMError in fold {fold + 1}: {str(e)}")
    
    # Clear memory
    del X_train, y_train, X_valid, y_valid, train_data, valid_data
    if model is not None:
        del model
    gc.collect()

# Calculate overall AUC score
auc_mean_lgb = np.mean(lgb_aucs)
auc_std_lgb = np.std(lgb_aucs)
print(f"\n---> Overall ROC-AUC Score: {auc_mean_lgb:.6f} ± {auc_std_lgb:.6f}\n")

# Average the predictions from each fold
test_pred_lgb = np.mean(lgb_preds, axis=0)


---- Fold 1 ----

Fold 1 class distribution in training set: [8071791 1132047]
Fold 1 class distribution in validation set: [2017948  283012]
[LightGBM] [Info] Number of positive: 1132047, number of negative: 8071791
[LightGBM] [Info] This is the GPU trainer!!
[LightGBM] [Info] Total Bins 1254
[LightGBM] [Info] Number of data points in the train set: 9203838, number of used features: 13
[LightGBM] [Info] Using GPU Device: NVIDIA GeForce RTX 2070 SUPER, Vendor: NVIDIA Corporation
[LightGBM] [Info] Compiling OpenCL Kernel with 256 bins...
[LightGBM] [Info] GPU programs have been built
[LightGBM] [Info] Size of histogram bin entry: 8
[LightGBM] [Info] 13 dense feature groups (140.44 MB) transferred to GPU in 0.137421 secs. 0 sparse feature groups
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.122997 -> initscore=-1.964348
[LightGBM] [Info] Start training from score -1.964348
Training until validation scores don't improve for 50 rounds


KeyboardInterrupt: 

In [None]:
# Define XGBoost parameters
xgb_params = {
    'eval_metric': 'auc',
    'eta': 0.05,
    'alpha': 0.2545607592482198,
    'subsample': 0.8388163485383147,
    'colsample_bytree': 0.2732499701466825,
    'max_depth': 16,
    'min_child_weight': 5,
    'gamma': 0.0017688666476104672,
    'max_bin': 262143,
    'tree_method': 'gpu_hist',  # Ensure your environment supports GPU
    'predictor': 'gpu_predictor',  # Ensure your environment supports GPU
    'enable_categorical': True,
    'verbose': 100
}

# Initialize lists to store out-of-fold predictions and AUC scores
xgb_preds = []
xgb_aucs = []

# Initialize StratifiedKFold
skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Train XGBoost model with cross-validation
for fold, (train_idx, test_idx) in enumerate(skfold.split(X, y)):
    print(f"\n---- Fold {fold + 1} ----\n")
    
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_valid, y_valid = X.iloc[test_idx], y.iloc[test_idx]
    
    dtrain = xgb.DMatrix(X_train, label=y_train, enable_categorical=True)
    dvalid = xgb.DMatrix(X_valid, label=y_valid, enable_categorical=True)
    
    model = xgb.train(
        xgb_params,
        dtrain,
        num_boost_round=3000,
        evals=[(dtrain, 'train'), (dvalid, 'valid')],
        early_stopping_rounds=50,
        verbose_eval=100  # Display log every 100 rounds
    )
    
    # Use best_iteration instead of best_ntree_limit
    valid_preds = model.predict(dvalid, iteration_range=(0, model.best_iteration))
    auc_score = roc_auc_score(y_valid, valid_preds)
    xgb_aucs.append(auc_score)
    print(f"Validation AUC score for fold {fold + 1}: {auc_score:.6f}")
    
    dtest = xgb.DMatrix(test_df, enable_categorical=True)  # Use test_df for the test set
    test_pred = model.predict(dtest, iteration_range=(0, model.best_iteration))
    xgb_preds.append(test_pred)
    
    # Clear memory
    del X_train, y_train, X_valid, y_valid, dtrain, dvalid, model
    gc.collect()

# Calculate overall AUC score for XGBoost
auc_mean_xgb = np.mean(xgb_aucs)
auc_std_xgb = np.std(xgb_aucs)
print(f"\n---> Overall ROC-AUC Score for XGBoost: {auc_mean_xgb:.6f} ± {auc_std_xgb:.6f}\n")

# Average the predictions from each fold for XGBoost
test_pred_xgb = np.mean(xgb_preds, axis=0)


---- Fold 1 ----

[0]	train-auc:0.84812	valid-auc:0.84825
[100]	train-auc:0.88909	valid-auc:0.88171


KeyboardInterrupt: 

In [None]:
test_df = import_data(test_path, index_col='id')

# Normalize AUC scores to sum to 1
total_auc = auc_mean_cat + auc_mean_lgb + auc_mean_xgb

weight_cat = auc_mean_cat / total_auc
weight_lgb = auc_mean_lgb / total_auc
weight_xgb = auc_mean_xgb / total_auc

# Print weights for verification
print(f"Weights - CatBoost: {weight_cat:.4f}, LightGBM: {weight_lgb:.4f}, XGBoost: {weight_xgb:.4f}")

# Blending predictions with calculated weights
blended_preds = (weight_cat * test_pred_cat + weight_lgb * test_pred_lgb + weight_xgb * test_pred_xgb)

# Create the submission DataFrame
submission = pd.DataFrame({
    'id': test_df.index,
    'Response': blended_preds
})

# Save the submission file
submission.to_csv("submission.csv", index=False)

print("Submission file created successfully!")


2024-07-23 18:59:07,501 - INFO - Memory usage of dataframe is 643.68 MB
2024-07-23 18:59:08,626 - INFO - Memory usage after optimization is: 175.55 MB
2024-07-23 18:59:08,627 - INFO - Decreased by 72.7%


Weights - CatBoost: 0.3334, LightGBM: 0.3328, XGBoost: 0.3338


ValueError: All arrays must be of the same length