In [14]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Preprocessing
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, f_classif

# Models
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

# Metrics
from sklearn.metrics import f1_score, classification_report

# Imbalanced learning - IMPROVED
from imblearn.over_sampling import ADASYN  # Better than SMOTE!
from imblearn.combine import SMOTETomek

import optuna
from optuna.samplers import TPESampler

print("=" * 80)
print("PLAYER SEGMENT CLASSIFICATION PIPELINE")
print("=" * 80)

PLAYER SEGMENT CLASSIFICATION PIPELINE


1. LOAD AND PREPROCESS DATA (Same as main pipeline)

In [3]:
print("\n[1] Loading and Preprocessing Data...")

train = pd.read_csv('C:\\Users\\Kanyavan\\Documents\\Year3_semester1\\ML\\CPE342-Hackathon\\task2\\train.csv')
train.info()


[1] Loading and Preprocessing Data...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101658 entries, 0 to 101657
Data columns (total 47 columns):
 #   Column                            Non-Null Count   Dtype  
---  ------                            --------------   -----  
 0   id                                101658 non-null  object 
 1   player_id                         101658 non-null  object 
 2   play_frequency                    93785 non-null   float64
 3   avg_session_duration              93771 non-null   float64
 4   total_playtime_hours              93887 non-null   float64
 5   login_streak                      93938 non-null   float64
 6   days_since_last_login             93878 non-null   float64
 7   total_spending_thb                93902 non-null   float64
 8   avg_monthly_spending              93923 non-null   float64
 9   spending_frequency                93898 non-null   float64
 10  friend_count                      93720 non-null   float64
 11  team_play_per

In [4]:
print(f"Train shape: {train.shape}")
print(f"Target distribution:\n{train['segment'].value_counts().sort_index()}")

Train shape: (101658, 47)
Target distribution:
segment
0    40064
1    25397
2    20549
3    15648
Name: count, dtype: int64


In [5]:
X_train = train.drop(['id', 'player_id', 'segment'], axis=1)
y_train = train['segment'].astype(int)

In [None]:
#-------------------------------------------------FIRST-----------------------------------------------------------------------------
# Feature Engineering (simplified version - use full version from main script)
def engineer_features(df):
    df = df.copy()

    # Spending features
    df['spending_per_hour'] = df['total_spending_thb'] / (df['total_playtime_hours'] + 1)
    df['spending_intensity'] = df['avg_monthly_spending'] * df['spending_frequency']
    df['is_spender'] = (df['total_spending_thb'] > 0).astype(int)

    # Engagement features
    df['engagement_score'] = (df['play_frequency'] * df['avg_session_duration'] * df['login_streak']).fillna(0)
    df['activity_consistency'] = df['login_streak'] / (df['days_since_last_login'] + 1)

    # Social features
    df['social_score'] = (df['friend_count'] * df['team_play_percentage'] * df['chat_activity_score']).fillna(0)
    df['is_social_player'] = (df['team_play_percentage'] > 50).astype(int)

    # Competitive features
    df['competitive_score'] = (df['ranked_participation_rate'] * df['tournament_entries'] * df['competitive_rank']).fillna(0)
    df['is_competitive'] = (df['ranked_participation_rate'] > 50).astype(int)

    # Remove random metrics
    df = df.drop(['random_metric_1', 'random_metric_2', 'random_metric_3'], axis=1, errors='ignore')

    return df

X_train_eng = engineer_features(X_train)
#-------------------------------------------------FIRST-----------------------------------------------------------------------------

In [None]:
#-------------------------------------------------FIRST-----------------------------------------------------------------------------
# Preprocessing
numerical_features = X_train_eng.select_dtypes(include=[np.number]).columns.tolist()
categorical_features = X_train_eng.select_dtypes(include=['object']).columns.tolist()
#-------------------------------------------------FIRST-----------------------------------------------------------------------------

In [12]:
# Label Encoding
for col in categorical_features:
    le = LabelEncoder()
    X_train_eng[col] = le.fit_transform(X_train_eng[col].astype(str))

In [13]:
# Imputation
num_imputer = SimpleImputer(strategy='median')
X_train_eng[numerical_features] = num_imputer.fit_transform(X_train_eng[numerical_features])

if categorical_features:
    cat_imputer = SimpleImputer(strategy='most_frequent')
    X_train_eng[categorical_features] = cat_imputer.fit_transform(X_train_eng[categorical_features])

In [14]:
# Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_eng)

In [15]:
# SMOTE
smote = SMOTE(random_state=42, k_neighbors=5)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train_scaled, y_train)

print(f"Data prepared: {X_train_balanced.shape}")

Data prepared: (160256, 50)


2. OPTUNA HYPERPARAMETER TUNING

In [16]:
# Define cross-validation strategy
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

2.1 XGBoost Hyperparameter Tuning

In [17]:
def objective_xgb(trial):
    """Objective function for XGBoost optimization"""
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 8),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'gamma': trial.suggest_float('gamma', 0, 5),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 10),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 10),
        'random_state': 42,
        'eval_metric': 'mlogloss',
        'tree_method': 'hist'
    }

    model = XGBClassifier(**params)
    scores = cross_val_score(model, X_train_balanced, y_train_balanced,
                            cv=skf, scoring='f1_macro', n_jobs=2)

    return scores.mean()

In [18]:
# Run optimization
study_xgb = optuna.create_study(
    direction='maximize',
    sampler=TPESampler(seed=42),
    study_name='XGBoost_Optimization'
)

study_xgb.optimize(objective_xgb, n_trials=20, show_progress_bar=True)

print(f"\nBest XGBoost F1-Score: {study_xgb.best_value:.4f}")
print("Best XGBoost Parameters:")
for key, value in study_xgb.best_params.items():
    print(f"  {key}: {value}")

[I 2025-11-20 07:16:19,931] A new study created in memory with name: XGBoost_Optimization
Best trial: 0. Best value: 0.811537:   5%|▌         | 1/20 [01:19<25:08, 79.41s/it]

[I 2025-11-20 07:17:39,345] Trial 0 finished with value: 0.81153703799887 and parameters: {'n_estimators': 250, 'max_depth': 8, 'learning_rate': 0.1205712628744377, 'subsample': 0.8394633936788146, 'colsample_bytree': 0.6624074561769746, 'min_child_weight': 2, 'gamma': 0.2904180608409973, 'reg_alpha': 8.661761457749352, 'reg_lambda': 6.011150117432088}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  10%|█         | 2/20 [02:06<18:03, 60.17s/it]

[I 2025-11-20 07:18:26,042] Trial 1 finished with value: 0.7964559307964221 and parameters: {'n_estimators': 383, 'max_depth': 3, 'learning_rate': 0.2708160864249968, 'subsample': 0.9329770563201687, 'colsample_bytree': 0.6849356442713105, 'min_child_weight': 2, 'gamma': 0.9170225492671691, 'reg_alpha': 3.0424224295953772, 'reg_lambda': 5.247564316322379}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  15%|█▌        | 3/20 [02:47<14:35, 51.50s/it]

[I 2025-11-20 07:19:07,219] Trial 2 finished with value: 0.7865061780169423 and parameters: {'n_estimators': 273, 'max_depth': 4, 'learning_rate': 0.08012737503998542, 'subsample': 0.6557975442608167, 'colsample_bytree': 0.7168578594140873, 'min_child_weight': 4, 'gamma': 2.28034992108518, 'reg_alpha': 7.851759613930136, 'reg_lambda': 1.9967378215835974}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  20%|██        | 4/20 [04:00<16:02, 60.18s/it]

[I 2025-11-20 07:20:20,701] Trial 3 finished with value: 0.759840248599127 and parameters: {'n_estimators': 306, 'max_depth': 6, 'learning_rate': 0.011711509955524094, 'subsample': 0.8430179407605753, 'colsample_bytree': 0.6682096494749166, 'min_child_weight': 1, 'gamma': 4.7444276862666666, 'reg_alpha': 9.656320330745594, 'reg_lambda': 8.08397348116461}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  25%|██▌       | 5/20 [04:33<12:32, 50.14s/it]

[I 2025-11-20 07:20:53,048] Trial 4 finished with value: 0.7784933563907311 and parameters: {'n_estimators': 222, 'max_depth': 3, 'learning_rate': 0.1024932221692416, 'subsample': 0.7760609974958406, 'colsample_bytree': 0.6488152939379115, 'min_child_weight': 5, 'gamma': 0.17194260557609198, 'reg_alpha': 9.093204020787821, 'reg_lambda': 2.587799816000169}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  30%|███       | 6/20 [05:24<11:47, 50.51s/it]

[I 2025-11-20 07:21:44,267] Trial 5 finished with value: 0.7849557604477317 and parameters: {'n_estimators': 365, 'max_depth': 4, 'learning_rate': 0.05864129169696527, 'subsample': 0.8186841117373118, 'colsample_bytree': 0.6739417822102108, 'min_child_weight': 10, 'gamma': 3.8756641168055728, 'reg_alpha': 9.394989415641891, 'reg_lambda': 8.948273504276488}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  35%|███▌      | 7/20 [06:48<13:19, 61.49s/it]

[I 2025-11-20 07:23:08,365] Trial 6 finished with value: 0.7835105269544839 and parameters: {'n_estimators': 339, 'max_depth': 8, 'learning_rate': 0.01351182947645082, 'subsample': 0.6783931449676581, 'colsample_bytree': 0.6180909155642152, 'min_child_weight': 4, 'gamma': 1.9433864484474102, 'reg_alpha': 2.713490317738959, 'reg_lambda': 8.287375091519294}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  40%|████      | 8/20 [07:19<10:22, 51.89s/it]

[I 2025-11-20 07:23:39,700] Trial 7 finished with value: 0.7789816827713364 and parameters: {'n_estimators': 243, 'max_depth': 4, 'learning_rate': 0.06333268775321843, 'subsample': 0.6563696899899051, 'colsample_bytree': 0.9208787923016158, 'min_child_weight': 1, 'gamma': 4.9344346830025865, 'reg_alpha': 7.722447692966574, 'reg_lambda': 1.987156815341724}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  45%|████▌     | 9/20 [07:42<07:51, 42.85s/it]

[I 2025-11-20 07:24:02,676] Trial 8 finished with value: 0.7939694202981846 and parameters: {'n_estimators': 102, 'max_depth': 7, 'learning_rate': 0.11069143219393454, 'subsample': 0.8916028672163949, 'colsample_bytree': 0.9085081386743783, 'min_child_weight': 1, 'gamma': 1.7923286427213632, 'reg_alpha': 1.1586905952512971, 'reg_lambda': 8.631034258755935}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  50%|█████     | 10/20 [08:23<07:01, 42.14s/it]

[I 2025-11-20 07:24:43,225] Trial 9 finished with value: 0.7382782379096311 and parameters: {'n_estimators': 349, 'max_depth': 4, 'learning_rate': 0.012413189635294229, 'subsample': 0.7243929286862649, 'colsample_bytree': 0.7300733288106989, 'min_child_weight': 8, 'gamma': 3.1877873567760657, 'reg_alpha': 8.872127425763265, 'reg_lambda': 4.722149251619493}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  55%|█████▌    | 11/20 [10:29<10:12, 68.00s/it]

[I 2025-11-20 07:26:49,877] Trial 10 finished with value: 0.8024720612379574 and parameters: {'n_estimators': 478, 'max_depth': 8, 'learning_rate': 0.02847748683027988, 'subsample': 0.9729161367647149, 'colsample_bytree': 0.8085360047450805, 'min_child_weight': 7, 'gamma': 0.0757211855137844, 'reg_alpha': 6.233765186294655, 'reg_lambda': 5.7054786488168325}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  60%|██████    | 12/20 [12:44<11:45, 88.14s/it]

[I 2025-11-20 07:29:04,059] Trial 11 finished with value: 0.8021936320830619 and parameters: {'n_estimators': 499, 'max_depth': 8, 'learning_rate': 0.0262209763647716, 'subsample': 0.9845355085773861, 'colsample_bytree': 0.8243626807262747, 'min_child_weight': 7, 'gamma': 0.055111967492394796, 'reg_alpha': 5.732909776512291, 'reg_lambda': 5.916836359300596}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  65%|██████▌   | 13/20 [14:17<10:28, 89.81s/it]

[I 2025-11-20 07:30:37,710] Trial 12 finished with value: 0.7981551742379797 and parameters: {'n_estimators': 476, 'max_depth': 7, 'learning_rate': 0.031633938267485534, 'subsample': 0.9987966992361724, 'colsample_bytree': 0.7875197249298348, 'min_child_weight': 7, 'gamma': 1.041401330544119, 'reg_alpha': 5.855038899618591, 'reg_lambda': 6.56116398580863}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  70%|███████   | 14/20 [14:49<07:13, 72.23s/it]

[I 2025-11-20 07:31:09,321] Trial 13 finished with value: 0.8076417547346798 and parameters: {'n_estimators': 171, 'max_depth': 7, 'learning_rate': 0.23586553545554284, 'subsample': 0.8923645209213664, 'colsample_bytree': 0.8340263727840078, 'min_child_weight': 9, 'gamma': 0.9453049307333259, 'reg_alpha': 6.882295119501993, 'reg_lambda': 4.181407616277676}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  75%|███████▌  | 15/20 [15:16<04:53, 58.76s/it]

[I 2025-11-20 07:31:36,854] Trial 14 finished with value: 0.8032949224367885 and parameters: {'n_estimators': 163, 'max_depth': 6, 'learning_rate': 0.2236154456796527, 'subsample': 0.8865749354072971, 'colsample_bytree': 0.9984727629873955, 'min_child_weight': 10, 'gamma': 1.0110772545056612, 'reg_alpha': 6.8915582298486235, 'reg_lambda': 3.6340653237477847}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  80%|████████  | 16/20 [15:48<03:22, 50.70s/it]

[I 2025-11-20 07:32:08,836] Trial 15 finished with value: 0.8052156744821246 and parameters: {'n_estimators': 155, 'max_depth': 7, 'learning_rate': 0.16988097423979995, 'subsample': 0.7641465924431671, 'colsample_bytree': 0.8791840665311281, 'min_child_weight': 3, 'gamma': 1.4263797965515426, 'reg_alpha': 4.068837327056717, 'reg_lambda': 3.998659922894784}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  85%|████████▌ | 17/20 [16:15<02:10, 43.51s/it]

[I 2025-11-20 07:32:35,639] Trial 16 finished with value: 0.7975314078001249 and parameters: {'n_estimators': 179, 'max_depth': 7, 'learning_rate': 0.1642253352275651, 'subsample': 0.8659515464845774, 'colsample_bytree': 0.7584920113121396, 'min_child_weight': 9, 'gamma': 2.817336352628225, 'reg_alpha': 7.694337028535759, 'reg_lambda': 0.13179214380923554}. Best is trial 0 with value: 0.81153703799887.


Best trial: 0. Best value: 0.811537:  90%|█████████ | 18/20 [16:35<01:12, 36.48s/it]

[I 2025-11-20 07:32:55,758] Trial 17 finished with value: 0.7925049805739734 and parameters: {'n_estimators': 110, 'max_depth': 6, 'learning_rate': 0.14290274997706512, 'subsample': 0.9285086088875721, 'colsample_bytree': 0.8505915654206465, 'min_child_weight': 6, 'gamma': 0.6234135690000955, 'reg_alpha': 4.916273921024845, 'reg_lambda': 7.165494609738724}. Best is trial 0 with value: 0.81153703799887.


Best trial: 18. Best value: 0.81231:  95%|█████████▌| 19/20 [17:23<00:39, 39.95s/it]

[I 2025-11-20 07:33:43,779] Trial 18 finished with value: 0.8123102421326547 and parameters: {'n_estimators': 214, 'max_depth': 8, 'learning_rate': 0.2810657274670111, 'subsample': 0.6026168324121943, 'colsample_bytree': 0.6075934324519439, 'min_child_weight': 5, 'gamma': 0.553617064887681, 'reg_alpha': 8.157298373095427, 'reg_lambda': 3.7834031171882017}. Best is trial 18 with value: 0.8123102421326547.


Best trial: 18. Best value: 0.81231: 100%|██████████| 20/20 [18:15<00:00, 54.79s/it]

[I 2025-11-20 07:34:35,820] Trial 19 finished with value: 0.7914299999384486 and parameters: {'n_estimators': 212, 'max_depth': 8, 'learning_rate': 0.03910891361220486, 'subsample': 0.6172803398163924, 'colsample_bytree': 0.6216495646994378, 'min_child_weight': 3, 'gamma': 0.471058024403758, 'reg_alpha': 9.984912975919531, 'reg_lambda': 0.43357095923358635}. Best is trial 18 with value: 0.8123102421326547.

Best XGBoost F1-Score: 0.8123
Best XGBoost Parameters:
  n_estimators: 214
  max_depth: 8
  learning_rate: 0.2810657274670111
  subsample: 0.6026168324121943
  colsample_bytree: 0.6075934324519439
  min_child_weight: 5
  gamma: 0.553617064887681
  reg_alpha: 8.157298373095427
  reg_lambda: 3.7834031171882017





2.2 LightGBM Hyperparameter Tuning

In [19]:
print("\n[2.2] Tuning LightGBM Hyperparameters...")


[2.2] Tuning LightGBM Hyperparameters...


In [20]:
def objective_lgbm(trial):
    """Objective function for LightGBM optimization"""
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 8),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'num_leaves': trial.suggest_int('num_leaves', 20, 200),
        'reg_alpha': trial.suggest_float('reg_alpha', 0, 10),
        'reg_lambda': trial.suggest_float('reg_lambda', 0, 10),
        'random_state': 42,
        'verbose': -1
    }

    model = LGBMClassifier(**params)
    scores = cross_val_score(model, X_train_balanced, y_train_balanced,
                            cv=skf, scoring='f1_macro', n_jobs=2)

    return scores.mean()

In [21]:
# Run optimization
study_lgbm = optuna.create_study(
    direction='maximize',
    sampler=TPESampler(seed=42),
    study_name='LightGBM_Optimization'
)

study_lgbm.optimize(objective_lgbm, n_trials=20, show_progress_bar=True)

print(f"\nBest LightGBM F1-Score: {study_lgbm.best_value:.4f}")
print("Best LightGBM Parameters:")
for key, value in study_lgbm.best_params.items():
    print(f"  {key}: {value}")

[I 2025-11-20 07:41:11,201] A new study created in memory with name: LightGBM_Optimization
Best trial: 0. Best value: 0.803279:   5%|▌         | 1/20 [00:29<09:16, 29.30s/it]

[I 2025-11-20 07:41:40,501] Trial 0 finished with value: 0.8032788344805379 and parameters: {'n_estimators': 250, 'max_depth': 8, 'learning_rate': 0.1205712628744377, 'subsample': 0.8394633936788146, 'colsample_bytree': 0.6624074561769746, 'min_child_samples': 19, 'num_leaves': 30, 'reg_alpha': 8.661761457749352, 'reg_lambda': 6.011150117432088}. Best is trial 0 with value: 0.8032788344805379.


Best trial: 0. Best value: 0.803279:  10%|█         | 2/20 [00:51<07:31, 25.08s/it]

[I 2025-11-20 07:42:02,629] Trial 1 finished with value: 0.7984306415850317 and parameters: {'n_estimators': 383, 'max_depth': 3, 'learning_rate': 0.2708160864249968, 'subsample': 0.9329770563201687, 'colsample_bytree': 0.6849356442713105, 'min_child_samples': 22, 'num_leaves': 53, 'reg_alpha': 3.0424224295953772, 'reg_lambda': 5.247564316322379}. Best is trial 0 with value: 0.8032788344805379.


Best trial: 0. Best value: 0.803279:  15%|█▌        | 3/20 [01:11<06:30, 22.96s/it]

[I 2025-11-20 07:42:23,066] Trial 2 finished with value: 0.7926448424193241 and parameters: {'n_estimators': 273, 'max_depth': 4, 'learning_rate': 0.08012737503998542, 'subsample': 0.6557975442608167, 'colsample_bytree': 0.7168578594140873, 'min_child_samples': 40, 'num_leaves': 102, 'reg_alpha': 7.851759613930136, 'reg_lambda': 1.9967378215835974}. Best is trial 0 with value: 0.8032788344805379.


Best trial: 0. Best value: 0.803279:  20%|██        | 4/20 [01:45<07:11, 27.00s/it]

[I 2025-11-20 07:42:56,253] Trial 3 finished with value: 0.7722722218623219 and parameters: {'n_estimators': 306, 'max_depth': 6, 'learning_rate': 0.011711509955524094, 'subsample': 0.8430179407605753, 'colsample_bytree': 0.6682096494749166, 'min_child_samples': 11, 'num_leaves': 191, 'reg_alpha': 9.656320330745594, 'reg_lambda': 8.08397348116461}. Best is trial 0 with value: 0.8032788344805379.


Best trial: 0. Best value: 0.803279:  25%|██▌       | 5/20 [01:58<05:32, 22.19s/it]

[I 2025-11-20 07:43:09,913] Trial 4 finished with value: 0.786453841255557 and parameters: {'n_estimators': 222, 'max_depth': 3, 'learning_rate': 0.1024932221692416, 'subsample': 0.7760609974958406, 'colsample_bytree': 0.6488152939379115, 'min_child_samples': 52, 'num_leaves': 26, 'reg_alpha': 9.093204020787821, 'reg_lambda': 2.587799816000169}. Best is trial 0 with value: 0.8032788344805379.


Best trial: 0. Best value: 0.803279:  30%|███       | 6/20 [02:24<05:30, 23.58s/it]

[I 2025-11-20 07:43:36,186] Trial 5 finished with value: 0.7921093377519909 and parameters: {'n_estimators': 365, 'max_depth': 4, 'learning_rate': 0.05864129169696527, 'subsample': 0.8186841117373118, 'colsample_bytree': 0.6739417822102108, 'min_child_samples': 98, 'num_leaves': 160, 'reg_alpha': 9.394989415641891, 'reg_lambda': 8.948273504276488}. Best is trial 0 with value: 0.8032788344805379.


Best trial: 0. Best value: 0.803279:  35%|███▌      | 7/20 [03:16<07:06, 32.78s/it]

[I 2025-11-20 07:44:27,920] Trial 6 finished with value: 0.7905080576989054 and parameters: {'n_estimators': 339, 'max_depth': 8, 'learning_rate': 0.01351182947645082, 'subsample': 0.6783931449676581, 'colsample_bytree': 0.6180909155642152, 'min_child_samples': 36, 'num_leaves': 90, 'reg_alpha': 2.713490317738959, 'reg_lambda': 8.287375091519294}. Best is trial 0 with value: 0.8032788344805379.


Best trial: 0. Best value: 0.803279:  40%|████      | 8/20 [03:38<05:52, 29.33s/it]

[I 2025-11-20 07:44:49,872] Trial 7 finished with value: 0.7876260079578006 and parameters: {'n_estimators': 243, 'max_depth': 4, 'learning_rate': 0.06333268775321843, 'subsample': 0.6563696899899051, 'colsample_bytree': 0.9208787923016158, 'min_child_samples': 12, 'num_leaves': 198, 'reg_alpha': 7.722447692966574, 'reg_lambda': 1.987156815341724}. Best is trial 0 with value: 0.8032788344805379.


Best trial: 0. Best value: 0.803279:  45%|████▌     | 9/20 [03:57<04:45, 25.98s/it]

[I 2025-11-20 07:45:08,468] Trial 8 finished with value: 0.7996849826530116 and parameters: {'n_estimators': 102, 'max_depth': 7, 'learning_rate': 0.11069143219393454, 'subsample': 0.8916028672163949, 'colsample_bytree': 0.9085081386743783, 'min_child_samples': 12, 'num_leaves': 84, 'reg_alpha': 1.1586905952512971, 'reg_lambda': 8.631034258755935}. Best is trial 0 with value: 0.8032788344805379.


Best trial: 0. Best value: 0.803279:  50%|█████     | 10/20 [04:26<04:31, 27.12s/it]

[I 2025-11-20 07:45:38,150] Trial 9 finished with value: 0.7561393244925265 and parameters: {'n_estimators': 349, 'max_depth': 4, 'learning_rate': 0.012413189635294229, 'subsample': 0.7243929286862649, 'colsample_bytree': 0.7300733288106989, 'min_child_samples': 75, 'num_leaves': 135, 'reg_alpha': 8.872127425763265, 'reg_lambda': 4.722149251619493}. Best is trial 0 with value: 0.8032788344805379.


Best trial: 0. Best value: 0.803279:  55%|█████▌    | 11/20 [05:19<05:12, 34.75s/it]

[I 2025-11-20 07:46:30,207] Trial 10 finished with value: 0.794409230955779 and parameters: {'n_estimators': 478, 'max_depth': 8, 'learning_rate': 0.02847748683027988, 'subsample': 0.9729161367647149, 'colsample_bytree': 0.8085360047450805, 'min_child_samples': 71, 'num_leaves': 22, 'reg_alpha': 6.233765186294655, 'reg_lambda': 5.7054786488168325}. Best is trial 0 with value: 0.8032788344805379.


Best trial: 11. Best value: 0.804004:  60%|██████    | 12/20 [05:36<03:57, 29.64s/it]

[I 2025-11-20 07:46:48,147] Trial 11 finished with value: 0.8040042770679403 and parameters: {'n_estimators': 102, 'max_depth': 7, 'learning_rate': 0.19265640527744937, 'subsample': 0.8794972169146561, 'colsample_bytree': 0.9571939572469175, 'min_child_samples': 27, 'num_leaves': 67, 'reg_alpha': 0.7461817227902943, 'reg_lambda': 9.98358297559386}. Best is trial 11 with value: 0.8040042770679403.


Best trial: 12. Best value: 0.806032:  65%|██████▌   | 13/20 [05:55<03:03, 26.19s/it]

[I 2025-11-20 07:47:06,417] Trial 12 finished with value: 0.8060322777827202 and parameters: {'n_estimators': 104, 'max_depth': 7, 'learning_rate': 0.22233691449613796, 'subsample': 0.8758146404482569, 'colsample_bytree': 0.9978524426860315, 'min_child_samples': 30, 'num_leaves': 64, 'reg_alpha': 4.771549442410068, 'reg_lambda': 6.678787164688568}. Best is trial 12 with value: 0.8060322777827202.


Best trial: 12. Best value: 0.806032:  70%|███████   | 14/20 [06:11<02:18, 23.15s/it]

[I 2025-11-20 07:47:22,537] Trial 13 finished with value: 0.8032992562979508 and parameters: {'n_estimators': 107, 'max_depth': 6, 'learning_rate': 0.29169654074088264, 'subsample': 0.9003813532509073, 'colsample_bytree': 0.9943297068819146, 'min_child_samples': 35, 'num_leaves': 63, 'reg_alpha': 0.4635155182061384, 'reg_lambda': 9.962989421056246}. Best is trial 12 with value: 0.8060322777827202.


Best trial: 14. Best value: 0.810296:  75%|███████▌  | 15/20 [06:38<02:02, 24.51s/it]

[I 2025-11-20 07:47:50,184] Trial 14 finished with value: 0.8102958096022055 and parameters: {'n_estimators': 164, 'max_depth': 7, 'learning_rate': 0.1826614526342363, 'subsample': 0.9984067957444853, 'colsample_bytree': 0.9997893131179902, 'min_child_samples': 51, 'num_leaves': 70, 'reg_alpha': 5.1393516891854345, 'reg_lambda': 7.049270826015105}. Best is trial 14 with value: 0.8102958096022055.


Best trial: 15. Best value: 0.811124:  80%|████████  | 16/20 [07:07<01:42, 25.64s/it]

[I 2025-11-20 07:48:18,439] Trial 15 finished with value: 0.8111242718020497 and parameters: {'n_estimators': 176, 'max_depth': 7, 'learning_rate': 0.14920643617808255, 'subsample': 0.9750244637464088, 'colsample_bytree': 0.8516529430033545, 'min_child_samples': 55, 'num_leaves': 123, 'reg_alpha': 4.257738884256797, 'reg_lambda': 7.202107114363317}. Best is trial 15 with value: 0.8111242718020497.


Best trial: 15. Best value: 0.811124:  85%|████████▌ | 17/20 [07:31<01:15, 25.10s/it]

[I 2025-11-20 07:48:42,307] Trial 16 finished with value: 0.8077074698629827 and parameters: {'n_estimators': 184, 'max_depth': 6, 'learning_rate': 0.16977074027978808, 'subsample': 0.9946602206724103, 'colsample_bytree': 0.8301537673031288, 'min_child_samples': 60, 'num_leaves': 138, 'reg_alpha': 5.098091147793574, 'reg_lambda': 3.583902847905419}. Best is trial 15 with value: 0.8111242718020497.


Best trial: 15. Best value: 0.811124:  90%|█████████ | 18/20 [07:51<00:47, 23.57s/it]

[I 2025-11-20 07:49:02,306] Trial 17 finished with value: 0.7772542229730912 and parameters: {'n_estimators': 171, 'max_depth': 5, 'learning_rate': 0.03721349294209224, 'subsample': 0.9579190418236523, 'colsample_bytree': 0.8758697700847069, 'min_child_samples': 49, 'num_leaves': 118, 'reg_alpha': 4.026609486224885, 'reg_lambda': 6.946016703174446}. Best is trial 15 with value: 0.8111242718020497.


Best trial: 15. Best value: 0.811124:  95%|█████████▌| 19/20 [08:18<00:24, 24.63s/it]

[I 2025-11-20 07:49:29,394] Trial 18 finished with value: 0.8106399626475481 and parameters: {'n_estimators': 182, 'max_depth': 7, 'learning_rate': 0.15000385515031378, 'subsample': 0.9299305629337544, 'colsample_bytree': 0.7602802489247009, 'min_child_samples': 67, 'num_leaves': 164, 'reg_alpha': 6.435468714317403, 'reg_lambda': 7.1058773596334}. Best is trial 15 with value: 0.8111242718020497.


Best trial: 15. Best value: 0.811124: 100%|██████████| 20/20 [08:39<00:00, 25.99s/it]

[I 2025-11-20 07:49:50,917] Trial 19 finished with value: 0.784390637487131 and parameters: {'n_estimators': 209, 'max_depth': 5, 'learning_rate': 0.03910891361220486, 'subsample': 0.9394691950251458, 'colsample_bytree': 0.7640881690357239, 'min_child_samples': 86, 'num_leaves': 169, 'reg_alpha': 6.538839629254709, 'reg_lambda': 0.43357095923358635}. Best is trial 15 with value: 0.8111242718020497.

Best LightGBM F1-Score: 0.8111
Best LightGBM Parameters:
  n_estimators: 176
  max_depth: 7
  learning_rate: 0.14920643617808255
  subsample: 0.9750244637464088
  colsample_bytree: 0.8516529430033545
  min_child_samples: 55
  num_leaves: 123
  reg_alpha: 4.257738884256797
  reg_lambda: 7.202107114363317





2.3 CatBoost Hyperparameter Tuning

In [22]:
print("\n[2.3] Tuning CatBoost Hyperparameters...")


[2.3] Tuning CatBoost Hyperparameters...


In [23]:
def objective_catboost(trial):
    """Objective function for CatBoost optimization"""
    params = {
        'iterations': trial.suggest_int('iterations', 100, 500),
        'depth': trial.suggest_int('depth', 3, 8),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'random_strength': trial.suggest_float('random_strength', 0, 10),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0, 1),
        'random_state': 42,
        'verbose': False
    }

    model = CatBoostClassifier(**params)
    scores = cross_val_score(model, X_train_balanced, y_train_balanced,
                            cv=skf, scoring='f1_macro', n_jobs=2)

    return scores.mean()


In [25]:
# Run optimization
study_catboost = optuna.create_study(
    direction='maximize',
    sampler=TPESampler(seed=42),
    study_name='CatBoost_Optimization'
)

study_catboost.optimize(objective_catboost, n_trials=20, show_progress_bar=True)

print(f"\nBest CatBoost F1-Score: {study_catboost.best_value:.4f}")
print("Best CatBoost Parameters:")
for key, value in study_catboost.best_params.items():
    print(f"  {key}: {value}")

[I 2025-11-20 08:01:28,827] A new study created in memory with name: CatBoost_Optimization
Best trial: 0. Best value: 0.796945:   5%|▌         | 1/20 [02:11<41:45, 131.86s/it]

[I 2025-11-20 08:03:40,691] Trial 0 finished with value: 0.79694460970667 and parameters: {'iterations': 250, 'depth': 8, 'learning_rate': 0.1205712628744377, 'l2_leaf_reg': 6.387926357773329, 'border_count': 66, 'random_strength': 1.5599452033620265, 'bagging_temperature': 0.05808361216819946}. Best is trial 0 with value: 0.79694460970667.


Best trial: 1. Best value: 0.797855:  10%|█         | 2/20 [03:52<34:01, 113.44s/it]

[I 2025-11-20 08:05:21,232] Trial 1 finished with value: 0.797855159973342 and parameters: {'iterations': 447, 'depth': 6, 'learning_rate': 0.11114989443094977, 'l2_leaf_reg': 1.185260448662222, 'border_count': 249, 'random_strength': 8.324426408004218, 'bagging_temperature': 0.21233911067827616}. Best is trial 1 with value: 0.797855159973342.


Best trial: 1. Best value: 0.797855:  15%|█▌        | 3/20 [04:24<21:38, 76.36s/it] 

[I 2025-11-20 08:05:53,461] Trial 2 finished with value: 0.7366300131919442 and parameters: {'iterations': 172, 'depth': 4, 'learning_rate': 0.028145092716060652, 'l2_leaf_reg': 5.72280788469014, 'border_count': 128, 'random_strength': 2.9122914019804194, 'bagging_temperature': 0.6118528947223795}. Best is trial 1 with value: 0.797855159973342.


Best trial: 1. Best value: 0.797855:  20%|██        | 4/20 [04:54<15:30, 58.16s/it]

[I 2025-11-20 08:06:23,733] Trial 3 finished with value: 0.740583279066646 and parameters: {'iterations': 155, 'depth': 4, 'learning_rate': 0.03476649150592621, 'l2_leaf_reg': 5.104629857953324, 'border_count': 207, 'random_strength': 1.9967378215835974, 'bagging_temperature': 0.5142344384136116}. Best is trial 1 with value: 0.797855159973342.


Best trial: 1. Best value: 0.797855:  25%|██▌       | 5/20 [05:43<13:39, 54.66s/it]

[I 2025-11-20 08:07:12,189] Trial 4 finished with value: 0.77757432101406 and parameters: {'iterations': 337, 'depth': 3, 'learning_rate': 0.07896186801026692, 'l2_leaf_reg': 2.5347171131856236, 'border_count': 46, 'random_strength': 9.488855372533333, 'bagging_temperature': 0.9656320330745594}. Best is trial 1 with value: 0.797855159973342.


Best trial: 1. Best value: 0.797855:  30%|███       | 6/20 [06:59<14:27, 61.97s/it]

[I 2025-11-20 08:08:28,351] Trial 5 finished with value: 0.745180633499161 and parameters: {'iterations': 424, 'depth': 4, 'learning_rate': 0.013940346079873234, 'l2_leaf_reg': 7.158097238609412, 'border_count': 130, 'random_strength': 1.2203823484477883, 'bagging_temperature': 0.4951769101112702}. Best is trial 1 with value: 0.797855159973342.


Best trial: 1. Best value: 0.797855:  35%|███▌      | 7/20 [08:07<13:51, 63.97s/it]

[I 2025-11-20 08:09:36,432] Trial 6 finished with value: 0.737340599620157 and parameters: {'iterations': 113, 'depth': 8, 'learning_rate': 0.024112898115291985, 'l2_leaf_reg': 6.962700559185838, 'border_count': 101, 'random_strength': 5.200680211778108, 'bagging_temperature': 0.5467102793432796}. Best is trial 1 with value: 0.797855159973342.


Best trial: 1. Best value: 0.797855:  40%|████      | 8/20 [10:15<16:50, 84.20s/it]

[I 2025-11-20 08:11:43,966] Trial 7 finished with value: 0.7904561495419445 and parameters: {'iterations': 174, 'depth': 8, 'learning_rate': 0.13962563737015762, 'l2_leaf_reg': 9.455490474077703, 'border_count': 232, 'random_strength': 5.978999788110851, 'bagging_temperature': 0.9218742350231168}. Best is trial 1 with value: 0.797855159973342.


Best trial: 1. Best value: 0.797855:  45%|████▌     | 9/20 [10:42<12:11, 66.47s/it]

[I 2025-11-20 08:12:11,439] Trial 8 finished with value: 0.6777411886468422 and parameters: {'iterations': 135, 'depth': 4, 'learning_rate': 0.011662890273931383, 'l2_leaf_reg': 3.927972976869379, 'border_count': 119, 'random_strength': 2.713490317738959, 'bagging_temperature': 0.8287375091519293}. Best is trial 1 with value: 0.797855159973342.


Best trial: 1. Best value: 0.797855:  50%|█████     | 10/20 [11:25<09:52, 59.27s/it]

[I 2025-11-20 08:12:54,580] Trial 9 finished with value: 0.7746863053948916 and parameters: {'iterations': 243, 'depth': 4, 'learning_rate': 0.06333268775321843, 'l2_leaf_reg': 2.2683180247728636, 'border_count': 211, 'random_strength': 0.7455064367977082, 'bagging_temperature': 0.9868869366005173}. Best is trial 1 with value: 0.797855159973342.


Best trial: 10. Best value: 0.804329:  55%|█████▌    | 11/20 [13:11<11:01, 73.51s/it]

[I 2025-11-20 08:14:40,400] Trial 10 finished with value: 0.8043285849884845 and parameters: {'iterations': 492, 'depth': 6, 'learning_rate': 0.2704729722717776, 'l2_leaf_reg': 1.1616568805333802, 'border_count': 175, 'random_strength': 9.076647952825176, 'bagging_temperature': 0.08088762637384686}. Best is trial 10 with value: 0.8043285849884845.


Best trial: 10. Best value: 0.804329:  60%|██████    | 12/20 [14:56<11:04, 83.02s/it]

[I 2025-11-20 08:16:25,170] Trial 11 finished with value: 0.8038394362603329 and parameters: {'iterations': 493, 'depth': 6, 'learning_rate': 0.2974249886097731, 'l2_leaf_reg': 1.0787263027174963, 'border_count': 179, 'random_strength': 9.66553648164272, 'bagging_temperature': 0.06827047002751116}. Best is trial 10 with value: 0.8043285849884845.


Best trial: 10. Best value: 0.804329:  65%|██████▌   | 13/20 [16:42<10:30, 90.10s/it]

[I 2025-11-20 08:18:11,561] Trial 12 finished with value: 0.803458096251255 and parameters: {'iterations': 498, 'depth': 6, 'learning_rate': 0.295154220868735, 'l2_leaf_reg': 1.0379675913744488, 'border_count': 181, 'random_strength': 7.5876292750655345, 'bagging_temperature': 0.07131914616397554}. Best is trial 10 with value: 0.8043285849884845.


Best trial: 13. Best value: 0.804421:  70%|███████   | 14/20 [45:38<58:43, 587.24s/it]

[I 2025-11-20 08:47:07,559] Trial 13 finished with value: 0.8044206198911465 and parameters: {'iterations': 376, 'depth': 7, 'learning_rate': 0.2599957906944455, 'l2_leaf_reg': 3.3122077050601906, 'border_count': 157, 'random_strength': 9.919547750113695, 'bagging_temperature': 0.22713901165087858}. Best is trial 13 with value: 0.8044206198911465.


Best trial: 13. Best value: 0.804421:  75%|███████▌  | 15/20 [47:15<36:37, 439.48s/it]

[I 2025-11-20 08:48:44,600] Trial 14 finished with value: 0.8014341026828788 and parameters: {'iterations': 361, 'depth': 7, 'learning_rate': 0.1887279139198364, 'l2_leaf_reg': 3.519918777432741, 'border_count': 164, 'random_strength': 7.219596994112967, 'bagging_temperature': 0.3122349359221165}. Best is trial 13 with value: 0.8044206198911465.


Best trial: 13. Best value: 0.804421:  80%|████████  | 16/20 [49:00<22:35, 338.80s/it]

[I 2025-11-20 08:50:29,583] Trial 15 finished with value: 0.8029125100612543 and parameters: {'iterations': 392, 'depth': 7, 'learning_rate': 0.19163228103827748, 'l2_leaf_reg': 3.825598252559864, 'border_count': 162, 'random_strength': 9.994687161020684, 'bagging_temperature': 0.2684528070017671}. Best is trial 13 with value: 0.8044206198911465.


Best trial: 13. Best value: 0.804421:  85%|████████▌ | 17/20 [50:14<12:57, 259.02s/it]

[I 2025-11-20 08:51:43,078] Trial 16 finished with value: 0.8006685294920188 and parameters: {'iterations': 294, 'depth': 7, 'learning_rate': 0.20781227979629854, 'l2_leaf_reg': 2.579713900164183, 'border_count': 92, 'random_strength': 8.34903372284339, 'bagging_temperature': 0.3739777057415845}. Best is trial 13 with value: 0.8044206198911465.


Best trial: 13. Best value: 0.804421:  90%|█████████ | 18/20 [51:36<06:51, 205.88s/it]

[I 2025-11-20 08:53:05,246] Trial 17 finished with value: 0.783511441912777 and parameters: {'iterations': 443, 'depth': 5, 'learning_rate': 0.04848754515310091, 'l2_leaf_reg': 4.747699312717497, 'border_count': 151, 'random_strength': 6.4931800423011925, 'bagging_temperature': 0.1797798214520778}. Best is trial 13 with value: 0.8044206198911465.


Best trial: 13. Best value: 0.804421:  95%|█████████▌| 19/20 [52:37<02:42, 162.33s/it]

[I 2025-11-20 08:54:06,142] Trial 18 finished with value: 0.7882312439750974 and parameters: {'iterations': 317, 'depth': 5, 'learning_rate': 0.08673832921866055, 'l2_leaf_reg': 2.1940896313268574, 'border_count': 198, 'random_strength': 3.8876919478638623, 'bagging_temperature': 0.3944294945377801}. Best is trial 13 with value: 0.8044206198911465.


Best trial: 13. Best value: 0.804421: 100%|██████████| 20/20 [54:28<00:00, 163.44s/it]

[I 2025-11-20 08:55:57,571] Trial 19 finished with value: 0.8008523908768035 and parameters: {'iterations': 387, 'depth': 7, 'learning_rate': 0.16299485419239088, 'l2_leaf_reg': 8.66365647571051, 'border_count': 184, 'random_strength': 8.512079589812016, 'bagging_temperature': 0.1527105193948213}. Best is trial 13 with value: 0.8044206198911465.

Best CatBoost F1-Score: 0.8044
Best CatBoost Parameters:
  iterations: 376
  depth: 7
  learning_rate: 0.2599957906944455
  l2_leaf_reg: 3.3122077050601906
  border_count: 157
  random_strength: 9.919547750113695
  bagging_temperature: 0.22713901165087858





3. SAVE BEST PARAMETERS

In [26]:
print("\n[3] Saving Best Parameters...")


[3] Saving Best Parameters...


In [27]:
best_params = {
    'XGBoost': study_xgb.best_params,
    'LightGBM': study_lgbm.best_params,
    'CatBoost': study_catboost.best_params
}

best_scores = {
    'XGBoost': study_xgb.best_value,
    'LightGBM': study_lgbm.best_value,
    'CatBoost': study_catboost.best_value
}

# Save to file
import json

with open('best_hyperparameters.json', 'w') as f:
    json.dump(best_params, f, indent=4)

print("\nBest parameters saved to 'best_hyperparameters.json'")


Best parameters saved to 'best_hyperparameters.json'


4. SUMMARY

In [28]:
print("\n" + "=" * 80)
print("HYPERPARAMETER TUNING SUMMARY")
print("=" * 80)

for model_name, score in best_scores.items():
    print(f"{model_name:20s} | Best F1-Score: {score:.4f}")

best_model = max(best_scores, key=best_scores.get)
print(f"\nBest Overall Model: {best_model} with F1-Score: {best_scores[best_model]:.4f}")

print("\n" + "=" * 80)
print("INSTRUCTIONS:")
print("=" * 80)
print("1. Use the best parameters from 'best_hyperparameters.json'")
print("2. Update the main pipeline with these optimized parameters")
print("3. Retrain models and generate final predictions")
print("=" * 80)


HYPERPARAMETER TUNING SUMMARY
XGBoost              | Best F1-Score: 0.8123
LightGBM             | Best F1-Score: 0.8111
CatBoost             | Best F1-Score: 0.8044

Best Overall Model: XGBoost with F1-Score: 0.8123

INSTRUCTIONS:
1. Use the best parameters from 'best_hyperparameters.json'
2. Update the main pipeline with these optimized parameters
3. Retrain models and generate final predictions


5. TRAIN FINAL MODELS WITH BEST PARAMETERS

In [29]:
print("\n[5] Training Final Models with Best Parameters...")




[5] Training Final Models with Best Parameters...


In [30]:
# Train final models
final_models = {}

# XGBoost
xgb_params = study_xgb.best_params.copy()
xgb_params.update({'random_state': 42, 'eval_metric': 'mlogloss', 'tree_method': 'hist'})
final_models['XGBoost'] = XGBClassifier(**xgb_params)
final_models['XGBoost'].fit(X_train_balanced, y_train_balanced)

# LightGBM
lgbm_params = study_lgbm.best_params.copy()
lgbm_params.update({'random_state': 42, 'verbose': -1})
final_models['LightGBM'] = LGBMClassifier(**lgbm_params)
final_models['LightGBM'].fit(X_train_balanced, y_train_balanced)

# CatBoost
cat_params = study_catboost.best_params.copy()
cat_params.update({'random_state': 42, 'verbose': False})
final_models['CatBoost'] = CatBoostClassifier(**cat_params)
final_models['CatBoost'].fit(X_train_balanced, y_train_balanced)

print("\nFinal models trained successfully!")
print("You can now use these models in the main pipeline for predictions.")

# Save models (optional)
import pickle

for model_name, model in final_models.items():
    with open(f'model_{model_name.lower()}_optimized.pkl', 'wb') as f:
        pickle.dump(model, f)
    print(f"Saved: model_{model_name.lower()}_optimized.pkl")

print("\n" + "=" * 80)
print("HYPERPARAMETER TUNING COMPLETED!")
print("=" * 80)


Final models trained successfully!
You can now use these models in the main pipeline for predictions.
Saved: model_xgboost_optimized.pkl
Saved: model_lightgbm_optimized.pkl
Saved: model_catboost_optimized.pkl

HYPERPARAMETER TUNING COMPLETED!
