In [None]:
# optuna code for ridge model

def objective_ridge(trial):
    """Optuna objective for Ridge: mean 5-fold cross-validated RMSE.

    Samples ``alpha`` (log-uniform in [0.001, 100]) and ``solver`` from the
    trial, builds a ``Ridge`` with fixed ``random_state`` and ``tol``, and
    cross-validates it on ``X_train_preprocessed`` / ``y_train``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean of the per-fold RMSE values (lower is better).
    """
    param = {
        'alpha': trial.suggest_float('alpha', 0.001, 100.0, log=True),
        'solver': trial.suggest_categorical('solver', ['auto', 'svd', 'cholesky', 'lsqr', 'sag', 'saga']),
        'random_state': 42,
        'tol': 1e-4
    }

    model = Ridge(**param)
    kf = KFold(n_splits=5, shuffle=True, random_state=300)

    # Score with per-fold RMSE and average the folds, like the other
    # objectives in this notebook. The previous sqrt(mean(MSE)) is a different
    # aggregate, so the "best RMSE" values were not comparable across models.
    score = cross_val_score(model, X_train_preprocessed, y_train,
                            cv=kf, scoring='neg_root_mean_squared_error', n_jobs=-1)

    # The scorer is negated (higher is better), so flip the sign back.
    return -score.mean()

# Tune alpha/solver with a seeded TPE sampler: 30 trials minimising the
# value returned by objective_ridge.
sampler = optuna.samplers.TPESampler(seed=42)
study_ridge = optuna.create_study(direction='minimize', sampler=sampler)
study_ridge.optimize(objective_ridge, n_trials=30)

# best_params only holds the sampled values (alpha, solver). The fixed
# tol=1e-4 used inside objective_ridge must be passed again, otherwise the
# refit falls back to the library default, which is not 1e-4 on every
# scikit-learn release, and the final model would not be the tuned one.
# NOTE(review): random_state=30 differs from the 42 used during tuning;
# kept as in the original notebook - confirm this is intended.
final_ridge = Ridge(**study_ridge.best_params, random_state=30, tol=1e-4)

final_ridge.fit(X_train_preprocessed, y_train)

# RMSE of the refit model on the X_test_preprocessed / y_test data.
y_pred_ridge = final_ridge.predict(X_test_preprocessed)
actual_test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_ridge))

print(f"Best Ridge RMSE: {study_ridge.best_value}")
print(f"Internal Test RMSE: {actual_test_rmse}")
print(f"Best Ridge params: {study_ridge.best_params}")

In [None]:
# optuna code for XGBRegressor

def objective(trial):
    """Optuna objective for XGBRegressor.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean RMSE over 5 shuffled folds of ``X_train_preprocessed`` /
        ``y_train`` (the negated scorer output with its sign flipped back).
    """
    # Values drawn from the trial; the call order is kept as-is.
    searched = {
        'n_estimators': trial.suggest_int('n_estimators', 500, 3000),
        'max_depth': trial.suggest_int('max_depth', 3, 6),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
    }
    # Settings that are the same for every trial.
    # NOTE(review): n_jobs=-1 is requested both here and in cross_val_score
    # below - confirm the nested parallelism does not oversubscribe the CPUs.
    fixed = {'random_state': 42, 'n_jobs': -1}

    regressor = XGBRegressor(**searched, **fixed)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)

    fold_scores = cross_val_score(
        regressor,
        X_train_preprocessed,
        y_train,
        cv=folds,
        scoring='neg_root_mean_squared_error',
        n_jobs=-1,
    )

    mean_neg_rmse = fold_scores.mean()
    return -mean_neg_rmse

# Hyperparameter search: 50 seeded TPE trials minimising the value returned
# by `objective`.
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(sampler=sampler, direction='minimize')
study.optimize(objective, n_trials=50)

# Refit on X_train_preprocessed / y_train with the best sampled values.
final_xgb = XGBRegressor(random_state=35, **study.best_params)
final_xgb.fit(X_train_preprocessed, y_train)

# RMSE of the refit model on the X_test_preprocessed / y_test data.
y_pred_xgb = final_xgb.predict(X_test_preprocessed)
actual_test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_xgb))

# One print call, one line per item: same text as three separate prints.
print(
    f"Best XGB RMSE: {study.best_value}",
    f"Internal Test RMSE: {actual_test_rmse}",
    f"Best XGB params: {study.best_params}",
    sep="\n",
)

In [None]:
# optuna code for GradientBoostingRegressor

def objective_gbr(trial):
    """Optuna objective for GradientBoostingRegressor.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean RMSE over 5 shuffled folds of ``X_train_preprocessed`` /
        ``y_train`` (the negated scorer output with its sign flipped back).
    """
    # Values drawn from the trial; learning_rate is sampled on a log scale.
    searched = {
        'n_estimators': trial.suggest_int('n_estimators', 1000, 4000),
        'max_depth': trial.suggest_int('max_depth', 2, 6),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'subsample': trial.suggest_float('subsample', 0.2, 0.8),
        'max_features': trial.suggest_float('max_features', 0.1, 0.5),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 5, 25),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
    }

    # The seed is the only setting shared by every trial.
    regressor = GradientBoostingRegressor(**searched, random_state=42)
    folds = KFold(n_splits=5, shuffle=True, random_state=42)

    fold_scores = cross_val_score(
        regressor,
        X_train_preprocessed,
        y_train,
        cv=folds,
        scoring='neg_root_mean_squared_error',
        n_jobs=-1,
    )

    mean_neg_rmse = fold_scores.mean()
    return -mean_neg_rmse

# Hyperparameter search: 50 seeded TPE trials minimising the value returned
# by `objective_gbr`.
sampler_gbr = optuna.samplers.TPESampler(seed=42)
study_gbr = optuna.create_study(sampler=sampler_gbr, direction='minimize')
study_gbr.optimize(objective_gbr, n_trials=50)

# Refit on X_train_preprocessed / y_train with the best sampled values.
final_gbr = GradientBoostingRegressor(random_state=400, **study_gbr.best_params)
final_gbr.fit(X_train_preprocessed, y_train)

# RMSE of the refit model on the X_test_preprocessed / y_test data.
y_pred_gbr = final_gbr.predict(X_test_preprocessed)
actual_test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_gbr))

# One print call, one line per item: same text as three separate prints.
print(
    f"Best GBR RMSE: {study_gbr.best_value}",
    f"Internal Test RMSE: {actual_test_rmse}",
    f"Best GBR Params: {study_gbr.best_params}",
    sep="\n",
)

In [None]:
# optuna code for LGBMRegressor

def objective_lgbm(trial):
    """Optuna objective for LGBMRegressor.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean RMSE over 5 shuffled folds of ``X_train_preprocessed`` /
        ``y_train`` (the negated scorer output with its sign flipped back).
    """
    # Settings that are identical for every trial.
    fixed = {
        'objective': 'regression',
        'metric': 'rmse',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'random_state': 45,
    }
    # Values drawn from the trial; learning rate and both regularisation
    # terms are sampled on a log scale.
    searched = {
        'n_estimators': trial.suggest_int('n_estimators', 2000, 4000),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 10, 50),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 30),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 10.0, log=True),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.2, 0.8),
        'subsample': trial.suggest_float('subsample', 0.4, 1.0),
        'subsample_freq': trial.suggest_int('subsample_freq', 1, 7),
    }

    regressor = lgb.LGBMRegressor(**fixed, **searched)
    folds = KFold(n_splits=5, shuffle=True, random_state=45)

    fold_scores = cross_val_score(
        regressor,
        X_train_preprocessed,
        y_train,
        cv=folds,
        scoring='neg_root_mean_squared_error',
        n_jobs=-1,
    )

    mean_neg_rmse = fold_scores.mean()
    return -mean_neg_rmse

# Hyperparameter search: 50 seeded TPE trials minimising the value returned
# by `objective_lgbm`.
sampler_lgbm = optuna.samplers.TPESampler(seed=45)
study_lgbm = optuna.create_study(sampler=sampler_lgbm, direction='minimize')
study_lgbm.optimize(objective_lgbm, n_trials=50)

# Refit on X_train_preprocessed / y_train with the best sampled values.
final_lgbm = lgb.LGBMRegressor(random_state=400, verbose=-1, **study_lgbm.best_params)
final_lgbm.fit(X_train_preprocessed, y_train)

# RMSE of the refit model on the X_test_preprocessed / y_test data.
y_pred_lgbm = final_lgbm.predict(X_test_preprocessed)
actual_test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_lgbm))

# One print call, one line per item: same text as three separate prints.
print(
    f"Best LGBM RMSE: {study_lgbm.best_value}",
    f"Internal Test RMSE: {actual_test_rmse}",
    f"Best LGBM params: {study_lgbm.best_params}",
    sep="\n",
)

In [None]:
# optuna code for CatBoostRegressor


def objective_cat(trial):
    """Optuna objective for CatBoostRegressor.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean RMSE over 5 shuffled folds of ``X_train_preprocessed`` /
        ``y_train`` (the negated scorer output with its sign flipped back).
    """
    param = {
        'loss_function': 'RMSE',
        'random_seed': 45,  # CatBoost names its seed parameter random_seed
        'verbose': False,
        # The folds are fitted in parallel worker processes (n_jobs=-1 below).
        # By default each fit writes log/snapshot files into the same training
        # directory, so the workers can clash; nothing here reads those files,
        # so file output is switched off for the tuning runs.
        'allow_writing_files': False,
        'iterations': trial.suggest_int('iterations', 2000, 4000),
        'depth': trial.suggest_int('depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'random_strength': trial.suggest_float('random_strength', 0, 10),
        'bagging_temperature': trial.suggest_float('bagging_temperature', 0, 1),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1, 10),
    }

    model = CatBoostRegressor(**param)
    kf = KFold(n_splits=5, shuffle=True, random_state=45)

    score = cross_val_score(model, X_train_preprocessed, y_train,
                            cv=kf, scoring='neg_root_mean_squared_error', n_jobs=-1)

    # The scorer is negated (higher is better), so flip the sign back.
    return -score.mean()

# Hyperparameter search: 50 seeded TPE trials minimising the value returned
# by `objective_cat`.
sampler_cat = optuna.samplers.TPESampler(seed=45)
study_cat = optuna.create_study(sampler=sampler_cat, direction='minimize')
study_cat.optimize(objective_cat, n_trials=50)

# Refit on X_train_preprocessed / y_train with the best sampled values;
# logging is silenced both on the estimator and on the fit call.
final_cat = CatBoostRegressor(random_seed=400, verbose=False, **study_cat.best_params)
final_cat.fit(X_train_preprocessed, y_train, verbose=False)

# RMSE of the refit model on the X_test_preprocessed / y_test data.
y_pred_cat = final_cat.predict(X_test_preprocessed)
actual_test_rmse = np.sqrt(mean_squared_error(y_test, y_pred_cat))

# One print call, one line per item: same text as three separate prints.
print(
    f"Best CatBoost RMSE: {study_cat.best_value}",
    f"Internal Test RMSE: {actual_test_rmse}",
    f"Best CatBoost params: {study_cat.best_params}",
    sep="\n",
)