In [None]:
from catboost import CatBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

## GBR

In [None]:
# Search space handed to GridSearchCV when tuning GradientBoostingRegressor.
gbr_param_grid = dict(
    n_estimators=[50, 100, 150],
    max_depth=[3, 4, 5, 6],
    learning_rate=[0.01, 0.1, 0.2],
    # Further candidates, currently left out of the search:
    # subsample=[0.5, 0.7, 0.9, 1.0],
    # min_samples_split=[2, 5, 10],
    # min_samples_leaf=[1, 2, 4],
    # max_features=['auto', 'sqrt', 'log2', 0.5, 0.7, None],
    # min_impurity_decrease=[0.0, 0.01, 0.05],
)

def create_and_tune_gbr(X_train, y_train, X_val, y_val):
    """Grid-search a GradientBoostingRegressor against a fixed validation split.

    The training and validation rows are stacked into one dataset and a
    ``PredefinedSplit`` marks the training rows as never-tested (-1) and the
    validation rows as the single test fold (0). Every candidate from
    ``gbr_param_grid`` is therefore fit on the training rows and scored on the
    validation rows with negative mean squared error.

    Args:
        X_train: Training features (stacked row-wise with ``X_val``).
        y_train: Training targets (concatenated with ``y_val``).
        X_val: Validation features.
        y_val: Validation targets.

    Returns:
        ``gridsearch.best_estimator_``. GridSearchCV is used with its default
        ``refit`` setting, so the returned estimator has been refit on the
        combined training + validation rows.
    """
    # Combine training and validation data
    X_combined = np.vstack((X_train, X_val))
    y_combined = np.hstack((y_train, y_val))

    # -1 = row is never part of a test fold, 0 = row belongs to the only test fold
    test_fold = [-1]*len(X_train) + [0]*len(X_val)
    ps = PredefinedSplit(test_fold)

    # Initialize GridSearchCV
    gridsearch = GridSearchCV(GradientBoostingRegressor(random_state=1),
                              gbr_param_grid,
                              cv=ps,
                              scoring='neg_mean_squared_error',
                              n_jobs=-1)

    # Fit the model
    gridsearch.fit(X_combined, y_combined)

    return gridsearch.best_estimator_

In [None]:
# Clip negative GBR predictions to zero in place. The mask is applied to the
# same array it was computed from; the upper-case ``y_pred_A_gbr`` names are
# only created later by the scaling cell and are plain lists, which cannot be
# indexed with a boolean mask.
y_pred_a_gbr[y_pred_a_gbr < 0] = 0
y_pred_b_gbr[y_pred_b_gbr < 0] = 0
y_pred_c_gbr[y_pred_c_gbr < 0] = 0

## CatBoost

In [None]:
# Search space handed to GridSearchCV when tuning CatBoostRegressor.
cat_param_grid = dict(
    iterations=[100, 200, 300],
    learning_rate=[0.01, 0.1, 0.2],
    depth=[4, 6, 8],
    # Extend with any other hyperparameters that should be tuned.
)

def create_and_tune_catboost(X_train, y_train, X_val, y_val):
    """Grid-search a CatBoostRegressor against a fixed validation split.

    The training and validation rows are stacked into one dataset and a
    ``PredefinedSplit`` marks the training rows as never-tested (-1) and the
    validation rows as the single test fold (0). Every candidate from
    ``cat_param_grid`` is therefore fit on the training rows and scored on the
    validation rows with negative mean squared error.

    Args:
        X_train: Training features (stacked row-wise with ``X_val``).
        y_train: Training targets (concatenated with ``y_val``).
        X_val: Validation features.
        y_val: Validation targets.

    Returns:
        ``gridsearch.best_estimator_``. GridSearchCV is used with its default
        ``refit`` setting, so the returned estimator has been refit on the
        combined training + validation rows.
    """
    # Combine training and validation data
    X_combined = np.vstack((X_train, X_val))
    y_combined = np.hstack((y_train, y_val))

    # -1 = row is never part of a test fold, 0 = row belongs to the only test fold
    test_fold = [-1]*len(X_train) + [0]*len(X_val)
    ps = PredefinedSplit(test_fold)

    # Initialize GridSearchCV with CatBoostRegressor
    gridsearch = GridSearchCV(CatBoostRegressor(),
                              cat_param_grid,
                              cv=ps,
                              scoring='neg_mean_squared_error',
                              n_jobs=-1)

    # Fit the model
    gridsearch.fit(X_combined, y_combined)

    # The validation-set prediction that used to be computed here was never
    # used or returned, so it has been dropped.
    return gridsearch.best_estimator_

In [None]:
# Tune one CatBoost model for each of the A, B and C datasets.
catboost_model_A = create_and_tune_catboost(
    X_train_A, y_train_A, X_val_A, y_val_A
)
catboost_model_B = create_and_tune_catboost(
    X_train_B, y_train_B, X_val_B, y_val_B
)
catboost_model_C = create_and_tune_catboost(
    X_train_C, y_train_C, X_val_C, y_val_C
)

In [None]:
# Flattened test-set predictions from each tuned CatBoost model.
y_pred_a_catboost = np.ravel(catboost_model_A.predict(X_test_A))
y_pred_b_catboost = np.ravel(catboost_model_B.predict(X_test_B))
y_pred_c_catboost = np.ravel(catboost_model_C.predict(X_test_C))

In [None]:
# Clip negative CatBoost predictions to zero in place. The mask must come from
# the CatBoost predictions themselves; it was previously built from the LGBM
# predictions, so negative CatBoost values could slip through.
y_pred_a_catboost[y_pred_a_catboost < 0] = 0
y_pred_b_catboost[y_pred_b_catboost < 0] = 0
y_pred_c_catboost[y_pred_c_catboost < 0] = 0

## Scaling values

In [None]:
def scale_values(values, orig_min, orig_max, scaled_min, scaled_max):
    """Linearly map ``values`` from [orig_min, orig_max] onto [scaled_min, scaled_max].

    Args:
        values: Iterable of numbers to rescale.
        orig_min: Value that maps to ``scaled_min``.
        orig_max: Value that maps to ``scaled_max``.
        scaled_min: Lower bound of the target range.
        scaled_max: Upper bound of the target range.

    Returns:
        A new list with one rescaled entry per input value. When
        ``orig_min == orig_max`` the source range has zero width and the
        mapping is undefined; every entry is then set to ``scaled_min``
        instead of dividing by zero.
    """
    orig_span = orig_max - orig_min
    if orig_span == 0:
        # Degenerate (constant) input: avoid ZeroDivisionError / NaN output.
        return [scaled_min for _ in values]
    scaled_span = scaled_max - scaled_min
    return [(x - orig_min) / orig_span * scaled_span + scaled_min for x in values]

In [None]:
# Min-max rescale each model's predictions: the A series onto [0, 5000],
# the B and C series onto [0, 1000]. The results are plain Python lists.

# MLP
y_pred_A_mlp = scale_values(
    y_pred_a_mlp,
    orig_min=min(y_pred_a_mlp), orig_max=max(y_pred_a_mlp),
    scaled_min=0, scaled_max=5000,
)
y_pred_B_mlp = scale_values(
    y_pred_b_mlp,
    orig_min=min(y_pred_b_mlp), orig_max=max(y_pred_b_mlp),
    scaled_min=0, scaled_max=1000,
)
y_pred_C_mlp = scale_values(
    y_pred_c_mlp,
    orig_min=min(y_pred_c_mlp), orig_max=max(y_pred_c_mlp),
    scaled_min=0, scaled_max=1000,
)

# LGBM
y_pred_A_lgbm = scale_values(
    y_pred_a_lgbm,
    orig_min=min(y_pred_a_lgbm), orig_max=max(y_pred_a_lgbm),
    scaled_min=0, scaled_max=5000,
)
y_pred_B_lgbm = scale_values(
    y_pred_b_lgbm,
    orig_min=min(y_pred_b_lgbm), orig_max=max(y_pred_b_lgbm),
    scaled_min=0, scaled_max=1000,
)
y_pred_C_lgbm = scale_values(
    y_pred_c_lgbm,
    orig_min=min(y_pred_c_lgbm), orig_max=max(y_pred_c_lgbm),
    scaled_min=0, scaled_max=1000,
)

# GBR
y_pred_A_gbr = scale_values(
    y_pred_a_gbr,
    orig_min=min(y_pred_a_gbr), orig_max=max(y_pred_a_gbr),
    scaled_min=0, scaled_max=5000,
)
y_pred_B_gbr = scale_values(
    y_pred_b_gbr,
    orig_min=min(y_pred_b_gbr), orig_max=max(y_pred_b_gbr),
    scaled_min=0, scaled_max=1000,
)
y_pred_C_gbr = scale_values(
    y_pred_c_gbr,
    orig_min=min(y_pred_c_gbr), orig_max=max(y_pred_c_gbr),
    scaled_min=0, scaled_max=1000,
)


## LightGBM: several models

In [3]:
def create_lgbm_models(X_train, y_train, X_val, y_val, lgbm_param_grid_1, lgbm_param_grid_2):
    """Fit two LightGBM regressors with different hyperparameters on the same data.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_val: Validation features, passed to ``fit`` as the evaluation set.
        y_val: Validation targets, passed to ``fit`` as the evaluation set.
        lgbm_param_grid_1: Keyword arguments for the first ``LGBMRegressor``.
        lgbm_param_grid_2: Keyword arguments for the second ``LGBMRegressor``.

    Returns:
        Tuple ``(model_1, model_2)`` of fitted regressors, in the order of the
        two parameter dicts.
    """
    def _fit_one(params):
        # Build and fit a single regressor, logging the eval metric every 100 rounds.
        model = lgb.LGBMRegressor(**params, verbose=-1)
        fit_kwargs = {'eval_set': [(X_val, y_val)]}
        if hasattr(lgb, 'log_evaluation'):
            # LightGBM >= 3.3: the ``verbose`` argument of ``fit`` is
            # deprecated and was removed in 4.0; the callback replaces it.
            fit_kwargs['callbacks'] = [lgb.log_evaluation(period=100)]
        else:
            # Older LightGBM without the callback: keep the original argument.
            fit_kwargs['verbose'] = 100
        model.fit(X_train, y_train, **fit_kwargs)
        return model

    model_1 = _fit_one(lgbm_param_grid_1)
    model_2 = _fit_one(lgbm_param_grid_2)

    return model_1, model_2

# Fixed hyperparameter sets for the two extra LightGBM models.
# Set 1: 'gbdt' boosting, depth 8, 400 estimators, learning rate 0.1.
lgbm_param_grid_1 = dict(
    learning_rate=0.1,
    n_estimators=400,
    random_state=1,
    max_depth=8,
    subsample=0.5,
    boosting_type='gbdt',
)

# Set 2: 'goss' boosting, depth 3, 150 estimators, learning rate 0.01.
lgbm_param_grid_2 = dict(
    learning_rate=0.01,
    n_estimators=150,
    random_state=1,
    max_depth=3,
    subsample=0.8,
    boosting_type='goss',
)

In [None]:
# Fit the two extra LightGBM variants (suffix 2 and 3) for each of A, B and C.
lgbm_model_A2, lgbm_model_A3 = create_lgbm_models(
    X_train_A, y_train_A, X_val_A, y_val_A,
    lgbm_param_grid_1, lgbm_param_grid_2,
)
lgbm_model_B2, lgbm_model_B3 = create_lgbm_models(
    X_train_B, y_train_B, X_val_B, y_val_B,
    lgbm_param_grid_1, lgbm_param_grid_2,
)
lgbm_model_C2, lgbm_model_C3 = create_lgbm_models(
    X_train_C, y_train_C, X_val_C, y_val_C,
    lgbm_param_grid_1, lgbm_param_grid_2,
)

In [None]:
# Flattened test-set predictions from the first extra LightGBM variant ...
y_pred_a_lgbm2 = np.ravel(lgbm_model_A2.predict(X_test_A))
y_pred_b_lgbm2 = np.ravel(lgbm_model_B2.predict(X_test_B))
y_pred_c_lgbm2 = np.ravel(lgbm_model_C2.predict(X_test_C))

# ... and from the second one.
y_pred_a_lgbm3 = np.ravel(lgbm_model_A3.predict(X_test_A))
y_pred_b_lgbm3 = np.ravel(lgbm_model_B3.predict(X_test_B))
y_pred_c_lgbm3 = np.ravel(lgbm_model_C3.predict(X_test_C))

In [None]:
# Clip negative predictions of the two extra LightGBM variants to zero in
# place. Each mask is applied to the array it was computed from; previously
# the masks were applied to ``y_pred_*_lgbm``, which left the lgbm2/lgbm3
# arrays unclipped and zeroed unrelated entries of the base LGBM predictions.
y_pred_a_lgbm2[y_pred_a_lgbm2 < 0] = 0
y_pred_b_lgbm2[y_pred_b_lgbm2 < 0] = 0
y_pred_c_lgbm2[y_pred_c_lgbm2 < 0] = 0

y_pred_a_lgbm3[y_pred_a_lgbm3 < 0] = 0
y_pred_b_lgbm3[y_pred_b_lgbm3 < 0] = 0
y_pred_c_lgbm3[y_pred_c_lgbm3 < 0] = 0

In [None]:
# Ensemble: fourth root of the product of four model predictions (a geometric
# mean). The product is floored at zero before the root is taken.
y_pred_A = np.power(
    np.maximum(y_pred_a_lgbm * y_pred_a_mlp * y_pred_a_lgbm2 * y_pred_a_lgbm3, 0),
    1/4,
)
y_pred_B = np.power(
    np.maximum(y_pred_b_lgbm * y_pred_b_mlp * y_pred_b_lgbm2 * y_pred_b_lgbm3, 0),
    1/4,
)
y_pred_C = np.power(
    np.maximum(y_pred_c_lgbm * y_pred_c_mlp * y_pred_c_lgbm2 * y_pred_c_lgbm3, 0),
    1/4,
)

## HIST

In [None]:
# Hyperparameter grid for HistGradientBoostingRegressor.
# The original literal was missing a comma after the 'tol' entry (a
# SyntaxError) and listed 'learning_rate', 'max_iter' and 'loss' twice. In a
# dict literal the later value wins, so those keys are pinned to the later
# single values here and the wider ranges are kept as comments.
hist_param_grid = {
    'learning_rate': [0.1],          # wider range: [0.01, 0.1, 0.5]
    'max_iter': [50],                # wider range: [50, 100]
    'max_leaf_nodes': [31, None],
    'l2_regularization': [0.0, 0.1],
    'max_depth': [None, 10],
    'min_samples_leaf': [1, 20],
    'loss': ['squared_error'],       # wider range: ['squared_error', 'poisson']
    'early_stopping': [False, True],
    'n_iter_no_change': [10],  # if early_stopping is True
    'tol': [1e-6],
}

def create_and_tune_histgb(X_train, y_train, X_val, y_val):
    """Grid-search a HistGradientBoostingRegressor against a fixed validation split.

    The training and validation rows are stacked into one dataset and a
    ``PredefinedSplit`` marks the training rows as never-tested (-1) and the
    validation rows as the single test fold (0). Every candidate from
    ``hist_param_grid`` is therefore fit on the training rows and scored on
    the validation rows with negative mean absolute error. The best (positive)
    MAE is printed.

    Args:
        X_train: Training features (stacked row-wise with ``X_val``).
        y_train: Training targets (concatenated with ``y_val``).
        X_val: Validation features.
        y_val: Validation targets.

    Returns:
        ``gridsearch.best_estimator_``. GridSearchCV is used with its default
        ``refit`` setting, so the returned estimator has been refit on the
        combined training + validation rows.
    """
    X_combined = np.vstack((X_train, X_val))
    y_combined = np.hstack((y_train, y_val))

    # -1 = row is never part of a test fold, 0 = row belongs to the only test fold
    test_fold = [-1]*len(X_train) + [0]*len(X_val)

    ps = PredefinedSplit(test_fold)

    # random_state is fixed (as for the GBR and LGBM models) because the grid
    # includes early_stopping=True, which holds out a random internal
    # validation subset; without a seed the search is not reproducible.
    gridsearch = GridSearchCV(HistGradientBoostingRegressor(random_state=1), hist_param_grid, cv=ps, scoring='neg_mean_absolute_error', verbose=1)

    gridsearch.fit(X_combined, y_combined)

    # best_score_ is the negated MAE, so flip the sign for display.
    print("Best Mean Absolute Error from Grid Search:", -gridsearch.best_score_)

    return gridsearch.best_estimator_


In [None]:
# Tune one HistGradientBoosting model for each of the A, B and C datasets.
hist_model_A = create_and_tune_histgb(
    X_train_A, y_train_A, X_val_A, y_val_A
)
hist_model_B = create_and_tune_histgb(
    X_train_B, y_train_B, X_val_B, y_val_B
)
hist_model_C = create_and_tune_histgb(
    X_train_C, y_train_C, X_val_C, y_val_C
)

In [None]:
# Flattened test-set predictions from each tuned HistGradientBoosting model.
y_pred_a_hist = np.ravel(hist_model_A.predict(X_test_A))
y_pred_b_hist = np.ravel(hist_model_B.predict(X_test_B))
y_pred_c_hist = np.ravel(hist_model_C.predict(X_test_C))

In [None]:
# Replace negative HistGradientBoosting predictions with zero, in place.
np.putmask(y_pred_a_hist, y_pred_a_hist < 0, 0)
np.putmask(y_pred_b_hist, y_pred_b_hist < 0, 0)
np.putmask(y_pred_c_hist, y_pred_c_hist < 0, 0)

## Hyperparameter tuning: MLP and LightGBM

In [None]:
# Train the MLP for each of A, B and C; the last argument is the .h5 path
# handed to train_and_save_mlp_model.
mlp_model_A = train_and_save_mlp_model(
    X_train_A, y_train_A, X_val_A, y_val_A, 'models/model_location_A.h5'
)
mlp_model_B = train_and_save_mlp_model(
    X_train_B, y_train_B, X_val_B, y_val_B, 'models/model_location_B.h5'
)
mlp_model_C = train_and_save_mlp_model(
    X_train_C, y_train_C, X_val_C, y_val_C, 'models/model_location_C.h5'
)

# Tune the base LightGBM model for each of A, B and C.
# NOTE(review): earlier cells already read y_pred_*_mlp / y_pred_*_lgbm;
# if those come from these models, this cell has to run before them - confirm.
lgbm_model_A = create_and_tune_lgbm(
    X_train_A, y_train_A, X_val_A, y_val_A
)
lgbm_model_B = create_and_tune_lgbm(
    X_train_B, y_train_B, X_val_B, y_val_B
)
lgbm_model_C = create_and_tune_lgbm(
    X_train_C, y_train_C, X_val_C, y_val_C
)