In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

import model_imports as mi

In [None]:
# Starting-point XGBoost regressor for the data set that kept samples with at
# least 2 bands present. When this estimator is handed to the randomized
# search, any value below that also appears in `param_dist` is replaced by the
# sampled candidate; the rest (objective, random_state) stay fixed.
xgb = XGBRegressor(
    objective="reg:squarederror",
    n_estimators=500,
    learning_rate=0.1,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
)

# Discrete candidate values for each hyperparameter; the randomized search
# draws combinations from these lists.
param_dist = dict(
    n_estimators=[300, 600, 900],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.05, 0.1],
    subsample=[0.7, 1.0],
    colsample_bytree=[0.7, 1.0],
    min_child_weight=[1, 3, 5],
)

In [3]:
# Randomized hyperparameter search for the 2-band model.
# `RandomizedSearchCV` is imported in the notebook's import cell at the top so
# that every dependency is declared in one place.
xgb_search = RandomizedSearchCV(
    estimator=xgb,
    param_distributions=param_dist,
    n_iter=15,                  # number of random combos to try
    cv=5,                       # 5-fold CV
    # scikit-learn maximizes scores, so RMSE is exposed negated; flip the sign
    # of `best_score_` when reporting it.
    scoring="neg_root_mean_squared_error",
    random_state=42,            # makes the sampled combinations repeatable
    n_jobs=-1,                  # use all available cores
    verbose=1,
)

In [4]:
# Run the randomized search on mi.X_train / mi.y_train (the 2-band data).
xgb_search.fit(mi.X_train, mi.y_train)

# Keep the winning estimator, which the search refits by default, for the
# evaluation cell that follows.
best_xgb_2band = xgb_search.best_estimator_

# The search was scored with negated RMSE, so negate it back before printing.
best_params_2band = xgb_search.best_params_
best_cv_rmse_2band = -xgb_search.best_score_
print("Best params (2-band):", best_params_2band)
print("Best CV RMSE (2-band):", best_cv_rmse_2band)

Fitting 5 folds for each of 15 candidates, totalling 75 fits


Traceback (most recent call last):
  File [35m"/opt/miniconda3/envs/simple-db/lib/python3.13/site-packages/joblib/externals/loky/backend/resource_tracker.py"[0m, line [35m297[0m, in [35mmain[0m
    raise ValueError(
    ...<4 lines>...
    )
[1;35mValueError[0m: [35mCannot register "REGISTER","rtype":"folder","base64_name" for automatic cleanup: unknown resource type ("L3Zhci9mb2xkZXJzL3RxLzNwNzR3enc1MjB2M3JubGczeWxuMGc2bTAwMDBncC9UL2pvYmxpYl9tZW1tYXBwaW5nX2ZvbGRlcl8zNTIzOF9iNTg2MDQyNmVhNGY0ODE3ODAyMmVkOTIyMjRlMjUzM180ZTkxMDZmYjFmZWQ0MjgzOWY3NGM1YTE1OGQ5YjVhMw=="}). Resource type should be one of the following: ['noop', 'folder', 'file', 'semlock'][0m
Traceback (most recent call last):
  File [35m"/opt/miniconda3/envs/simple-db/lib/python3.13/site-packages/joblib/externals/loky/backend/resource_tracker.py"[0m, line [35m297[0m, in [35mmain[0m
    raise ValueError(
    ...<4 lines>...
    )
[1;35mValueError[0m: [35mCannot register "REGISTER","rtype":"semlock","base64_n

Best params (2-band): {'subsample': 0.7, 'n_estimators': 900, 'min_child_weight': 5, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
Best CV RMSE (2-band): 1.9540166171127868


In [5]:
# Predict on mi.X_test with the tuned 2-band model, then hand the predictions
# to the shared evaluation helper, which prints the metric report.
y_pred_2band = best_xgb_2band.predict(mi.X_test)
mi.evaluate_model(
    model_name="Tuned XGB Regressor",
    model=best_xgb_2band,
    y_pred=y_pred_2band,
    two_bands=True,
    colors_only=True,
)

=== Tuned XGB Regressor results for samples with at least 2 bands trained on colors only ===
Mean Squared Error: 3.130
Root Mean Squared Error: 1.769
R^2 on test: 0.926
Fraction within ±1 subtype: 0.698
Fraction within ±2 subtypes: 0.858


In [6]:
# Starting-point XGBoost regressor for the data set that kept samples with at
# least 3 bands. It uses the same initial settings as the 2-band model; the
# randomized search replaces whichever of them appear in `param_dist`.
xgb_3band = XGBRegressor(
    objective="reg:squarederror",
    n_estimators=500,
    learning_rate=0.1,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
)

In [7]:
# Randomized hyperparameter search for the 3-band model. It reuses the 2-band
# search's candidate grid and settings; only the base estimator differs.
xgb_search_3band = RandomizedSearchCV(
    xgb_3band,
    param_distributions=param_dist,
    scoring="neg_root_mean_squared_error",  # negated RMSE: higher is better
    n_iter=15,        # random parameter combinations to evaluate
    cv=5,             # folds per combination
    n_jobs=-1,        # use all available cores
    random_state=42,
    verbose=1,
)

In [8]:
# Run the randomized search on mi.X_3bands_train / mi.y_3bands_train.
xgb_search_3band.fit(mi.X_3bands_train, mi.y_3bands_train)

# Keep the winning estimator, which the search refits by default, for the
# evaluation cell that follows.
best_xgb_3band = xgb_search_3band.best_estimator_

# The search was scored with negated RMSE, so negate it back before printing.
best_params_3band = xgb_search_3band.best_params_
best_cv_rmse_3band = -xgb_search_3band.best_score_
print("Best params (3-band):", best_params_3band)
print("Best CV RMSE (3-band):", best_cv_rmse_3band)

Fitting 5 folds for each of 15 candidates, totalling 75 fits


Traceback (most recent call last):
  File [35m"/opt/miniconda3/envs/simple-db/lib/python3.13/site-packages/joblib/externals/loky/backend/resource_tracker.py"[0m, line [35m297[0m, in [35mmain[0m
    raise ValueError(
    ...<4 lines>...
    )
[1;35mValueError[0m: [35mCannot register "REGISTER","rtype":"folder","base64_name" for automatic cleanup: unknown resource type ("L3Zhci9mb2xkZXJzL3RxLzNwNzR3enc1MjB2M3JubGczeWxuMGc2bTAwMDBncC9UL2pvYmxpYl9tZW1tYXBwaW5nX2ZvbGRlcl8zNTIzOF8wOWM5ZTkzOWUyMzE0N2QzYjk0YzlhOTNjMzQ3YjdmOF84NTUwMzFjYzBhZDM0MzYzOTcwZWNhYjc3MjY3NGVjNA=="}). Resource type should be one of the following: ['noop', 'folder', 'file', 'semlock'][0m
Traceback (most recent call last):
  File [35m"/opt/miniconda3/envs/simple-db/lib/python3.13/site-packages/joblib/externals/loky/backend/resource_tracker.py"[0m, line [35m297[0m, in [35mmain[0m
    raise ValueError(
    ...<4 lines>...
    )
[1;35mValueError[0m: [35mCannot register "REGISTER","rtype":"folder","base64_na

Best params (3-band): {'subsample': 0.7, 'n_estimators': 900, 'min_child_weight': 5, 'max_depth': 5, 'learning_rate': 0.01, 'colsample_bytree': 1.0}
Best CV RMSE (3-band): 1.4781935954301573


In [9]:
# Predict on mi.X_3bands_test with the tuned 3-band model, then hand the
# predictions to the shared evaluation helper, which prints the metric report.
y_pred_3band = best_xgb_3band.predict(mi.X_3bands_test)
mi.evaluate_model(
    model_name="Tuned XGB Regressor",
    model=best_xgb_3band,
    y_pred=y_pred_3band,
    two_bands=False,
    colors_only=True,
)

=== Tuned XGB Regressor results for samples with at least 3 bands trained on colors only ===
Mean Squared Error: 2.544
Root Mean Squared Error: 1.595
R^2 on test: 0.884
Fraction within ±1 subtype: 0.769
Fraction within ±2 subtypes: 0.918


Traceback (most recent call last):
  File [35m"/opt/miniconda3/envs/simple-db/lib/python3.13/site-packages/joblib/externals/loky/backend/resource_tracker.py"[0m, line [35m297[0m, in [35mmain[0m
    raise ValueError(
    ...<4 lines>...
    )
[1;35mValueError[0m: [35mCannot register "UNREGISTER","rtype":"semlock","base64_name" for automatic cleanup: unknown resource type ("L2xva3ktMzUyMzgtOXhnbXd1MF8="}). Resource type should be one of the following: ['noop', 'folder', 'file', 'semlock'][0m
Traceback (most recent call last):
  File [35m"/opt/miniconda3/envs/simple-db/lib/python3.13/site-packages/joblib/externals/loky/backend/resource_tracker.py"[0m, line [35m297[0m, in [35mmain[0m
    raise ValueError(
    ...<4 lines>...
    )
[1;35mValueError[0m: [35mCannot register "UNREGISTER","rtype":"semlock","base64_name" for automatic cleanup: unknown resource type ("L2xva3ktMzUyMzgtMDZyMmN6M2E="}). Resource type should be one of the following: ['noop', 'folder', 'file', 'seml

In [10]:
# Persist the 2-band predictions as a plain NumPy array.
np.save("y_pred_xgb_tuned.npy", y_pred_2band)

# Persist the tuned 2-band model in XGBoost's native JSON format. Unlike a
# pickle, this format is meant to be loaded by other XGBoost versions and does
# not execute arbitrary code when read back
# (XGBRegressor().load_model("xgb_model_tuned.json")).
best_xgb_2band.save_model("xgb_model_tuned.json")

# Legacy artifact, kept so existing readers of this file keep working.
# np.save cannot serialize an estimator natively: it pickles the object into a
# 0-d object array, which can only be restored with
# np.load("xgb_model_tuned.npy", allow_pickle=True).item().
# NOTE(review): only load this file from a trusted source, and prefer the JSON
# artifact above for new code.
np.save("xgb_model_tuned.npy", best_xgb_2band)