In [1]:
import torch
torch.set_default_dtype(torch.double)
from activephasemap.models.mlp import MLP
import xgboost as xgb
from sklearn.metrics import root_mean_squared_error
from sklearn.model_selection import train_test_split
import numpy as np 

In [5]:
# Read the three pre-saved training tensors from ./boosting_data.
# weights_only=True is passed to every torch.load call below.
# NOTE(review): the file names suggest inputs plus the mean / std of the
# targets -- confirm against whatever wrote these files.
train_x = torch.load(
    "./boosting_data/train_x_4.pt",
    weights_only=True,
)
train_z_mean = torch.load(
    "./boosting_data/train_z_mean_4.pt",
    weights_only=True,
)
train_z_std = torch.load(
    "./boosting_data/train_z_std_4.pt",
    weights_only=True,
)

In [6]:
# Keyword arguments forwarded verbatim to the MLP constructor.
mlp_model_args = {
    "num_epochs": 1000,
    "learning_rate": 1e-3,
    "verbose": 100,
}

# Build the model from the loaded tensors, then fit it.
comp_model = MLP(
    train_x,
    train_z_mean,
    train_z_std,
    **mlp_model_args,
)

# fit() returns two values, kept here as the training and evaluation losses.
# The keyword spelling `use_early_stoping` is the one this call site uses;
# do not "correct" it here without checking the MLP.fit signature.
comp_train_loss, comp_eval_loss = comp_model.fit(use_early_stoping=True)

Epoch   1/1000 - Loss: 2.274  Evaluation Loss: 2.020  Early stopping counter: 0
Epoch 101/1000 - Loss: 1.264  Evaluation Loss: 1.606  Early stopping counter: 35
Early stopping...
Epoch 174/1000 - Loss: 1.179  Evaluation Loss: 1.631 


In [7]:
# XGBoost regression smoke test on synthetic data.
#
# The real training tensors are not used yet; the intended conversion is kept
# below for reference:
# X_np = train_x.numpy()
# y_np = torch.cat((train_z_mean, train_z_std), dim=1).numpy()

# Seeded generator so the synthetic data (and therefore the reported RMSE)
# is identical on every Restart & Run All.
rng = np.random.default_rng(42)

# 50 samples, 2 features; the target is the feature sum plus small Gaussian noise.
X_xgb = rng.standard_normal((50, 2))
y_xgb = np.sum(X_xgb, axis=1) + rng.standard_normal(50) * 0.1

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X_xgb, y_xgb, test_size=0.2, random_state=42
)
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# XGBoost parameters for regression.
params = {
    'objective': 'reg:squarederror',  # Regression objective
    'max_depth': 5,
    'eta': 0.1,  # Learning rate
    'eval_metric': 'rmse',
    'tree_method': 'hist'
}

# Datasets whose metric is reported after each boosting round.
evals = [(dtrain, 'train'), (dtest, 'test')]

# Train with per-round evaluation output. num_boost_round is spelled out so
# the (small) number of rounds is visible; 10 is the value xgb.train uses
# when the argument is omitted, so behaviour is unchanged.
model = xgb.train(
    params,
    dtrain,
    num_boost_round=10,
    evals=evals,
    verbose_eval=True
)

# Predict on the held-out set.
y_pred = model.predict(dtest)

# root_mean_squared_error already returns the RMSE and has no `squared`
# parameter (that keyword belongs to mean_squared_error); passing it raises
# a TypeError.
rmse = root_mean_squared_error(y_test, y_pred)
print(f"Test RMSE: {rmse:.4f}")

In [2]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV

# Fetch the California housing data as a (features, target) pair.
X, y = fetch_california_housing(return_X_y=True)

# Histogram-based XGBoost regressor used as the base estimator.
xgb_model = xgb.XGBRegressor(tree_method="hist")

# Exhaustive search over tree depth and ensemble size, using GridSearchCV's
# default cross-validation and scoring settings.
clf = GridSearchCV(
    estimator=xgb_model,
    param_grid={
        "max_depth": [2, 4, 6],
        "n_estimators": [50, 100, 200],
    },
    verbose=1,
)
clf.fit(X, y)

# Report the best cross-validated score, then the parameters that achieved it.
print(clf.best_score_, clf.best_params_, sep="\n")