# XGBoost

In [8]:
import xgboost as xgb
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error, r2_score

In [4]:
# 1. Load data
data = fetch_california_housing()
X, y = data.data, data.target

# 2. Split train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# 3. Create DMatrix (optional but recommended for XGBoost API)
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

In [7]:
# 4. Set parameters for regression
params = {
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse',
    'seed': 42,
    'max_depth': 6,
    'eta': 0.1
}

# 5. Train the model
model = xgb.train(params, dtrain, num_boost_round=100, evals=[(dtest, 'test')], early_stopping_rounds=10)

# 6. Predict on test set
y_pred = model.predict(dtest)

[0]	test-rmse:1.07228
[1]	test-rmse:1.00727
[2]	test-rmse:0.95105
[3]	test-rmse:0.90250
[4]	test-rmse:0.86010
[5]	test-rmse:0.82211
[6]	test-rmse:0.78764
[7]	test-rmse:0.75983
[8]	test-rmse:0.73443
[9]	test-rmse:0.71270
[10]	test-rmse:0.69252
[11]	test-rmse:0.67598
[12]	test-rmse:0.66072
[13]	test-rmse:0.64712
[14]	test-rmse:0.63344
[15]	test-rmse:0.62444
[16]	test-rmse:0.61445
[17]	test-rmse:0.60576
[18]	test-rmse:0.59883
[19]	test-rmse:0.59342
[20]	test-rmse:0.58760
[21]	test-rmse:0.58172
[22]	test-rmse:0.57635
[23]	test-rmse:0.57233
[24]	test-rmse:0.56739
[25]	test-rmse:0.56162
[26]	test-rmse:0.55871
[27]	test-rmse:0.55625
[28]	test-rmse:0.55006
[29]	test-rmse:0.54704
[30]	test-rmse:0.54267
[31]	test-rmse:0.53846
[32]	test-rmse:0.53508
[33]	test-rmse:0.53043
[34]	test-rmse:0.52832
[35]	test-rmse:0.52664
[36]	test-rmse:0.52229
[37]	test-rmse:0.52088
[38]	test-rmse:0.51801
[39]	test-rmse:0.51693
[40]	test-rmse:0.51444
[41]	test-rmse:0.51180
[42]	test-rmse:0.51084
[43]	test-rmse:0.5100

In [9]:
# 7. Evaluate
rmse = root_mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"RMSE: {rmse:.3f}")
print(f"R^2: {r2:.3f}")


RMSE: 0.477
R^2: 0.827
