In [1]:
from sklearn.datasets import load_iris
import xgboost as xgb

In [2]:
# Load the iris dataset once, then expose the feature matrix and the label
# vector under the short names used by the following cells.
iris = load_iris()
X = iris.data
y = iris.target

In [3]:
# Wrap the complete iris feature matrix and labels in XGBoost's DMatrix
# container. No rows are held out, so `dtrain` is the whole dataset.
dtrain = xgb.DMatrix(X, label=y)

params = {
    'objective': 'multi:softmax',  # multiclass classification
    'num_class': 3,  # iris has 3 classes
    # Previously `None`. That is not a meaningful XGBoost depth (it does not
    # mean "unlimited" as it does in scikit-learn trees), so the value that was
    # really in effect was hidden from the reader. Spell out XGBoost's
    # documented default instead.
    'max_depth': 6,
    'learning_rate': 0.1,
    'eval_metric': 'mlogloss'
}

# Number of boosting rounds passed to xgb.train.
num_rounds = 100
model = xgb.train(params, dtrain, num_rounds)


In [None]:
# Export the fitted booster's trees in JSON form and report how many entries
# the dump contains.
dumps = model.get_dump(dump_format='json')
print(f"Number of trees generated by XGBoost: {len(dumps)}")
# Inspect a single entry; index 50 requires the dump to hold at least 51 items.
print(dumps[50])

In [4]:
from Visualizer import XGBTreeVisualizer

# Build the visualizer (from the project-local Visualizer module) for the iris
# booster, handing it the data the model was fitted on plus readable names.
viz = XGBTreeVisualizer(
    model,
    X,
    y,
    feature_names=iris.feature_names,
    target_names=iris.target_names,
)

In [5]:
# Print the booster's predictions for the training DMatrix. With the
# 'multi:softmax' objective configured above, the captured output shows class
# ids stored as floats.
print(model.predict(dtrain))

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2.]


In [6]:
# Display the visualizer's tree view for the iris booster. `show_tree` comes
# from the project-local Visualizer module; its rendering is not captured here.
viz.show_tree()


In [7]:
# Display the visualizer's simplified tree view, called with max_depth=10.
# The return value is discarded in this cell.
viz.show_simplified_tree(max_depth=10)

In [8]:
from sklearn.metrics import accuracy_score

# Score the iris booster. NOTE: `dtrain` was built from the full X, y, so this
# is accuracy on the training rows themselves, not a held-out estimate.
y_pred = model.predict(dtrain)
accuracy = accuracy_score(y, y_pred)

# Show the true labels, then the predicted labels, then the score.
print(y, y_pred, sep="\n")
print(f"XGBoost model accuracy: {accuracy:.4f}")


[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2.]
XGBoost model accuracy: 1.0000


In [None]:
import numpy as np
import xgboost as xgb
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from Visualizer import XGBTreeVisualizer

# --- Regression example on the California housing data ---------------------
# Note: X, y, y_pred and num_rounds are rebound here, replacing the values the
# iris cells assigned to the same names.
housing = fetch_california_housing()
X = housing.data
y = housing.target

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# DMatrix containers for the training and held-out partitions.
dtrain_reg = xgb.DMatrix(X_train, label=y_train)
dtest_reg = xgb.DMatrix(X_test, label=y_test)

params_reg = dict(
    objective='reg:squarederror',
    max_depth=4,
    learning_rate=0.1,
    eval_metric='rmse',
)

num_rounds = 50
reg_model = xgb.train(params_reg, dtrain_reg, num_boost_round=num_rounds)

# Root-mean-squared error on the held-out partition.
y_pred = reg_model.predict(dtest_reg)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Regression model RMSE: {rmse:.4f}")

# Visualizer for the regression booster, built from the training partition.
reg_viz = XGBTreeVisualizer(
    reg_model,
    X_train,
    y_train,
    feature_names=housing.feature_names,
)

# JSON dump of the regression booster's trees, reused by the next cell.
reg_dumps = reg_model.get_dump(dump_format='json')
print(f"Number of trees in regression model: {len(reg_dumps)}")


In [None]:
# Print the JSON dump entry at index 20 of the regression booster
# (reg_dumps is produced by reg_model.get_dump(dump_format='json')).
print(reg_dumps[20])

In [None]:
# Show the visualizer's simplified tree for the regression booster, called with
# max_depth=3, and keep whatever the call returns.
# NOTE(review): a later cell calls simple_model.predict(X_test), so the return
# value is presumably a fitted estimator-like object; confirm against
# Visualizer.show_simplified_tree.
print("Displaying a simplified tree for the regression model:")
simple_model = reg_viz.show_simplified_tree(max_depth=3)

In [None]:
# Display the visualizer's tree view for the regression booster. `show_tree`
# comes from the project-local Visualizer module.
reg_viz.show_tree()

In [None]:
# Compare the boosted regressor with the simplified tree on the held-out
# partition: one sorted line plot, two scatter panels, then RMSE / R² figures.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score

# Create predictions from both models
xgb_predictions = reg_model.predict(dtest_reg)
# NOTE(review): assumes simple_model.predict accepts the raw X_test array;
# confirm against the Visualizer module.
simple_predictions = simple_model.predict(X_test)
true_values = y_test

# Create a DataFrame for easier plotting
results_df = pd.DataFrame({
    'True Values': true_values,
    'XGBoost Predictions': xgb_predictions,
    'Simplified Tree Predictions': simple_predictions
})

# Sort by true values for better visualization, then renumber the rows 0..n-1.
# Without the reset the frame keeps its pre-sort index, and because the line
# plot below uses `results_df.index` as x, the points would land at their
# original positions (a scrambled zig-zag) instead of in sorted order as the
# x-axis label promises.
results_df = results_df.sort_values('True Values').reset_index(drop=True)

# Plot the predictions
plt.figure(figsize=(12, 6))
plt.plot(results_df.index, results_df['True Values'], 'b-', label='True Values')
plt.plot(results_df.index, results_df['XGBoost Predictions'], 'r--', label='XGBoost Model')
plt.plot(results_df.index, results_df['Simplified Tree Predictions'], 'g-.', label='Simplified Tree Model')

plt.title('Comparison of Model Predictions')
plt.xlabel('Samples (sorted by true values)')
plt.ylabel('Target Value')
plt.legend()
plt.grid(True, alpha=0.3)

# Add a scatter plot to compare predictions directly
plt.figure(figsize=(14, 6))

# Left panel: each model's predictions against the truth; the dashed diagonal
# marks a perfect prediction.
plt.subplot(1, 2, 1)
plt.scatter(true_values, xgb_predictions, alpha=0.5, label='XGBoost')
plt.scatter(true_values, simple_predictions, alpha=0.5, label='Simplified Tree')
plt.plot([min(true_values), max(true_values)], [min(true_values), max(true_values)], 'k--')
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.title('Predictions vs True Values')
plt.legend()
plt.grid(True, alpha=0.3)

# Right panel: agreement between the two models; the dashed diagonal marks
# identical predictions.
plt.subplot(1, 2, 2)
plt.scatter(xgb_predictions, simple_predictions, alpha=0.5)
plt.plot([min(xgb_predictions), max(xgb_predictions)], [min(xgb_predictions), max(xgb_predictions)], 'k--')
plt.xlabel('XGBoost Predictions')
plt.ylabel('Simplified Tree Predictions')
plt.title('XGBoost vs Simplified Tree')
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Calculate and print metrics (np is imported by the regression setup cell)
xgb_rmse = np.sqrt(mean_squared_error(true_values, xgb_predictions))
simple_rmse = np.sqrt(mean_squared_error(true_values, simple_predictions))
xgb_r2 = r2_score(true_values, xgb_predictions)
simple_r2 = r2_score(true_values, simple_predictions)

print(f"XGBoost RMSE: {xgb_rmse:.4f}, R²: {xgb_r2:.4f}")
print(f"Simplified Tree RMSE: {simple_rmse:.4f}, R²: {simple_r2:.4f}")
