In [1]:
from sklearn.datasets import load_iris
import xgboost as xgb

In [2]:
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
# Create DMatrix for XGBoost
dtrain = xgb.DMatrix(X, label=y)

# Set parameters for XGBoost
params = {
    'objective': 'multi:softmax',  # multiclass classification
    'num_class': 3,  # iris has 3 classes
    'max_depth': None,
    'learning_rate': 0.1,
    'eval_metric': 'mlogloss'
}

# Train XGBoost model
num_rounds = 100
model = xgb.train(params, dtrain, num_rounds)


In [4]:
# Get model dumps and print number of trees
dumps = model.get_dump(dump_format='json')
print(f"Number of trees generated by XGBoost: {len(dumps)}")
print(dumps[50])

Number of trees generated by XGBoost: 300
  { "nodeid": 0, "depth": 0, "split": "f2", "split_condition": 4.80000019, "yes": 1, "no": 2, "missing": 2 , "children": [
    { "nodeid": 1, "depth": 1, "split": "f3", "split_condition": 1.60000002, "yes": 3, "no": 4, "missing": 4 , "children": [
      { "nodeid": 3, "leaf": -0.0510121472 }, 
      { "nodeid": 4, "leaf": 0.00339141116 }
    ]}, 
    { "nodeid": 2, "depth": 1, "split": "f3", "split_condition": 1.79999995, "yes": 5, "no": 6, "missing": 6 , "children": [
      { "nodeid": 5, "depth": 2, "split": "f2", "split_condition": 5.0999999, "yes": 7, "no": 8, "missing": 8 , "children": [
        { "nodeid": 7, "leaf": -0.0106632998 }, 
        { "nodeid": 8, "leaf": 0.0236955229 }
      ]}, 
      { "nodeid": 6, "depth": 2, "split": "f0", "split_condition": 6, "yes": 9, "no": 10, "missing": 10 , "children": [
        { "nodeid": 9, "depth": 3, "split": "f2", "split_condition": 5.0999999, "yes": 11, "no": 12, "missing": 12 , "children": [
 

In [5]:
from Visualizer import XGBTreeVisualizer

viz = XGBTreeVisualizer(model, X, y, feature_names=iris.feature_names, target_names=iris.target_names)

In [6]:
print(model.predict(dtrain))

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2.]


In [7]:
viz.show_tree()

In [8]:
viz.show_simplified_tree()

In [9]:
# Calculate accuracy of the XGBoost model
from sklearn.metrics import accuracy_score

# Get predictions from the model
y_pred = model.predict(dtrain)

print(y)
print(y_pred)

# Calculate and display accuracy
accuracy = accuracy_score(y, y_pred)
print(f"XGBoost model accuracy: {accuracy:.4f}")


[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.
 2. 2. 2. 2. 2. 2.]
XGBoost model accuracy: 1.0000


In [10]:
# Import necessary libraries
import numpy as np
import xgboost as xgb
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from Visualizer import XGBTreeVisualizer

# Load a regression dataset (California Housing)
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create DMatrix for XGBoost
dtrain_reg = xgb.DMatrix(X_train, label=y_train)
dtest_reg = xgb.DMatrix(X_test, label=y_test)

# Set parameters for regression
params_reg = {
    'objective': 'reg:squarederror',
    'max_depth': 4,
    'learning_rate': 0.1,
    'eval_metric': 'rmse'
}

# Train the regression model
num_rounds = 50
reg_model = xgb.train(params_reg, dtrain_reg, num_rounds)

# Evaluate the model
y_pred = reg_model.predict(dtest_reg)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Regression model RMSE: {rmse:.4f}")

# Create a visualizer for the regression model
reg_viz = XGBTreeVisualizer(reg_model, X_train, y_train, feature_names=housing.feature_names)



# Get the number of trees in the model
reg_dumps = reg_model.get_dump(dump_format='json')
print(f"Number of trees in regression model: {len(reg_dumps)}")


Regression model RMSE: 0.5486
Number of trees in regression model: 50


In [11]:
print(reg_dumps[20])

  { "nodeid": 0, "depth": 0, "split": "f0", "split_condition": 5.72139978, "yes": 1, "no": 2, "missing": 2 , "children": [
    { "nodeid": 1, "depth": 1, "split": "f5", "split_condition": 3.13289046, "yes": 3, "no": 4, "missing": 4 , "children": [
      { "nodeid": 3, "depth": 2, "split": "f6", "split_condition": 34.5099983, "yes": 7, "no": 8, "missing": 8 , "children": [
        { "nodeid": 7, "depth": 3, "split": "f7", "split_condition": -118.220001, "yes": 15, "no": 16, "missing": 16 , "children": [
          { "nodeid": 15, "leaf": 0.0429796316 }, 
          { "nodeid": 16, "leaf": -0.0033372608 }
        ]}, 
        { "nodeid": 8, "depth": 3, "split": "f7", "split_condition": -119.959999, "yes": 17, "no": 18, "missing": 18 , "children": [
          { "nodeid": 17, "leaf": -0.00024415931 }, 
          { "nodeid": 18, "leaf": -0.0601443611 }
        ]}
      ]}, 
      { "nodeid": 4, "depth": 2, "split": "f7", "split_condition": -121.489998, "yes": 9, "no": 10, "missing": 10 , "chi

In [12]:
# Show the simplified tree
print("Displaying a simplified tree for the regression model:")
reg_viz.show_simplified_tree(max_depth=3)

Displaying a simplified tree for the regression model:


In [13]:
reg_viz.show_tree()