In [1]:
import xgboost as xgb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score


In [2]:
# Generate a synthetic binary-classification dataset (1000 samples, 10 features).
# The fixed random_state makes the generated data reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the samples as a test set; the split is seeded as well.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Wrap each split in a DMatrix, the XGBoost-specific data container that
# xgb.train and Booster.predict operate on.
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)

In [3]:
# Booster hyper-parameters.
params = dict(
    max_depth=3,                   # maximum depth of each tree
    eta=0.3,                       # learning rate (step-size shrinkage per boosting round)
    objective='binary:logistic',   # binary classification, outputs probabilities
    # NOTE: no `evals` are passed to xgb.train below, so this metric is
    # configured but never reported during training.
    eval_metric='logloss',
)

# Number of boosting rounds (one tree is added per round).
num_round = 100

# Fit the booster on the training DMatrix.
bst = xgb.train(params=params, dtrain=dtrain, num_boost_round=num_round)

In [4]:
# With the 'binary:logistic' objective, predict() returns one probability of
# the positive class per row of dtest.
preds = bst.predict(dtest)

# Convert probabilities to hard 0/1 labels with an explicit decision threshold.
# For probabilities this yields the same split as np.round (which rounds an
# exact 0.5 down to 0), but it produces integer labels and keeps the
# threshold visible and tunable.
threshold = 0.5
predictions = (preds > threshold).astype(int)

# Evaluate the model on the held-out test set.
accuracy = accuracy_score(y_test, predictions)
# The '%' format spec multiplies by 100 and rounds, avoiding floating-point
# artefacts such as '93.50000000000001%'.
print(f'Accuracy: {accuracy:.1%}')

Accuracy: 90.0%


In [8]:
# Sanity check: display the test DMatrix shape as (n_columns, n_rows).
# NOTE(review): the data cell generates 10 features and holds out 20% of 1000
# samples, so a fresh run should show 10 columns and 200 rows. A saved output
# with a different column count points to stale kernel state -- restart the
# kernel and run all cells to refresh it.
dtest.num_col(), dtest.num_row()

(20, 200)

In [12]:
# Build one hand-made row [0, 1, ..., n_features - 1] to demonstrate scoring a
# single sample. The width is read from the training DMatrix instead of being
# hard-coded, so the row keeps the same number of columns as the training data
# if the dataset definition changes.
n_features = dtrain.num_col()
data = np.arange(n_features, dtype=float).reshape(1, n_features)

# Convert the numpy array into a DMatrix (no label is needed for prediction).
dmat = xgb.DMatrix(data)
print(data)

[[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]]


In [11]:
# Score the single hand-made row. The result is an array with one entry per
# input row; with the 'binary:logistic' objective that entry is the predicted
# probability of the positive class.
bst.predict(dmat)

array([0.99322647], dtype=float32)