In [1]:
import xgboost as xgb

In [2]:
from sklearn import datasets

# Load the bundled iris dataset and split it into the feature matrix (X)
# and the class-label vector (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Wrap both splits in XGBoost's DMatrix container, attaching the labels.
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)

In [10]:
# Booster configuration passed to xgb.train().
param = {
    'max_depth': 3,  # maximum depth of each tree
    'eta': 0.3,  # learning rate (step-size shrinkage applied at each boosting round)
    'verbosity': 0,  # quiet logging; replaces the deprecated 'silent' flag
    'objective': 'multi:softprob',  # multiclass objective that yields per-class probabilities
    'num_class': 3,  # number of classes in this dataset
}
num_round = 20  # number of boosting rounds (training iterations)

In [11]:
# Fit the booster on the training matrix for `num_round` boosting rounds.
bst = xgb.train(params=param, dtrain=dtrain, num_boost_round=num_round)

In [12]:
# Score the held-out samples; the next cell reduces each row of `preds`
# to a single class index.
preds = bst.predict(data=dtest)

In [13]:
import numpy as np
# Pick the index of the highest score in every row of `preds`.
# Vectorized argmax along axis 1 replaces the per-row Python loop and keeps
# an integer dtype even when `preds` has zero rows.
best_preds = np.argmax(preds, axis=1)

In [17]:
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score

# Evaluate the predicted class indices against the held-out labels.
# Precision and recall are macro-averaged (unweighted mean over classes).
accuracy = accuracy_score(y_test, best_preds)
precision = precision_score(y_test, best_preds, average='macro')
recall = recall_score(y_test, best_preds, average='macro')

# Report each metric as a percentage with two decimals.
print("Accuracy: %.2f%%" % (100.0 * accuracy))
print("Precision: %.2f%%" % (100.0 * precision))
print("Recall: %.2f%%" % (100.0 * recall))

Accuracy: 100.00%
Precision: 100.00%
Recall: 100.00%


In [20]:
import pandas as pd
# Assemble the iris features into a DataFrame (one column per feature name)
# and append the class labels as a 'target' column.
df = pd.DataFrame(
    data=iris['data'],
    columns=iris['feature_names'],
).assign(target=iris['target'])

In [22]:
# Count the rows per class label to check how balanced the classes are.
df["target"].value_counts()

2    50
1    50
0    50
Name: target, dtype: int64

In [23]:
# Write a dump of the trained booster to a file in the working directory.
bst.dump_model(fout='dump.raw.txt')

In [24]:
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so import the standalone `joblib` package first and fall back to the
# vendored copy only on old installations.
try:
    import joblib
except ImportError:  # old environment without a standalone joblib
    from sklearn.externals import joblib

# Persist the trained booster as a compressed pickle-based file.
# NOTE(review): pickle-based files are tied to the library versions that wrote
# them and must only be loaded from trusted sources; consider the booster's
# own save_model() for long-term storage.
joblib.dump(bst, 'bst_model.pkl', compress=True)
# bst = joblib.load('bst_model.pkl') # load it later

['bst_model.pkl']