# Tutorial 13: Xgboost Model Building
This tutorial covers the steps to build an XGBoost model and log it with MLflow.

#### Steps
- Import the required Python libraries, e.g. mlflow and xgboost
- Get the list of experiments which are already present
- Create the experiment if the required experiment is not present
- Read the data and split it into Train and Test sets
- Manually start the mlflow experiment run
- Build the xgboost model using sklearn wrapper API
- Build the model signature
- Manually log the parameters, tags, model and metrics

#### Import the required libraries

In [1]:
# Import the required libraries
import os
import mlflow
from mlflow.tracking import MlflowClient
import numpy as np
import pandas as pd
import xgboost
from xgboost import XGBRegressor

#### Get the list of experiments which are already present

In [2]:
from mlflow.tracking import get_tracking_uri
# Resolve the tracking server this session is pointed at
tracking_uri = mlflow.get_tracking_uri()

# Ask that tracking server for every experiment it knows about
client = MlflowClient(tracking_uri=tracking_uri)
experiments = client.list_experiments()

# Keep only the names of the returned experiments
experiment_names = [exp.name for exp in experiments]

#### Create the experiment if it is not present.

In [3]:
# Reuse the experiment when it already exists; otherwise create it first
experiment_name = "xgboost"

if experiment_name in experiment_names:
    print('Experiment is present')
    # Make it the active experiment, then fetch its metadata by name
    mlflow.set_experiment(experiment_name=experiment_name)
    experiment = mlflow.get_experiment_by_name(experiment_name)
    print('Experiment is set')
else:
    print('Experiment is not present')
    # The value returned by create_experiment is what get_experiment
    # is given to look the new experiment up
    experiment_obj = mlflow.create_experiment(name=experiment_name)
    experiment = mlflow.get_experiment(experiment_obj)
    print('Experiement has been created')

Experiment is present
Experiment is set


#### Print the experiment details

In [4]:
# Keep the experiment id in its own variable for use outside this cell
Diabetes_Exp_Id = experiment.experiment_id

# Summarise the experiment that was selected / created above
print(f"Experiment Name: {experiment.name}")
print(f"Experiment_id: {Diabetes_Exp_Id}")
print(f"Artifact Location: {experiment.artifact_location}")

Experiment Name: xgboost
Experiment_id: 11
Artifact Location: s3://rml-model-artifacts/users/csvishnumurthy/11


#### Download the Diabetes data

In [5]:
# Download Diabetes data
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.tree import DecisionTreeRegressor

# Load the diabetes dataset and wrap features / target in labelled DataFrames
db = load_diabetes()

# Ten feature columns, named db1 .. db10
feature_names = [f"db{i}" for i in range(1, 11)]
db_df_Ind = pd.DataFrame(db.data, columns=feature_names)
db_df_Target = pd.DataFrame(db.target, columns=["Target"])

# Show the column names of both frames
for frame in (db_df_Ind, db_df_Target):
    print(frame.columns.values)

['db1' 'db2' 'db3' 'db4' 'db5' 'db6' 'db7' 'db8' 'db9' 'db10']
['Target']


#### Split the data into Train & Test sets

In [6]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between notebook runs
X_train, X_test, y_train, y_test = train_test_split(
    db_df_Ind,
    db_df_Target,
    test_size=0.20,
    random_state=10,
)

#### Build the model and log it

In [7]:
from mlflow.models.signature import infer_signature

# Hyperparameters are defined once and reused for both training and logging,
# so the values recorded in MLflow are always the values the model was
# actually trained with.
parms = {"n_estimators": 10,
         "max_depth": 1,
         "learning_rate": 0.05}

# Tags attached to the run.
# NOTE(review): "Loggin Method" looks like a typo for "Logging Method"; it is
# left unchanged in case other runs/tutorials are filtered on this exact key.
tags = {"Algorithm": "xgboost",
        "Loggin Method": "Manual"}

# Start the experiment run. Using the run as a context manager guarantees it
# is ended even if one of the steps below raises, so a failed cell does not
# leave an active run behind for the next cell to trip over.
with mlflow.start_run(experiment_id=Diabetes_Exp_Id,
                      tags={'Algorithm': 'xgboost'},
                      run_name='xgboost with sklearn logmodel') as run:

    # Build the model using the sklearn wrapper API
    XGB2 = XGBRegressor(objective='reg:squarederror', booster='gbtree',
                        verbosity=3, **parms)
    XGB2.fit(X_train, y_train)

    # Create the model signature from the training inputs and predictions
    signature = infer_signature(X_train, XGB2.predict(X_train))

    # Manually log parameters & tags
    mlflow.log_params(parms)
    mlflow.set_tags(tags)

    # Log the fitted model together with its signature and a one-row example
    mlflow.sklearn.log_model(sk_model=XGB2, artifact_path="model",
                             signature=signature,
                             input_example=X_train.head(1))

    # Evaluate on the held-out test set and log the metrics with an
    # 'xgb_' prefix
    mlflow.sklearn.eval_and_log_metrics(model=XGB2, X=X_test, y_true=y_test,
                                        prefix='xgb_', sample_weight=None)

[05:30:30] DEBUG: ../src/gbm/gbtree.cc:155: Using tree method: 2
[05:30:30] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2 extra nodes, 0 pruned nodes, max_depth=1
[05:30:30] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2 extra nodes, 0 pruned nodes, max_depth=1
[05:30:30] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2 extra nodes, 0 pruned nodes, max_depth=1
[05:30:30] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2 extra nodes, 0 pruned nodes, max_depth=1
[05:30:30] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2 extra nodes, 0 pruned nodes, max_depth=1
[05:30:30] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2 extra nodes, 0 pruned nodes, max_depth=1
[05:30:30] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2 extra nodes, 0 pruned nodes, max_depth=1
[05:30:30] INFO: ../src/tree/updater_prune.cc:101: tree pruning end, 2 extra nodes, 0 pruned nodes, max_depth=1
[05:30:30] INFO: ../src/tree/updater_pr