## Log a Xgboost and Catboost Models

Log a Xgboost and Catboost Models with Katonic SDK's Log package.

In [1]:
!pip install xgboost catboost -q

You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.[0m


### Imports

In [2]:
import os

import pandas as pd
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, log_loss, recall_score, f1_score, precision_score
from katonic.log.logmodel import LogModel

### Experiment Name

In [3]:
experiment_name= "xgb_catb_model"

### Initiate LogModel with experiment name

In [4]:
lm = LogModel(experiment_name)

2022/06/10 11:10:39 INFO mlflow.tracking.fluent: Experiment with name 'xgb_catb_model' does not exist. Creating a new experiment.


### Metadata of the created / existing experiment

In [5]:
# experiment id
exp_id = lm.id

print("experiment name: ", lm.name)
print("experiment location: ", lm.location)
print("experiment id: ", lm.id)
print("experiment status: ", lm.stage)

experiment name:  xgb_catb_model
experiment location:  s3://models/23
experiment id:  23
experiment status:  active


### Artifact path where you want to log your model

In [6]:
artifact_path = "xgb-catb-model"

### Read the data

In [7]:
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


### Get features and label

In [8]:
x = df.drop(columns=['Outcome'], axis=1)
y = df['Outcome']

## Split the dataset in Train and Test

In [9]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=.20, random_state=98)

In [10]:
def metric(actual, pred):
    acc_score = accuracy_score(actual, pred)
    recall = recall_score(actual, pred)
    precision_scr = precision_score(actual, pred)
    f1_scr = f1_score(actual, pred)
    auc_roc = roc_auc_score(actual, pred)
    log_los = log_loss(actual, pred)

    return (
        acc_score,
        auc_roc,
        log_los,
        recall,
        f1_scr,
        precision_scr
    )

### Xgboost 

In [11]:
model_clf = XGBClassifier(random_state=0)
model_clf.fit(X_train, y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [12]:
y_pred = model_clf.predict(X_test)
(acc_score, auc_roc, log_los, recall, f1_scr, precision_scr) = metric(y_test, y_pred)

model_mertics = {
    "accuracy_score": acc_score,
    "roc_auc_score": auc_roc,
    "log_loss": log_los,
    "recall": recall,
    "f1_score": f1_scr,
    "precision_score": precision_scr
}

### Log xgboost model

In [13]:
lm.model_logging(
    model_name="xgboost",
    model_type="xgboost",
    model=model_clf,
    artifact_path=artifact_path,
    current_working_dir=f'{os.getcwd()}/xgboost_catboost_logging.ipynb',
    metrics=model_mertics
)

Model artifact logged to: s3://models/23/8085d9367a4443158ce554c60475a703/artifacts/xgb_catb_model_23_xgb-catb-model_xgboost


### Catboost

In [14]:
model_clf = CatBoostClassifier(random_state=0)
model_clf.fit(X_train, y_train)

Learning rate set to 0.008365
0:	learn: 0.6886390	total: 54ms	remaining: 53.9s
1:	learn: 0.6848469	total: 55.5ms	remaining: 27.7s
2:	learn: 0.6808885	total: 56.8ms	remaining: 18.9s
3:	learn: 0.6768608	total: 58.4ms	remaining: 14.5s
4:	learn: 0.6740004	total: 60.4ms	remaining: 12s
5:	learn: 0.6703328	total: 62ms	remaining: 10.3s
6:	learn: 0.6663177	total: 63.3ms	remaining: 8.97s
7:	learn: 0.6625272	total: 64.7ms	remaining: 8.02s
8:	learn: 0.6588317	total: 66.1ms	remaining: 7.28s
9:	learn: 0.6545630	total: 67.4ms	remaining: 6.67s
10:	learn: 0.6509276	total: 68.9ms	remaining: 6.19s
11:	learn: 0.6480414	total: 70.2ms	remaining: 5.78s
12:	learn: 0.6440515	total: 71.4ms	remaining: 5.42s
13:	learn: 0.6407300	total: 73ms	remaining: 5.14s
14:	learn: 0.6375678	total: 74.4ms	remaining: 4.88s
15:	learn: 0.6340485	total: 75.7ms	remaining: 4.66s
16:	learn: 0.6311749	total: 77.1ms	remaining: 4.46s
17:	learn: 0.6283656	total: 78.3ms	remaining: 4.27s
18:	learn: 0.6253587	total: 79.7ms	remaining: 4.11s


<catboost.core.CatBoostClassifier at 0x7f90da3a2af0>

In [15]:
y_pred = model_clf.predict(X_test)
(acc_score, auc_roc, log_los, recall, f1_scr, precision_scr) = metric(y_test, y_pred)

model_mertics = {
    "accuracy_score": acc_score,
    "roc_auc_score": auc_roc,
    "log_loss": log_los,
    "recall": recall,
    "f1_score": f1_scr,
    "precision_score": precision_scr
}

### Log Catboost model

In [17]:
lm.model_logging(
    model_name="catboost",
    model_type="catboost",
    model=model_clf,
    artifact_path=artifact_path,
    current_working_dir=f'{os.getcwd()}/xgboost_catboost_logging.ipynb',
    metrics=model_mertics
)

Model artifact logged to: s3://models/23/ef9fe064bc274a3f8dc0cd89fda21fb8/artifacts/xgb_catb_model_23_xgb-catb-model_catboost


### Check Loaded Models

In [18]:
df_runs = lm.search_runs(exp_id)
print("Number of runs done : ", len(df_runs))

Number of runs done :  2


In [19]:
df_runs.head()

Unnamed: 0,artifact_uri,end_time,experiment_id,metrics.accuracy_score,metrics.f1_score,metrics.log_loss,metrics.precision_score,metrics.recall,metrics.roc_auc_score,run_id,run_name,start_time,status,tags.mlflow.log-model.history
0,s3://models/23/ef9fe064bc274a3f8dc0cd89fda21fb...,2022-06-10 11:11:05.194000+00:00,23,0.785714,0.62069,7.401218,0.72973,0.54,0.721923,ef9fe064bc274a3f8dc0cd89fda21fb8,catboost-model,2022-06-10 11:11:02.497000+00:00,FINISHED,"[{""run_id"": ""ef9fe064bc274a3f8dc0cd89fda21fb8""..."
1,s3://models/23/8085d9367a4443158ce554c60475a70...,2022-06-10 11:10:57.425000+00:00,23,0.766234,0.625,8.074083,0.652174,0.6,0.723077,8085d9367a4443158ce554c60475a703,scikit-learn-model,2022-06-10 11:10:53.169000+00:00,FINISHED,"[{""run_id"": ""8085d9367a4443158ce554c60475a703""..."
