## Log a Catboost Model

Train and log a custom-built CatBoost model with the Katonic SDK Log package.

In [None]:
!pip install catboost -q

### Imports

In [1]:
import os

import pandas as pd
from catboost import CatBoostClassifier

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, log_loss, recall_score, f1_score, precision_score
from katonic.log.logmodel import LogModel

### Experiment Name

In [2]:
# Name of the experiment that the LogModel instance is initialised with.
experiment_name: str = "custom_catboost_model"

### Initiate LogModel with experiment name

In [None]:
# Set up the logger for this experiment; `source_name` records the notebook
# file this code lives in.
lm = LogModel(
    experiment_name,
    source_name='catboost_model_logging.ipynb',
)

### Check Metadata of the created / existing experiment

In [None]:
# Keep the experiment id around; it is used later to look up the runs.
exp_id = lm.id

# Print each piece of experiment metadata exposed by the LogModel instance,
# reading the attribute right before it is printed.
for label, attr in (
    ("experiment name: ", "name"),
    ("experiment location: ", "location"),
    ("experiment id: ", "id"),
    ("experiment status: ", "stage"),
):
    print(label, getattr(lm, attr))

### Artifact path where you want to log your model

In [6]:
# Artifact path passed to the model-logging call further down.
artifact_path: str = "catboost-model"

### Load Training Data

In [None]:
# Download the diabetes CSV from the plotly datasets repository into a
# DataFrame, then preview the first rows as the cell output.
df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
)
df.head()

### Get features and labels

In [8]:
# Features: every column except the 'Outcome' label column.
# (`columns=` already names the axis, so no separate `axis` argument is needed.)
x = df.drop(columns=['Outcome'])
# Labels: the 'Outcome' column on its own.
y = df['Outcome']

### Split the dataset into train and test sets

In [9]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=98,
)

### Define Evaluation Metrics

In [10]:
def metric(actual, pred, pred_proba=None):
    """Compute the evaluation metrics for a set of classification predictions.

    All metrics are computed with the scikit-learn functions imported at the
    top of the notebook, using their default settings.

    Args:
        actual: Ground-truth labels.
        pred: Predicted class labels. Used for accuracy, recall, precision
            and F1.
        pred_proba: Optional scores or probabilities for the positive class,
            one value per sample. When supplied, ROC AUC and log loss are
            computed from these values instead of from ``pred``. Defaults to
            ``None``, which preserves the previous behaviour of scoring every
            metric on ``pred``.

    Returns:
        A tuple ``(accuracy, roc_auc, log_loss, recall, f1, precision)``.
    """
    # ROC AUC and log loss are meant to be fed scores/probabilities: with
    # thresholded 0/1 labels the ROC curve has a single operating point and
    # log loss only reflects clipped, fully-confident predictions. Fall back
    # to `pred` solely so existing two-argument calls keep working.
    scores = pred if pred_proba is None else pred_proba

    return (
        accuracy_score(actual, pred),
        roc_auc_score(actual, scores),
        log_loss(actual, scores),
        recall_score(actual, pred),
        f1_score(actual, pred),
        precision_score(actual, pred),
    )

### Train Catboost Model

In [None]:
# Create the classifier with a fixed seed, then fit it on the training split.
# The fit call stays as the last expression so the cell output is unchanged.
model_clf = CatBoostClassifier(random_state=0)
model_clf.fit(X=X_train, y=y_train)

### Calculate metrics for the Catboost model

In [12]:
# Predict on the held-out split and score the predictions.
# NOTE(review): `predict` presumably returns hard class labels here, so the
# ROC AUC and log-loss values are computed from labels, not probabilities.
y_pred = model_clf.predict(X_test)
acc_score, auc_roc, log_los, recall, f1_scr, precision_scr = metric(y_test, y_pred)

# Metric name -> value mapping that is passed to the model-logging call.
model_mertics = dict(
    accuracy_score=acc_score,
    roc_auc_score=auc_roc,
    log_loss=log_los,
    recall=recall,
    f1_score=f1_scr,
    precision_score=precision_scr,
)

### Log Catboost Model

In [None]:
# Log the fitted model together with its metrics under `artifact_path`.
# `current_working_dir` is given the notebook's path inside the current
# working directory.
lm.model_logging(
    model=model_clf,
    model_name="catboost",
    model_type="catboost",
    artifact_path=artifact_path,
    metrics=model_mertics,
    current_working_dir=f'{os.getcwd()}/catboost_model_logging.ipynb',
)

### Check all the logged Models

In [None]:
# Look up the runs for the experiment id captured earlier
# (presumably every run logged under this experiment — confirm against the SDK).
df_runs = lm.search_runs(exp_id)
# Report how many entries the lookup returned.
print("Number of runs done : ", len(df_runs))

In [None]:
# Preview the first rows of the runs table as the cell output.
df_runs.head()