# 0. Configuration

In [None]:
# dataset source URL and the column names applied when the CSV is loaded
data_path = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "iris/iris.data"
)
col_names = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'class',
]

In [None]:
# destination file for the serialized model (relative to the working dir)
model_path = "./catboost_model.pkl"

# 1. Modules and functions

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from catboost import CatBoostClassifier
import joblib

# 2. Main

In [None]:
# read the raw table; header=None means no header row is taken from the
# file, so the configured column names are applied instead
iris_data = pd.read_csv(data_path, header=None, names=col_names)

# replace the values in 'class' with their integer category codes
iris_data['class'] = iris_data['class'].astype('category').cat.codes

# separate the target (y) from the feature columns (X)
y = iris_data['class']
X = iris_data.drop(columns=['class'])

# hold out 30% of the rows for testing, using a fixed seed for the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# preprocessing and feature generation
# TODO: add power-of-X features for each input feature (not implemented yet)

In [None]:
# build the classifier (100 iterations, learning rate 0.1, depth 4)
# and train it on the training split
model = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=4)
model.fit(X_train, y_train)

In [None]:
# check the performance on the held-out test split only: scoring on the
# full dataset would include the rows the model was trained on and would
# overstate how well it generalizes
preds = model.predict(X_test)

# overall accuracy plus the per-class precision / recall / F1 breakdown
accuracy = accuracy_score(y_test, preds)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, preds))


In [None]:
# persist the fitted model to model_path with joblib, then confirm
joblib.dump(value=model, filename=model_path)
print(f"Model training complete and saved as {model_path}")