In [None]:
!pip install pandas
!pip install matplotlib 
!pip install catboost
!pip install scikit-learn

IMPORTS

In [1]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from catboost import CatBoostClassifier, Pool, cv
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split

READ DATASETS

In [14]:
# Load the prepared dataset and separate the target column from the features.
df = pd.read_csv('datasets/out_dataset_3_class_porog_60.csv')
y = df.pop('price_group')

# 'price_doc' is dropped so it is not used as a feature.
df = df.drop(columns=['price_doc'])

# CatBoost is told which columns are categorical by position: every column
# whose dtype is `object` is treated as a categorical feature.
cat_features = [
    position
    for position, column_dtype in enumerate(df.dtypes)
    if column_dtype == object
]
X = df


SPLIT TRAIN AND TEST DATA

In [15]:
# 70/30 train/test split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    test_size=0.3,
    random_state=0,
)

In [16]:
# Wrap both partitions in CatBoost Pools, declaring the categorical columns
# once here so that training and evaluation share the same feature layout.
train_pool = Pool(data=X_train, label=y_train, cat_features=cat_features)
test_pool = Pool(data=X_test, label=y_test, cat_features=cat_features)

INITIALIZING CLASSIFIER

In [17]:
# Hyper-parameters are collected in one mapping so they can be inspected or
# logged independently of the estimator object.
model_params = {
    # Objective and reported metric.
    'loss_function': 'MultiClass',
    'eval_metric': 'Accuracy',
    # Boosting schedule.
    'iterations': 10000,
    'learning_rate': 0.06,
    'early_stopping_rounds': 20,
    # Tree shape and regularisation.
    'grow_policy': 'Lossguide',
    'depth': 5,
    'l2_leaf_reg': 7,
    # Data description and logging.
    'cat_features': cat_features,
    'verbose': False,
}
model = CatBoostClassifier(**model_params)

TRAIN MODEL

In [None]:
# The model is configured with `early_stopping_rounds`, but CatBoost's overfitting
# detector only works when an evaluation set is supplied. Fitting on `train_pool`
# alone would run every configured iteration and track the eval metric on the
# training data only.
# A validation slice is carved out of the training data for this purpose, so the
# test set stays untouched for the final metrics computed afterwards.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=0
)
fit_pool = Pool(X_fit, y_fit, cat_features=cat_features)
val_pool = Pool(X_val, y_val, cat_features=cat_features)

model.fit(fit_pool, eval_set=val_pool, plot=True)

CHECK ACCURACY AND OTHER METRICS

In [None]:
# Predicted class labels for the held-out pool; reused by the metric cells that follow.
y_pred = model.predict(test_pool, prediction_type='Class')

# Cell output: the classifier's own score on the labelled test pool.
model.score(test_pool)

In [None]:
# Fraction of test rows whose predicted class matches the true class.
accuracy = accuracy_score(y_test, y_pred)
f"Accuracy: {accuracy}"

In [None]:
# F1 averaged over classes, each class weighted by its support in `y_test`.
weighted_f1 = f1_score(y_test, y_pred, average='weighted')
f"F1-Score: {weighted_f1}"


In [None]:
# A bare string expression is displayed via repr(), which shows the "\n" and the
# matrix rows as a single escaped line. Printing renders the matrix on separate rows.
print(f"Confusion Matrix:\n{confusion_matrix(y_test, y_pred)}")

In [None]:
# Take class probabilities from the same `test_pool` that the other evaluation
# cells use, instead of handing the raw `X_test` frame to the model, so every metric
# is computed from one consistent representation of the test data.
# `multi_class='ovo'` selects one-vs-one averaging for the multiclass AUC.
f"ROC-AUC: {roc_auc_score(y_test, model.predict_proba(test_pool), multi_class='ovo')}"

SAVE MODEL

In [ ]:
# Create the output directory first so that a missing `models/` folder does not
# make the save fail after a long training run.
model_path = Path("models/catboost-model-1-acc-64-097-2-new_ver")
model_path.parent.mkdir(parents=True, exist_ok=True)
model.save_model(str(model_path))