In [None]:
from catboost import CatBoostClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Synthetic binary-classification problem: 1000 samples, 20 features,
# generated with a fixed seed so the data is the same on every run.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=1,
)

In [None]:
# Hold out 30% of the samples for testing; the seed keeps the split fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [None]:
# Sanity check: shapes of the four pieces produced by the split.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((700, 20), (300, 20), (700,), (300,))

In [11]:
# Classifier with CatBoost's default hyperparameters (only the per-iteration
# training log is silenced); its test scores are compared with the tuned model.
classifier = CatBoostClassifier(verbose=0)

In [12]:
# Train on the training split only; the test split stays unseen until scoring.
classifier.fit(X_train, y_train)

<catboost.core.CatBoostClassifier at 0x7848683921b0>

In [13]:
# Predicted labels for the held-out test split.
y_pred = classifier.predict(X_test)

In [14]:
# Test-set metrics of the default-parameter classifier, emitted one item per line.
print(
    "Current model performance:",
    f"Accuracy: {accuracy_score(y_test, y_pred)}",
    "Classification Report:",
    classification_report(y_test, y_pred),
    "Confusion Matrix:",
    confusion_matrix(y_test, y_pred),
    sep="\n",
)

Current model performance:
Accuracy: 0.8633333333333333
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.86      0.85       139
           1       0.88      0.87      0.87       161

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.86      0.86      0.86       300

Confusion Matrix:
[[119  20]
 [ 21 140]]


In [15]:
# Hyperparameter grid for the exhaustive search: 3 * 4 * 4 = 48 combinations.
param_grid = dict(
    iterations=[100, 200, 300],          # number of boosting rounds
    learning_rate=[0.01, 0.05, 0.1, 0.2],
    depth=[3, 4, 5, 6],                  # tree depth
)

In [16]:
from sklearn.model_selection import GridSearchCV

# Exhaustive 5-fold cross-validated search over every combination in param_grid.
#
# GridSearchCV already spreads the individual fits over all cores (n_jobs=-1).
# CatBoost by default also uses one thread per core for each fit, so the two
# levels of parallelism would oversubscribe the CPU; each fit is therefore
# pinned to a single thread.
# allow_writing_files=False keeps the concurrently running fits from all
# writing their training artefacts into the same default `catboost_info`
# directory.
grid_search = GridSearchCV(
    estimator=CatBoostClassifier(
        verbose=0,
        thread_count=1,
        allow_writing_files=False,
    ),
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=3,
)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits


In [17]:
# Hyperparameter combination with the best mean cross-validation score.
print(f"Best Parameters: {grid_search.best_params_}")

Best Parameters: {'depth': 6, 'iterations': 100, 'learning_rate': 0.01}


In [18]:
# Estimator built from the best parameter combination found by the search.
best_model = grid_search.best_estimator_
# Predicted labels of the tuned model for the same held-out test split.
y_pred_tuned = best_model.predict(X_test)

In [19]:
# Same metrics as for the default-parameter model, now for the tuned one.
print(
    "Tuned model performance:",
    f"Accuracy: {accuracy_score(y_test, y_pred_tuned)}",
    "Classification Report:",
    classification_report(y_test, y_pred_tuned),
    "Confusion Matrix:",
    confusion_matrix(y_test, y_pred_tuned),
    sep="\n",
)

Tuned model performance:
Accuracy: 0.8566666666666667
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.85       139
           1       0.88      0.85      0.86       161

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.86      0.86      0.86       300

Confusion Matrix:
[[120  19]
 [ 24 137]]


In [20]:
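# CatBoost regressor
# NOTE: from here on X, y, the train/test splits, y_pred, param_grid,
# grid_search, best_model and y_pred_tuned are rebound to the regression task.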

from catboost import CatBoostRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [21]:
# Synthetic regression problem: 1000 samples, 2 features, noise=10, and a
# fixed seed so the data is the same on every run.
X, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=1,
)

In [22]:
# Hold out 30% of the regression samples for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)

In [23]:
# Regressor with CatBoost's default hyperparameters (training log silenced);
# its test scores are compared with the tuned model.
model = CatBoostRegressor(verbose=0)

In [24]:
# Train on the training split only; the test split stays unseen until scoring.
model.fit(X_train, y_train)

<catboost.core.CatBoostRegressor at 0x784867b67290>

In [25]:
# Predicted targets for the held-out test split.
y_pred = model.predict(X_test)

In [26]:
# Test-set metrics of the default-parameter regressor, one item per line.
print(
    "Current model performance:",
    f"R2 score: {r2_score(y_test, y_pred)}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}",
    f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}",
    sep="\n",
)

Current model performance:
R2 score: 0.9819209595586482
Mean Absolute Error: 9.274611839431874
Mean Squared Error: 149.43188273623386


In [27]:
# Search space for the regressor: the full cross product (48 combinations)
# of these three hyperparameters is evaluated.
param_grid = dict(
    iterations=[100, 200, 300],
    learning_rate=[0.01, 0.05, 0.1, 0.2],
    depth=[3, 4, 5, 6],
)

In [28]:
# Exhaustive 5-fold cross-validated search over every combination in param_grid.
#
# GridSearchCV already spreads the individual fits over all cores (n_jobs=-1).
# CatBoost by default also uses one thread per core for each fit, so the two
# levels of parallelism would oversubscribe the CPU; each fit is therefore
# pinned to a single thread.
# allow_writing_files=False keeps the concurrently running fits from all
# writing their training artefacts into the same default `catboost_info`
# directory.
grid_search = GridSearchCV(
    estimator=CatBoostRegressor(
        verbose=0,
        thread_count=1,
        allow_writing_files=False,
    ),
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=3,
)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 48 candidates, totalling 240 fits


In [29]:
# Hyperparameter combination with the best mean cross-validation score
# (shown as the cell's output).
grid_search.best_params_

{'depth': 5, 'iterations': 300, 'learning_rate': 0.05}

In [30]:
# Estimator built from the best parameter combination found by the search.
best_model = grid_search.best_estimator_
# Predicted targets of the tuned model for the same held-out test split.
y_pred_tuned = best_model.predict(X_test)

In [31]:
# Same metrics as for the default-parameter regressor, now for the tuned one.
print(
    "Tuned model performance:",
    f"R2 score: {r2_score(y_test, y_pred_tuned)}",
    f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred_tuned)}",
    f"Mean Squared Error: {mean_squared_error(y_test, y_pred_tuned)}",
    sep="\n",
)

Tuned model performance:
R2 score: 0.9824776420429117
Mean Absolute Error: 9.11387658762711
Mean Squared Error: 144.8306367807518
