In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [2]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [3]:
from catboost import CatBoostClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [4]:
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=1)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 1)

In [6]:
classifier = CatBoostClassifier()
classifier

<catboost.core.CatBoostClassifier at 0x7d03e26ecb90>

In [7]:
classifier.fit(X_train, y_train)

Learning rate set to 0.008847
0:	learn: 0.6870348	total: 58.2ms	remaining: 58.1s
1:	learn: 0.6807208	total: 63.3ms	remaining: 31.6s
2:	learn: 0.6731438	total: 68.4ms	remaining: 22.7s
3:	learn: 0.6667775	total: 73.3ms	remaining: 18.2s
4:	learn: 0.6596142	total: 78.3ms	remaining: 15.6s
5:	learn: 0.6528330	total: 83.4ms	remaining: 13.8s
6:	learn: 0.6464282	total: 88.3ms	remaining: 12.5s
7:	learn: 0.6409323	total: 93.2ms	remaining: 11.6s
8:	learn: 0.6354650	total: 98.3ms	remaining: 10.8s
9:	learn: 0.6301934	total: 103ms	remaining: 10.2s
10:	learn: 0.6247804	total: 108ms	remaining: 9.71s
11:	learn: 0.6185823	total: 113ms	remaining: 9.32s
12:	learn: 0.6138832	total: 118ms	remaining: 8.97s
13:	learn: 0.6085545	total: 123ms	remaining: 8.66s
14:	learn: 0.6036174	total: 128ms	remaining: 8.41s
15:	learn: 0.5985029	total: 133ms	remaining: 8.19s
16:	learn: 0.5939152	total: 138ms	remaining: 7.99s
17:	learn: 0.5897951	total: 143ms	remaining: 7.81s
18:	learn: 0.5855264	total: 148ms	remaining: 7.65s
19

<catboost.core.CatBoostClassifier at 0x7d03e26ecb90>

In [8]:
y_pred = classifier.predict(X_test)

# Evaluate the model
print("Current model performance:")
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Current model performance:
Accuracy: 0.8633333333333333
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.86      0.85       139
           1       0.88      0.87      0.87       161

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.86      0.86      0.86       300

Confusion Matrix:
[[119  20]
 [ 21 140]]


In [11]:
# Hyperparameter tuning using GridSearchCV
param_grid = {
    'iterations': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
}

In [13]:
from sklearn.model_selection import GridSearchCV

grid_search = GridSearchCV(estimator=CatBoostClassifier(), param_grid=param_grid, cv=5, verbose=3)
grid_search.fit(X_train, y_train)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
217:	learn: 0.2421726	total: 2.23s	remaining: 838ms
218:	learn: 0.2418131	total: 2.23s	remaining: 825ms
219:	learn: 0.2413065	total: 2.24s	remaining: 813ms
220:	learn: 0.2406110	total: 2.24s	remaining: 801ms
221:	learn: 0.2397653	total: 2.25s	remaining: 789ms
222:	learn: 0.2390415	total: 2.25s	remaining: 777ms
223:	learn: 0.2387396	total: 2.26s	remaining: 766ms
224:	learn: 0.2383830	total: 2.26s	remaining: 754ms
225:	learn: 0.2378613	total: 2.27s	remaining: 742ms
226:	learn: 0.2372973	total: 2.27s	remaining: 731ms
227:	learn: 0.2366269	total: 2.28s	remaining: 719ms
228:	learn: 0.2357717	total: 2.28s	remaining: 708ms
229:	learn: 0.2350753	total: 2.29s	remaining: 696ms
230:	learn: 0.2344935	total: 2.29s	remaining: 685ms
231:	learn: 0.2338051	total: 2.3s	remaining: 674ms
232:	learn: 0.2333775	total: 2.3s	remaining: 662ms
233:	learn: 0.2326029	total: 2.31s	remaining: 651ms
234:	learn: 0.2320339	total: 2.31s	remaining: 640ms
2

In [14]:
# Get best parameters
print("Best Parameters:", grid_search.best_params_)

# Train best model
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X_test)

# Evaluate the tuned model
print("Tuned model performance:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_tuned)}")
print("Classification Report:")
print(classification_report(y_test, y_pred_tuned))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_tuned))

Best Parameters: {'iterations': 100, 'learning_rate': 0.01}
Tuned model performance:
Accuracy: 0.8566666666666667
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.85       139
           1       0.88      0.85      0.86       161

    accuracy                           0.86       300
   macro avg       0.86      0.86      0.86       300
weighted avg       0.86      0.86      0.86       300

Confusion Matrix:
[[120  19]
 [ 24 137]]


In [15]:
#CatBoost regressor

from catboost import CatBoostRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [16]:
X, y = make_regression(n_samples=1000, n_features=2, noise=10, random_state=1)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=1)

In [18]:
regressor = CatBoostRegressor(verbose=0)
regressor

<catboost.core.CatBoostRegressor at 0x7d03ca7f9d90>

In [19]:
regressor.fit(X_train, y_train)

<catboost.core.CatBoostRegressor at 0x7d03ca7f9d90>

In [20]:
# Predict
y_pred = regressor.predict(X_test)

# Evaluate the model
print("Current model performance:")
print(f"R2 score: {r2_score(y_test, y_pred)}")
print(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred)}")
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}")

Current model performance:
R2 score: 0.9819209595586482
Mean Absolute Error: 9.274611839431874
Mean Squared Error: 149.43188273623386


In [21]:
# Hyperparameter tuning using GridSearchCV
param_grid = {
    'iterations': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1, 0.2]
    # 'depth': [3, 4, 5, 6]
}

In [22]:
grid_search = GridSearchCV(estimator=CatBoostRegressor(), param_grid=param_grid, cv=5, verbose=2)
grid_search.fit(X_train, y_train)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
16:	learn: 77.9841362	total: 39.2ms	remaining: 653ms
17:	learn: 77.3591748	total: 40.3ms	remaining: 631ms
18:	learn: 76.7428669	total: 41.3ms	remaining: 611ms
19:	learn: 76.1258435	total: 46.9ms	remaining: 657ms
20:	learn: 75.5022130	total: 48.1ms	remaining: 639ms
21:	learn: 74.9249137	total: 49.2ms	remaining: 622ms
22:	learn: 74.3187040	total: 55.1ms	remaining: 663ms
23:	learn: 73.7149450	total: 58.1ms	remaining: 668ms
24:	learn: 73.1803075	total: 60.3ms	remaining: 664ms
25:	learn: 72.6131040	total: 62.1ms	remaining: 654ms
26:	learn: 72.0315337	total: 63.9ms	remaining: 646ms
27:	learn: 71.4529433	total: 64.9ms	remaining: 631ms
28:	learn: 70.8531143	total: 67.9ms	remaining: 634ms
29:	learn: 70.2763613	total: 68.9ms	remaining: 620ms
30:	learn: 69.7085999	total: 70ms	remaining: 607ms
31:	learn: 69.1803234	total: 74.5ms	remaining: 624ms
32:	learn: 68.6265346	total: 75.6ms	remaining: 612ms
33:	learn: 68.0727889	total: 76.7ms	

In [23]:
# Get best parameters
print("Best Parameters:", grid_search.best_params_)

# Train best model
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X_test)

# Evaluate the tuned model
print("Tuned model performance:")
print(f"R2 score: {r2_score(y_test, y_pred_tuned)}")
print(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred_tuned)}")
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred_tuned)}")

Best Parameters: {'iterations': 200, 'learning_rate': 0.05}
Tuned model performance:
R2 score: 0.9821898835817306
Mean Absolute Error: 9.102523303332436
Mean Squared Error: 147.2090975606283
