# [CatBoost](https://catboost.ai/) 

CatBoost is a machine learning algorithm developed by Yandex, a Russian technology company. It's designed for gradient boosting on decision trees and is particularly powerful for tasks involving categorical features.



In [33]:
! pip install catboost -q

In [34]:
# importing the libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import catboost as cb
from catboost import CatBoostClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, accuracy_score, confusion_matrix,classification_report, precision_score, f1_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [35]:
# import the dataset of titanic
df= sns.load_dataset('titanic')
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [36]:
# preprocess the data
# impute missing values using knn imputer in fare, age, embark and embarked_town
from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=5)
df[['fare', 'age']] = imputer.fit_transform(df[['fare', 'age']])
# impute missing values of embark and embarked_town using mode
# df['embarked_town'] = df['embarked'].map({'C': 'Cherbourg', 'Q': 'Queenstown', 'S': 'Southampton'})
# df['embarked_town'] = df['embarked_town'].fillna(df['embarked_town'].mode()[0])
# df = df.drop('embarked', axis=1)
# df
# impute missing values using mode in embarked and embarked_town using simple imputer
# df['embarked'] = df['embarked'].fillna(df['embarked'].mode()[0])
# df['embarked_town'] = df['embarked_town'].fillna(df['embarked_town'].mode()[0])
# df

# impute missing values of embark and embarked using pandas
df['embarked'] = df['embarked'].fillna(df['embarked'].mode()[0])
df['embark_town'] = df['embarked'].fillna(df['embark_town'].mode()[0])

# drop the deck column
df.drop('deck', axis=1, inplace=True)

In [37]:
df.isnull().sum().sort_values(ascending=False)

survived       0
pclass         0
sex            0
age            0
sibsp          0
parch          0
fare           0
embarked       0
class          0
who            0
adult_male     0
embark_town    0
alive          0
alone          0
dtype: int64

In [38]:
# convert each catagory column to catagory
categorical_cols=df.select_dtypes(include=['object', 'category']).columns
df[categorical_cols]=df[categorical_cols].astype('category')

In [39]:
# split data into X and y
X = df.drop('survived', axis=1)
y = df['survived']

# split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [41]:
# run the catboost classifier
model= CatBoostClassifier(iterations=100, depth=3, learning_rate=0.1, loss_function='Logloss',eval_metric='Accuracy',verbose=False, random_seed=42)


# train the model
model.fit(X_train, y_train, cat_features=categorical_cols)

# predict the data
y_pred = model.predict(X_test)
# evaluate the model


In [42]:
# evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix")
print(confusion_matrix(y_test, y_pred))
print("Classification Report")  
print(classification_report(y_test, y_pred))

Accuracy: 1.0
Confusion Matrix
[[105   0]
 [  0  74]]
Classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       105
           1       1.00      1.00      1.00        74

    accuracy                           1.00       179
   macro avg       1.00      1.00      1.00       179
weighted avg       1.00      1.00      1.00       179

