# Boosted Trees Exercise

This exercise uses the UCI Car Evaluation dataset: http://archive.ics.uci.edu/dataset/19/car+evaluation

In [18]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

In [10]:
# Read the car-evaluation table straight from the UCI archive. Because
# explicit `names` are passed, the first line of the file is read as data
# rather than as a header row.
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data"
column_names = ["buying", "maint", "doors", "persons", "lug_boot", "safety", "class"]
df = pd.read_csv(url, names=column_names)

# One-hot encode every column except the "class" target, which is left as-is
# so it can be label-encoded separately.
df_encoded = pd.get_dummies(
    df,
    columns=[name for name in column_names if name != "class"],
)


In [15]:
# The "class" column is the target; every other (one-hot) column is a feature.
y = df_encoded["class"]
X = df_encoded.drop(columns="class")

# Turn the class labels into integer codes; the fitted encoder is kept so the
# codes can be mapped back to the original labels later.
label_encoder = LabelEncoder().fit(y)
y_encoded = label_encoder.transform(y)

# Hold out 20% of the rows for evaluation, with a fixed seed so the split is
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

In [19]:
# 1. XGBoost: fit on the training split, then score the held-out split.
xgb_model = XGBClassifier(objective="multi:softmax")
xgb_model.fit(X_train, y_train)
xgb_predictions = xgb_model.predict(X_test)

# Per-class precision/recall/F1 followed by the overall accuracy.
print(
    "XGBoost Classification Report:",
    classification_report(y_test, xgb_predictions),
    sep="\n",
)
print(
    "XGBoost Overall Accuracy:",
    accuracy_score(y_test, xgb_predictions),
)

XGBoost Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.92      0.96        83
           1       0.61      1.00      0.76        11
           2       1.00      1.00      1.00       235
           3       1.00      1.00      1.00        17

    accuracy                           0.98       346
   macro avg       0.90      0.98      0.93       346
weighted avg       0.99      0.98      0.98       346

XGBoost Overall Accuracy: 0.9797687861271677


In [21]:
# 2. CatBoost: fit on the training split, then score the held-out split.
catboost_model = CatBoostClassifier(
    loss_function="MultiClass",
    iterations=150,
    learning_rate=0.3,
    depth=5,
    verbose=15,  # controls how often training progress is logged
)
catboost_model.fit(X_train, y_train)
catboost_predictions = catboost_model.predict(X_test)

# Per-class precision/recall/F1 followed by the overall accuracy.
print(
    "\nCatBoost Classification Report:",
    classification_report(y_test, catboost_predictions),
    sep="\n",
)
print(
    "CatBoost Overall Accuracy:",
    accuracy_score(y_test, catboost_predictions),
)

0:	learn: 0.9474975	total: 1.68ms	remaining: 250ms
15:	learn: 0.2026547	total: 37.2ms	remaining: 312ms
30:	learn: 0.1136741	total: 70.6ms	remaining: 271ms
45:	learn: 0.0760837	total: 104ms	remaining: 236ms
60:	learn: 0.0548474	total: 142ms	remaining: 207ms
75:	learn: 0.0425822	total: 177ms	remaining: 172ms
90:	learn: 0.0346446	total: 209ms	remaining: 135ms
105:	learn: 0.0287692	total: 241ms	remaining: 100ms
120:	learn: 0.0247338	total: 276ms	remaining: 66.2ms
135:	learn: 0.0219709	total: 309ms	remaining: 31.8ms
149:	learn: 0.0195640	total: 336ms	remaining: 0us

CatBoost Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.92      0.96        83
           1       0.59      0.91      0.71        11
           2       1.00      1.00      1.00       235
           3       0.94      1.00      0.97        17

    accuracy                           0.98       346
   macro avg       0.88      0.96      0.91       346
weighted avg       0