In [117]:
import numpy as np
import pandas as pd

__Importing the Car Evaluation Dataset__

In [119]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
car_evaluation = fetch_ucirepo(id=19) 
  
# data (as pandas dataframes) 
X = car_evaluation.data.features 
y = car_evaluation.data.targets 

In [120]:
print(type(X))
print(type(y))

print(X.shape)
print(y.shape)

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
(1728, 6)
(1728, 1)


__One Hot Encoding__

In [122]:
X_encoded = pd.get_dummies(X) # One hot encoding
y_encoded = pd.get_dummies(y) # One hot encoding

# print(X_encoded)
# print(y_encoded)

print(type(X))
print(type(y))

print(X_encoded.shape)
print(y_encoded.shape)

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
(1728, 21)
(1728, 4)


__Dataset Partitioning__

In [124]:
from sklearn.model_selection import train_test_split

# data split, 70% training and 30% temp (temp = validation + test)
X_train, X_temp, y_train, y_temp = train_test_split(X_encoded, y_encoded, test_size=0.3, random_state=42)

# 30% temp data into 15% validation and 15% test
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

__Building a Single Decision Tree__

In [126]:
from sklearn.metrics import confusion_matrix
import numpy as np

def calculate_metrics(y_true, y_pred):
    # Initialize confusion matrix counts
    acc_acc = acc_good = acc_unacc = acc_vgood = 0
    good_acc = good_good = good_unacc = good_vgood = 0
    unacc_acc = unacc_good = unacc_unacc = unacc_vgood = 0
    vgood_acc = vgood_good = vgood_unacc = vgood_vgood = 0

    y_true = y_true.to_numpy()
    
    # # Calculate confusion matrix manually
    # for actual, predicted in zip(y_true, y_pred):
    for i in range(len(y_true)):
        actual = y_true[i]
        # actual = y_true.iloc[i].values # <class 'pandas.core.frame.DataFrame'>
        predicted = y_pred[i] # <class 'numpy.ndarray'>
        
        if actual[0] and predicted[0]:
            acc_acc += 1
        elif actual[0] and predicted[1]:
            acc_good += 1
        elif actual[0] and predicted[2]:
            acc_unacc += 1
        elif actual[0] and predicted[3]:
            acc_vgood += 1
        elif actual[1] and predicted[0]:
            good_acc += 1
        elif actual[1] and predicted[1]:
            good_good += 1
        elif actual[1] and predicted[2]:
            good_unacc += 1
        elif actual[1] and predicted[3]:
            good_vgood += 1
        elif actual[2] and predicted[0]:
            unacc_acc += 1
        elif actual[2] and predicted[1]:
            unacc_good += 1
        elif actual[2] and predicted[2]:
            unacc_unacc += 1
        elif actual[2] and predicted[3]:
            unacc_vgood += 1
        elif actual[3] and predicted[0]:
            vgood_acc += 1
        elif actual[3] and predicted[1]:
            vgood_good += 1
        elif actual[3] and predicted[2]:
            vgood_unacc += 1
        elif actual[3] and predicted[3]:
            vgood_vgood += 1

    # Confusion matrix as an array
    confusion_matrix = [
        [acc_acc, acc_good, acc_unacc, acc_vgood],
        [good_acc, good_good, good_unacc, good_vgood],
        [unacc_acc, unacc_good, unacc_unacc, unacc_vgood],
        [vgood_acc, vgood_good, vgood_unacc, vgood_vgood]
    ]

    # Accuracy
    total_correct = acc_acc + good_good + unacc_unacc + vgood_vgood
    total_predictions = sum(sum(row) for row in confusion_matrix)
    accuracy = total_correct / total_predictions

    # Precision calculations
    Precision_of_acc = acc_acc / (acc_acc + good_acc + unacc_acc + vgood_acc)
    Precision_of_good = good_good / (acc_good + good_good + unacc_good + vgood_good)
    Precision_of_unacc = unacc_unacc / (acc_unacc + good_unacc + unacc_unacc + vgood_unacc)
    Precision_of_vgood = vgood_vgood / (acc_vgood + good_vgood + unacc_vgood + vgood_vgood)
    average_precision = (Precision_of_acc + Precision_of_good + Precision_of_unacc + Precision_of_vgood) / 4.0

    # Recall calculations
    Recall_of_acc = acc_acc / (acc_acc + acc_good + acc_unacc + acc_vgood)
    Recall_of_good = good_good / (good_acc + good_good + good_unacc + good_vgood)
    Recall_of_unacc = unacc_unacc / (unacc_acc + unacc_good + unacc_unacc + unacc_vgood)
    Recall_of_vgood = vgood_vgood / (vgood_acc + vgood_good + vgood_unacc + vgood_vgood)
    average_recall = (Recall_of_acc + Recall_of_good + Recall_of_unacc + Recall_of_vgood) / 4.0

    # F1 Score
    average_f1_score = 2 * ((average_precision * average_recall) / (average_precision + average_recall))

    # Print or return metrics
    print("Confusion Matrix:")
    for row in confusion_matrix:
        print(row)

    print(f"Accuracy: {accuracy}")
    print(f"Average Precision: {average_precision}")
    print(f"Average Recall: {average_recall}")
    print(f"Average F1 Score: {average_f1_score}")

In [127]:
from sklearn.tree import DecisionTreeClassifier

model = DecisionTreeClassifier()
model.fit(X_train, y_train)

y_pred_val = model.predict(X_val)
print("Validation results of Decision Tree: ")
calculate_metrics(y_val, y_pred_val)
print("-------------------------------")

y_pred_test = model.predict(X_test)
print("Test results of Decision Tree: ")
calculate_metrics(y_test, y_pred_test)

Validation results of Decision Tree: 
Confusion Matrix:
[50, 2, 3, 2]
[1, 8, 0, 1]
[1, 0, 177, 0]
[1, 1, 0, 12]
Accuracy: 0.9536679536679536
Average Precision: 0.8635005717552888
Average Recall: 0.8821794655177269
Average F1 Score: 0.8727400859269795
-------------------------------
Test results of Decision Tree: 
Confusion Matrix:
[51, 3, 6, 1]
[0, 9, 0, 0]
[2, 0, 178, 0]
[2, 1, 0, 7]
Accuracy: 0.9423076923076923
Average Precision: 0.8654929309820614
Average Recall: 0.8812386156648453
Average F1 Score: 0.8732948046085969


In [128]:
model1 = DecisionTreeClassifier(max_depth=10, min_samples_split=4, criterion='gini', min_samples_leaf=1, random_state=42)
model2 = DecisionTreeClassifier(max_depth=9, min_samples_split=5, criterion='entropy', min_samples_leaf=4, random_state=42)
model3 = DecisionTreeClassifier(max_depth=8, min_samples_split=2, criterion='gini', min_samples_leaf=10, random_state=42)
model4 = DecisionTreeClassifier(max_depth=11, min_samples_split=2, criterion='entropy', min_samples_leaf=1, random_state=42)

model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)
model4.fit(X_train, y_train)

y_pred_val1 = model1.predict(X_val)
y_pred_val2 = model2.predict(X_val)
y_pred_val3 = model3.predict(X_val)
y_pred_val4 = model4.predict(X_val)

# Validation results for Model1
print("Validation results for Model1: ")
calculate_metrics(y_val, y_pred_val1)
print("-------------------------------")

# Validation results for Model2
print("Validation results for Model2: ")
calculate_metrics(y_val, y_pred_val2)
print("-------------------------------")

# Validation results for Model3
print("Validation results for Model3: ")
calculate_metrics(y_val, y_pred_val3)
print("-------------------------------")

# Validation results for Model4
print("Validation results for Model4: ")
calculate_metrics(y_val, y_pred_val4)
print("-------------------------------")

# Test results for Model4
y_pred_test = model4.predict(X_test)
print("Test results for Model4: ")
calculate_metrics(y_test, y_pred_test)

Validation results for Model1: 
Confusion Matrix:
[48, 2, 4, 2]
[0, 8, 0, 1]
[1, 0, 176, 0]
[1, 2, 0, 11]
Accuracy: 0.94921875
Average Precision: 0.8475396825396825
Average Recall: 0.8815240785579769
Average F1 Score: 0.8641979023582292
-------------------------------
Validation results for Model2: 
Confusion Matrix:
[46, 1, 4, 2]
[0, 8, 0, 1]
[3, 0, 175, 0]
[1, 2, 0, 11]
Accuracy: 0.9448818897637795
Average Precision: 0.8526601610679823
Average Recall: 0.8814184425801979
Average F1 Score: 0.8668008354841572
-------------------------------
Validation results for Model3: 
Confusion Matrix:
[47, 2, 7, 1]
[2, 6, 0, 2]
[3, 0, 175, 0]
[5, 0, 0, 9]
Accuracy: 0.915057915057915
Average Precision: 0.8215249662618084
Average Recall: 0.7626411534454113
Average F1 Score: 0.7909886975362169
-------------------------------
Validation results for Model4: 
Confusion Matrix:
[52, 1, 2, 2]
[0, 8, 0, 1]
[0, 0, 178, 0]
[1, 1, 0, 12]
Accuracy: 0.9689922480620154
Average Precision: 0.8925052410901468
Averag

In [129]:
from xgboost import XGBClassifier

xgb_model = XGBClassifier(random_state=42)
xgb_model.fit(X_train, y_train)

y_pred_xgb = xgb_model.predict(X_val)
print("Validation results of XGBoost: ")
calculate_metrics(y_val, y_pred_xgb)
print("-------------------------------")

y_pred_xgb = xgb_model.predict(X_test)
print("Test results of XGBoost: ")
calculate_metrics(y_test, y_pred_xgb)

Validation results of XGBoost: 
Confusion Matrix:
[52, 2, 0, 1]
[0, 7, 0, 1]
[0, 0, 178, 0]
[1, 1, 0, 11]
Accuracy: 0.9763779527559056
Average Precision: 0.8818214804063861
Average Recall: 0.9166520979020979
Average F1 Score: 0.8988995109396035
-------------------------------
Test results of XGBoost: 
Confusion Matrix:
[52, 4, 0, 0]
[0, 8, 0, 0]
[1, 0, 179, 0]
[0, 0, 0, 7]
Accuracy: 0.9800796812749004
Average Precision: 0.9119496855345912
Average Recall: 0.9807539682539683
Average F1 Score: 0.9451012271738607


In [130]:
model1 = XGBClassifier(max_depth=10, colsample_bytree=0.7, learning_rate=0.01, n_estimators=200,subsample=1.0)
model2 = XGBClassifier(max_depth=11, colsample_bytree=0.8, learning_rate=0.05, n_estimators=100,subsample=0.5)
model3 = XGBClassifier(max_depth=12, colsample_bytree=0.9, learning_rate=0.1, n_estimators=150,subsample=0.7)
model4 = XGBClassifier(max_depth=13, colsample_bytree=1.0, learning_rate=0.2, n_estimators=170, subsample=0.9)

model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)
model4.fit(X_train, y_train)

y_pred_val1 = model1.predict(X_val)
y_pred_val2 = model2.predict(X_val)
y_pred_val3 = model3.predict(X_val)
y_pred_val4 = model4.predict(X_val)

# Validation results for model1
print("\nMetrics for Model 1:")
calculate_metrics(y_val, y_pred_val1)
print("-------------------------------")

# Validation results for model2
print("\nMetrics for Model 2:")
calculate_metrics(y_val, y_pred_val2)
print("-------------------------------")

# Validation results for model3
print("\nMetrics for Model 3:")
calculate_metrics(y_val, y_pred_val3)
print("-------------------------------")

# Validation results for model4
print("\nMetrics for Model 4:")
calculate_metrics(y_val, y_pred_val4)
print("-------------------------------")

# Test results for Model4
y_pred_test = model4.predict(X_test)
print("Test results for Model4: ")
calculate_metrics(y_test, y_pred_test)


Metrics for Model 1:
Confusion Matrix:
[47, 2, 0, 1]
[0, 3, 0, 1]
[0, 0, 174, 0]
[1, 0, 0, 11]
Accuracy: 0.9791666666666666
Average Precision: 0.8563301282051282
Average Recall: 0.9016666666666666
Average F1 Score: 0.8784138111255951
-------------------------------

Metrics for Model 2:
Confusion Matrix:
[51, 2, 0, 0]
[1, 3, 0, 0]
[0, 0, 177, 0]
[1, 0, 0, 10]
Accuracy: 0.9836734693877551
Average Precision: 0.8905660377358491
Average Recall: 0.9053387650085764
Average F1 Score: 0.897891642731014
-------------------------------

Metrics for Model 3:
Confusion Matrix:
[53, 2, 0, 1]
[1, 7, 0, 1]
[0, 0, 178, 0]
[1, 0, 0, 12]
Accuracy: 0.9765625
Average Precision: 0.8996392496392497
Average Recall: 0.911820818070818
Average F1 Score: 0.9056890750141248
-------------------------------

Metrics for Model 4:
Confusion Matrix:
[53, 2, 0, 1]
[1, 7, 0, 1]
[0, 0, 178, 0]
[1, 0, 0, 12]
Accuracy: 0.9765625
Average Precision: 0.8996392496392497
Average Recall: 0.911820818070818
Average F1 Score: 0.90