In [2]:
# pip install ucimlrepo
import numpy as np
import pandas as pd

__Importing the Car Evaluation Dataset__

In [4]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
car_evaluation = fetch_ucirepo(id=19) 
  
# data (as pandas dataframes) 
X = car_evaluation.data.features 
y = car_evaluation.data.targets 
  
# metadata 
print(car_evaluation.metadata) 
  
# variable information 
print(car_evaluation.variables)

{'uci_id': 19, 'name': 'Car Evaluation', 'repository_url': 'https://archive.ics.uci.edu/dataset/19/car+evaluation', 'data_url': 'https://archive.ics.uci.edu/static/public/19/data.csv', 'abstract': 'Derived from simple hierarchical decision model, this database may be useful for testing constructive induction and structure discovery methods.', 'area': 'Other', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 1728, 'num_features': 6, 'feature_types': ['Categorical'], 'demographics': [], 'target_col': ['class'], 'index_col': None, 'has_missing_values': 'no', 'missing_values_symbol': None, 'year_of_dataset_creation': 1988, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5JP48', 'creators': ['Marko Bohanec'], 'intro_paper': {'ID': 249, 'type': 'NATIVE', 'title': 'Knowledge acquisition and explanation for multi-attribute decision making', 'authors': 'M. Bohanec, V. Rajkovič', 'venue': '8th Intl Workshop on Expert Systems and their Applications, 

In [18]:
print(type(X))
print(type(y))

print(X.shape)
print(y.shape)

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
(1728, 6)
(1728, 1)


__One Hot Encoding (Train, Validation & Test Sets)__

In [22]:
X_encoded = pd.get_dummies(X) # One hot encoding
y_encoded = pd.get_dummies(y) # One hot encoding

# print(X_encoded)
# print(y_encoded)

print(type(X))
print(type(y))

print(X_encoded.shape)
print(y_encoded.shape)

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
(1728, 21)
(1728, 4)


__Dataset Partitioning__

In [24]:
from sklearn.model_selection import train_test_split

# data split, 70% training and 30% temp (temp = validation + test)
X_train, X_temp, y_train, y_temp = train_test_split(X_encoded, y_encoded, test_size=0.3, random_state=42)

# 30% temp data into 15% validation and 15% test
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [26]:
X_train

Unnamed: 0,buying_high,buying_low,buying_med,buying_vhigh,maint_high,maint_low,maint_med,maint_vhigh,doors_2,doors_3,...,doors_5more,persons_2,persons_4,persons_more,lug_boot_big,lug_boot_med,lug_boot_small,safety_high,safety_low,safety_med
1178,False,False,True,False,False,False,True,False,False,False,...,True,False,True,False,True,False,False,True,False,False
585,True,False,False,False,True,False,False,False,False,True,...,False,False,False,True,False,False,True,False,True,False
1552,False,True,False,False,False,False,True,False,False,True,...,False,False,True,False,False,True,False,False,False,True
1169,False,False,True,False,False,False,True,False,False,False,...,True,True,False,False,True,False,False,True,False,False
1033,False,False,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1130,False,False,True,False,False,False,True,False,False,True,...,False,False,False,True,False,True,False,True,False,False
1294,False,False,True,False,False,True,False,False,False,False,...,True,False,False,True,True,False,False,False,False,True
860,True,False,False,False,False,True,False,False,False,False,...,True,False,False,True,False,True,False,True,False,False
1459,False,True,False,False,True,False,False,False,False,False,...,False,True,False,False,False,False,True,False,False,True


In [28]:
y_train

Unnamed: 0,class_acc,class_good,class_unacc,class_vgood
1178,False,False,False,True
585,False,False,True,False
1552,True,False,False,False
1169,False,False,True,False
1033,False,False,True,False
...,...,...,...,...
1130,False,False,False,True
1294,False,True,False,False
860,True,False,False,False
1459,False,False,True,False


In [32]:
X_val

Unnamed: 0,buying_high,buying_low,buying_med,buying_vhigh,maint_high,maint_low,maint_med,maint_vhigh,doors_2,doors_3,...,doors_5more,persons_2,persons_4,persons_more,lug_boot_big,lug_boot_med,lug_boot_small,safety_high,safety_low,safety_med
192,False,False,False,True,True,False,False,False,False,False,...,True,True,False,False,False,True,False,False,True,False
834,True,False,False,False,False,True,False,False,False,False,...,False,False,False,True,True,False,False,False,True,False
677,True,False,False,False,False,False,True,False,False,True,...,False,True,False,False,False,False,True,True,False,False
1516,False,True,False,False,False,False,True,False,True,False,...,False,True,False,False,False,True,False,False,False,True
1157,False,False,True,False,False,False,True,False,False,False,...,False,False,False,True,False,True,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
582,True,False,False,False,True,False,False,False,False,True,...,False,False,True,False,True,False,False,False,True,False
1450,False,True,False,False,True,False,False,False,False,True,...,False,False,False,True,False,False,True,False,False,True
244,False,False,False,True,False,False,True,False,False,True,...,False,True,False,False,False,False,True,False,False,True
907,False,False,True,False,False,False,False,True,False,True,...,False,False,True,False,True,False,False,False,False,True


In [34]:
y_val

Unnamed: 0,class_acc,class_good,class_unacc,class_vgood
192,False,False,True,False
834,False,False,True,False
677,False,False,True,False
1516,False,False,True,False
1157,False,False,False,True
...,...,...,...,...
582,False,False,True,False
1450,True,False,False,False
244,False,False,True,False
907,True,False,False,False


In [36]:
X_test

Unnamed: 0,buying_high,buying_low,buying_med,buying_vhigh,maint_high,maint_low,maint_med,maint_vhigh,doors_2,doors_3,...,doors_5more,persons_2,persons_4,persons_more,lug_boot_big,lug_boot_med,lug_boot_small,safety_high,safety_low,safety_med
974,False,False,True,False,True,False,False,False,True,False,...,False,True,False,False,False,False,True,True,False,False
78,False,False,False,True,False,False,False,True,False,False,...,False,False,False,True,True,False,False,False,True,False
23,False,False,False,True,False,False,False,True,True,False,...,False,False,False,True,False,True,False,True,False,False
813,True,False,False,False,False,True,False,False,False,False,...,False,True,False,False,False,True,False,False,True,False
1356,False,True,False,False,False,False,False,True,False,False,...,False,True,False,False,True,False,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
998,False,False,True,False,True,False,False,False,True,False,...,False,False,False,True,True,False,False,True,False,False
1221,False,False,True,False,False,True,False,False,False,True,...,False,True,False,False,True,False,False,False,True,False
367,False,False,False,True,False,True,False,False,False,True,...,False,False,True,False,True,False,False,False,False,True
1428,False,True,False,False,True,False,False,False,True,False,...,False,False,False,True,True,False,False,False,True,False


In [38]:
y_test

Unnamed: 0,class_acc,class_good,class_unacc,class_vgood
974,False,False,True,False
78,False,False,True,False
23,False,False,True,False
813,False,False,True,False
1356,False,False,True,False
...,...,...,...,...
998,True,False,False,False
1221,False,False,True,False
367,True,False,False,False
1428,False,False,True,False


In [6]:
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier()

In [7]:
classifier.fit(X_train, y_train)

In [8]:
from sklearn.metrics import accuracy_score, classification_report

# Make predictions on the validation set
y_val_pred = classifier.predict(X_val)

# Calculate accuracy
accuracy = accuracy_score(y_val, y_val_pred)
print(f"Validation Accuracy: {accuracy:.2f}")

# Print classification report for more detailed metrics
print("Classification Report:")
print(classification_report(y_val, y_val_pred))

Validation Accuracy: 0.95
Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.88      0.91        57
           1       0.73      0.80      0.76        10
           2       0.98      0.99      0.99       178
           3       0.80      0.86      0.83        14

   micro avg       0.95      0.95      0.95       259
   macro avg       0.86      0.88      0.87       259
weighted avg       0.95      0.95      0.95       259
 samples avg       0.95      0.95      0.95       259



In [9]:
from sklearn.metrics import accuracy_score, classification_report

# Make predictions on the test set
y_test_pred = classifier.predict(X_test)

# Calculate accuracy
test_accuracy = accuracy_score(y_test, y_test_pred)
print(f"Test Accuracy: {test_accuracy:.2f}")

# Print classification report for more detailed metrics
print("Classification Report on Test Data:")
print(classification_report(y_test, y_test_pred))

Test Accuracy: 0.95
Classification Report on Test Data:
              precision    recall  f1-score   support

           0       0.93      0.87      0.90        61
           1       0.69      1.00      0.82         9
           2       0.98      0.99      0.98       180
           3       0.88      0.70      0.78        10

   micro avg       0.95      0.95      0.95       260
   macro avg       0.87      0.89      0.87       260
weighted avg       0.95      0.95      0.95       260
 samples avg       0.95      0.95      0.95       260

