In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score


In [2]:
# Load the UCI Car Evaluation dataset into a DataFrame.
# The file is read with header=None, so the column names are supplied explicitly.
# The download uses HTTPS so the data cannot be silently altered in transit.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/car/car.data"
column_names = ["buying", "maint", "doors", "persons", "lug_boot", "safety", "class"]
data = pd.read_csv(data_url, names=column_names, header=None)

In [3]:
# Peek at the first five raw rows to confirm the columns were parsed as expected
data.head(n=5)

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


In [6]:
# One-hot encode the six categorical attributes; the "class" target column is left untouched.
# dtype=int pins the indicator columns to 0/1 integers: since pandas 2.0 the default
# indicator dtype is bool, which would render True/False instead of 0/1.
# NOTE: this rebinds `data`, so the cell is not re-runnable on its own -- re-run the
# loading cell first, otherwise the listed columns no longer exist.
data = pd.get_dummies(
    data,
    columns=['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety'],
    dtype=int,
)


In [7]:
# Inspect the first five rows again to verify the one-hot indicator columns
data.head(n=5)

Unnamed: 0,class,buying_high,buying_low,buying_med,buying_vhigh,maint_high,maint_low,maint_med,maint_vhigh,doors_2,...,doors_5more,persons_2,persons_4,persons_more,lug_boot_big,lug_boot_med,lug_boot_small,safety_high,safety_low,safety_med
0,unacc,0,0,0,1,0,0,0,1,1,...,0,1,0,0,0,0,1,0,1,0
1,unacc,0,0,0,1,0,0,0,1,1,...,0,1,0,0,0,0,1,0,0,1
2,unacc,0,0,0,1,0,0,0,1,1,...,0,1,0,0,0,0,1,1,0,0
3,unacc,0,0,0,1,0,0,0,1,1,...,0,1,0,0,0,1,0,0,1,0
4,unacc,0,0,0,1,0,0,0,1,1,...,0,1,0,0,0,1,0,0,0,1


In [8]:
# Splitting the dataset: separate the target from the features, then hold out
# 20% of the rows as a test set (fixed seed so the split is reproducible)
y = data["class"]
X = data.drop(columns=["class"])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [10]:
# Hyperparameter grid for the decision tree:
#   max_depth        -> even depths from 2 to 10
#   min_samples_leaf -> powers of two from 1 to 8
param_grid = {
    "max_depth": list(range(2, 11, 2)),
    "min_samples_leaf": [2 ** exponent for exponent in range(4)],
}

In [11]:
# Evaluate every combination in param_grid with 5-fold cross-validation
# on the training split and report the winning combination
clf = DecisionTreeClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    cv=5,
)
grid_search.fit(X_train, y_train)
print("Best hyperparameters:", grid_search.best_params_)

Best hyperparameters: {'max_depth': 10, 'min_samples_leaf': 1}


In [12]:
# Take the tuned model from the grid search.
# GridSearchCV was created with its default refit=True, so best_estimator_ has
# already been retrained on all of X_train with the best hyperparameters;
# calling .fit() on it again would only repeat that (deterministic) work.
best_clf = grid_search.best_estimator_
best_clf

In [14]:
# Score the tuned tree on the held-out test split:
# predict labels, then compute the fraction that match the true labels
y_pred = best_clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the accuracy as a percentage with two decimals
accuracy_message = "Accuracy: {:.2f}%".format(accuracy * 100)
print(accuracy_message)

Accuracy: 95.09%


In [15]:
# Line up each true test label with the model's prediction for the same row
results_df = pd.DataFrame(
    {
        "Actual": y_test,
        "Predicted": y_pred,
    }
)

In [16]:
# Show the first ten actual-vs-predicted pairs as plain text
print(results_df.iloc[:10])

     Actual Predicted
599   unacc     unacc
1201    acc      good
628   unacc     unacc
1498    acc       acc
1263  unacc     unacc
931     acc     unacc
23    unacc     unacc
844   unacc     unacc
964   unacc     unacc
764   unacc     unacc


Conclusion:

In this project, we used the UCI Car Evaluation Dataset, which contains data on cars and their acceptability based on various attributes. The task was to build a model to classify the acceptability of cars based on their attributes.

We started by loading the dataset and preprocessing it using one-hot encoding to encode categorical variables. We then split the dataset into a training set and a test set.

Next, we used a Decision Tree Classifier to classify the cars based on their attributes. We tuned two hyperparameters, max_depth and min_samples_leaf, evaluating every combination of candidate values using grid search with 5-fold cross-validation to find the best setting.

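The grid search selected max_depth=10 and min_samples_leaf=1. We trained the model on the full training set using these best hyperparameters and evaluated its performance on the held-out test set using the accuracy score. The model achieved an accuracy of 95.09%, indicating that it can accurately classify the acceptability of cars based on their attributes. Accuracy is an aggregate figure, however: the sample of predictions shown above still contains misclassified "acc" cars, so per-class metrics would give a fuller picture of the model's performance.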

In conclusion, we successfully built a model to classify car acceptability based on their attributes and achieved a high accuracy score. The model could be useful for various applications, such as car recommendation systems or quality control in car manufacturing.