In [1]:
# Import pandas for loading and manipulating the dataset
import pandas as pd

# Remote location of the car evaluation dataset (CSV, hosted on GitHub)
file_url = (
    'https://raw.githubusercontent.com/PacktWorkshops/The-Applied-Artificial-Intelligence-Workshop/master/Datasets/car.csv'
)

# Download the CSV and parse it into a pandas DataFrame
df = pd.read_csv(filepath_or_buffer=file_url)
# Import preprocessing module for encoding categorical data
from sklearn import preprocessing 

# Define a function that applies Label Encoding to a given column
def encode(data_frame, column):
    """Label-encode a single column of ``data_frame``.

    A fresh ``LabelEncoder`` is fitted on the distinct values of
    ``data_frame[column]`` and then used to transform that same column.

    Args:
        data_frame: Object indexable by ``column`` whose result exposes
            ``.unique()`` (a pandas DataFrame in this notebook).
        column: Label of the column to encode.

    Returns:
        The result of ``LabelEncoder.transform`` applied to the column.
        The fitted encoder itself is discarded, so the mapping back to the
        original values is not kept.
    """
    values = data_frame[column]
    # fit() hands back the encoder itself, so it can be chained.
    encoder = preprocessing.LabelEncoder().fit(values.unique())
    return encoder.transform(values)
# Replace every column (features and target alike) with its integer label codes.
# Iterating a DataFrame yields its column labels.
for column in df:
    df[column] = encode(df, column)

# Remove the target column ("class") from df and keep it as the label vector;
# after this, df holds only the feature columns.
label = df.pop('class')
# Import function to split data into training and testing sets
from sklearn import model_selection
# Hold out 10% of the rows for testing; the remaining 90% are used for training.
# random_state is fixed so the split is reproducible.
# NOTE(review): the split is not stratified; consider stratify=label if the
# rare classes matter for the evaluation.
(features_train, features_test,
 label_train, label_test) = model_selection.train_test_split(
    df,
    label,
    test_size=0.1,
    random_state=88,
)

In [3]:
# Import RandomForestClassifier model
from sklearn.ensemble import RandomForestClassifier

In [4]:
# Random Forest: 100 trees, each limited to depth 6, seeded for reproducibility
random_forest_classifier = RandomForestClassifier(
    n_estimators=100,
    max_depth=6,
    random_state=168,
)

In [5]:
# Fit the Random Forest on the training features and labels
# (the fitted estimator is the cell's displayed value)
random_forest_classifier.fit(X=features_train, y=label_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [6]:
# Predict the encoded class of every row in the held-out test set
rf_preds_test = random_forest_classifier.predict(X=features_test)
# Show the predicted class codes
rf_preds_test

array([0, 0, 2, 0, 0, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       0, 2, 2, 3, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 0, 2, 0, 0, 0, 2, 2, 0, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0,
       0, 2, 2, 2, 0, 2, 3, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2,
       2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2,
       0, 2, 2, 0, 2, 2, 3, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0,
       2, 2, 2, 2, 2, 2, 2, 0, 3, 3, 2, 0, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2,
       2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 0, 2])

In [7]:
# Import classification_report for performance metrics
from sklearn.metrics import classification_report

In [8]:
# Print per-class precision / recall / F1 for the Random Forest predictions.
# The model never predicts class 1 on this test set, so that class's precision
# is undefined. zero_division=0 reports it as 0 explicitly (the same value the
# default 'warn' setting prints) without emitting UndefinedMetricWarning.
print(classification_report(label_test, rf_preds_test, zero_division=0))

              precision    recall  f1-score   support

           0       0.67      0.76      0.71        42
           1       0.00      0.00      0.00         9
           2       0.92      0.96      0.94       114
           3       0.83      0.62      0.71         8

    accuracy                           0.84       173
   macro avg       0.60      0.59      0.59       173
weighted avg       0.80      0.84      0.82       173



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [9]:
# Import confusion_matrix for evaluating classification errors
from sklearn.metrics import confusion_matrix

In [10]:
# Confusion matrix of true test labels against the Random Forest predictions
confusion_matrix(y_true=label_test, y_pred=rf_preds_test)

array([[ 32,   0,  10,   0],
       [  8,   0,   0,   1],
       [  5,   0, 109,   0],
       [  3,   0,   0,   5]])

In [11]:
# Extract and display feature importance scores from the Random Forest.
# NOTE(review): the array carries no feature names; the scores are presumably
# in the same order as the columns of features_train. Confirm before interpreting.
rf_varimp = random_forest_classifier.feature_importances_
rf_varimp

array([0.12676384, 0.10366314, 0.02119621, 0.35266673, 0.05915769,
       0.33655239])

In [12]:
# Import the ExtraTreesClassifier model
from sklearn.ensemble import ExtraTreesClassifier

In [13]:
# Extra Trees: 100 trees, each limited to depth 6, seeded for reproducibility
extra_trees_classifier = ExtraTreesClassifier(
    n_estimators=100,
    max_depth=6,
    random_state=168,
)

In [14]:
# Fit the Extra Trees model on the training features and labels
# (the fitted estimator is the cell's displayed value)
extra_trees_classifier.fit(X=features_train, y=label_train)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,6
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,False


In [15]:
# Predict the encoded class of every row in the held-out test set
et_preds_test = extra_trees_classifier.predict(X=features_test)
# Show the predicted class codes
et_preds_test

array([0, 0, 2, 0, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       0, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 0, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0,
       0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2,
       2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2,
       0, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 0,
       2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 0, 0, 2, 2, 0, 2,
       2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 2])

In [16]:
# Print per-class precision / recall / F1 for the Extra Trees predictions.
# The model never predicts class 1 on this test set, so that class's precision
# is undefined. zero_division=0 reports it as 0 explicitly (the same value the
# default 'warn' setting prints) without emitting UndefinedMetricWarning.
print(classification_report(label_test, et_preds_test, zero_division=0))

              precision    recall  f1-score   support

           0       0.61      0.67      0.64        42
           1       0.00      0.00      0.00         9
           2       0.89      0.98      0.93       114
           3       1.00      0.12      0.22         8

    accuracy                           0.82       173
   macro avg       0.62      0.44      0.45       173
weighted avg       0.78      0.82      0.78       173



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [17]:
# Confusion matrix of true test labels against the Extra Trees predictions
confusion_matrix(y_true=label_test, y_pred=et_preds_test)

array([[ 28,   0,  14,   0],
       [  9,   0,   0,   0],
       [  2,   0, 112,   0],
       [  7,   0,   0,   1]])

In [18]:
# Extract and display feature importance scores for Extra Trees.
# NOTE(review): the array carries no feature names; the scores are presumably
# in the same order as the columns of features_train. Confirm before interpreting.
et_varimp = extra_trees_classifier.feature_importances_
# Display the feature importance scores
et_varimp

array([0.08844544, 0.0702334 , 0.01440408, 0.37662014, 0.05965896,
       0.39063797])