In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
 




Best CART Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 5}
Best CART Accuracy:  0.8080586080586081
Best ID3 Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2}
Best ID3 Accuracy:  0.8080952380952381


In [2]:
# Read the raw dataset into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
data = pd.read_csv(filepath_or_buffer='antfile17.csv')

In [3]:
# Separate the target column ('bug') from the remaining columns, which are
# all used as model features.
y = data['bug']
X = data.drop('bug', axis=1)

In [4]:
# Hold out 30% of the rows as a test split; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

In [5]:
# Hyperparameter search space for the decision-tree grid searches:
# 5 * 3 * 3 * 3 = 135 candidate combinations per search.
# A value of None for max_depth / max_features means "no limit" /
# "consider all features" respectively.
param_grid = dict(
    max_depth=[3, 5, 7, 10, None],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=[None, 'sqrt', 'log2'],
)

In [6]:
# CART: a decision tree that splits on Gini impurity, tuned by an exhaustive
# grid search scored with 5-fold cross-validated accuracy on the training split.
cart_model = DecisionTreeClassifier(random_state=42, criterion='gini')
cart_grid_search = GridSearchCV(
    estimator=cart_model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,  # run the candidate fits in parallel on all available cores
)
# Last expression of the cell: fit() returns the fitted search object, which
# the notebook then displays.
cart_grid_search.fit(X_train, y_train)

GridSearchCV(cv=5, estimator=DecisionTreeClassifier(random_state=42), n_jobs=-1,
             param_grid={'max_depth': [3, 5, 7, 10, None],
                         'max_features': [None, 'sqrt', 'log2'],
                         'min_samples_leaf': [1, 2, 4],
                         'min_samples_split': [2, 5, 10]},
             scoring='accuracy')

In [7]:
# Pull the winning hyperparameter combination and its mean cross-validated
# score out of the fitted CART search.
cart_best_params, cart_best_accuracy = (
    cart_grid_search.best_params_,
    cart_grid_search.best_score_,
)

In [8]:
# Report the best CART configuration and its cross-validated accuracy,
# one "label value" line per item.
for report_label, report_value in (
    ("Best CART Parameters: ", cart_best_params),
    ("Best CART Accuracy: ", cart_best_accuracy),
):
    print(report_label, report_value)

Best CART Parameters:  {'max_depth': 7, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 5}
Best CART Accuracy:  0.8080586080586081


In [9]:
# "ID3" variant: the same grid search, but the tree splits on entropy
# (information gain) instead of Gini impurity.
# NOTE(review): scikit-learn's DecisionTreeClassifier is a CART-based
# implementation, so this is CART with the entropy criterion rather than a
# literal ID3 tree.
id3_model = DecisionTreeClassifier(random_state=42, criterion='entropy')
id3_grid_search = GridSearchCV(
    estimator=id3_model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,  # run the candidate fits in parallel on all available cores
)
# Last expression of the cell: fit() returns the fitted search object, which
# the notebook then displays.
id3_grid_search.fit(X_train, y_train)

GridSearchCV(cv=5,
             estimator=DecisionTreeClassifier(criterion='entropy',
                                              random_state=42),
             n_jobs=-1,
             param_grid={'max_depth': [3, 5, 7, 10, None],
                         'max_features': [None, 'sqrt', 'log2'],
                         'min_samples_leaf': [1, 2, 4],
                         'min_samples_split': [2, 5, 10]},
             scoring='accuracy')

In [10]:
# Pull the winning hyperparameter combination and its mean cross-validated
# score out of the fitted entropy-criterion search, then report both.
id3_best_params, id3_best_accuracy = (
    id3_grid_search.best_params_,
    id3_grid_search.best_score_,
)

for report_label, report_value in (
    ("Best ID3 Parameters: ", id3_best_params),
    ("Best ID3 Accuracy: ", id3_best_accuracy),
):
    print(report_label, report_value)

Best ID3 Parameters:  {'max_depth': 3, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2}
Best ID3 Accuracy:  0.8080952380952381
