In [11]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import warnings
warnings.filterwarnings('ignore')
from scipy.stats import randint
import matplotlib.pyplot as plt

# Load the dataset (e.g., Iris dataset)
data = load_iris()
X = data.data
y = data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create the base Decision Tree model
clf = DecisionTreeClassifier(random_state=42)

# Define the parameter grid
param_dist = {
    'criterion': ['entropy'],             # Criterion for splitting
    'splitter': ['best', 'random'],               # Strategy used to choose the split at each node
    'max_depth': randint(1, 10),                  # Maximum depth of the tree
    'min_samples_split': randint(2, 10),          # Minimum number of samples required to split an internal node
    'min_samples_leaf': randint(1, 10),           # Minimum number of samples required to be at a leaf node
    'max_features': ['auto', 'sqrt', 'log2', None] # Number of features to consider when looking for the best split
}

# Perform Randomized Search with Cross-Validation
random_search = RandomizedSearchCV(clf, param_distributions=param_dist, 
                                   n_iter=100, cv=5, verbose=1, 
                                   random_state=42, n_jobs=-1)

# Fit the RandomizedSearchCV
random_search.fit(X_train, y_train)

# Get the best parameters
best_params = random_search.best_params_
print(f'Best Parameters: {best_params}')

# Predict with the best model
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Print Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)


Fitting 5 folds for each of 100 candidates, totalling 500 fits
Best Parameters: {'criterion': 'entropy', 'max_depth': 8, 'max_features': 'sqrt', 'min_samples_leaf': 3, 'min_samples_split': 2, 'splitter': 'best'}
Accuracy: 97.78%
Confusion Matrix:
[[19  0  0]
 [ 0 12  1]
 [ 0  0 13]]
