In [None]:
from sklearn.tree import DecisionTreeClassifier
# Candidate tree depths to sweep: 1, 11, 21, 31, 41, 51
depth = range(1, 52, 10)

# Accuracy per candidate depth; entry i corresponds to depth[i]
train_accuracy, validation_accuracy = [], []

# Fit one tree per candidate depth and record how it scores
for d in depth:

    # fit() returns the fitted estimator, so construction and fitting chain;
    # the fixed random_state keeps the sweep repeatable between runs
    tree = DecisionTreeClassifier(max_depth=d, random_state=42).fit(X_train, y_train)

    # Score on the data the tree was fitted on, then on the held-out data.
    # NOTE(review): the "validation" score is computed on X_test / y_test;
    # if a separate validation split exists, confirm whether it should be
    # used here so the test set is not also used to choose the depth.
    train_accuracy += [tree.score(X_train, y_train)]
    validation_accuracy += [tree.score(X_test, y_test)]

    # Progress message after each depth
    print(f'Done with depth {d}.')

In [None]:
# Train and validation accuracy vs. depth.
# Markers show the depths that were actually evaluated; the segments between
# them are only straight-line interpolation.
plt.plot(depth, train_accuracy, label='Train Accuracy', color='red', marker='o')
plt.plot(depth, validation_accuracy, label='Validation Accuracy', color='blue', marker='o')
plt.title('Accuracy vs. Depth')
plt.xlabel('Depth')
# The lists hold the return values of the classifier's score(), which are
# fractions in [0, 1], not percentages -- so the label carries no "(%)".
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Position of the highest validation accuracy (the first one, if several tie)
best_depth_index = validation_accuracy.index(max(validation_accuracy))

# depth and validation_accuracy are filled in the same order, so the same
# index selects the depth that produced that accuracy
best_depth = depth[best_depth_index]

print("Best Depth:", best_depth)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


# Base estimator for the search. random_state is fixed (same seed as the
# manual depth sweep earlier in the notebook) so that repeated runs of this
# cell select the same max_depth and produce the same scores.
dtree = DecisionTreeClassifier(random_state=42)

# Candidate max_depth values to compare
param_grid = {'max_depth': [10, 20, 30, 50, 100]}

# 5-fold cross-validated search over max_depth, ranked by accuracy
grid_search = GridSearchCV(dtree, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# max_depth with the highest mean cross-validated accuracy
best_max_depth = grid_search.best_params_['max_depth']

# Tabulate the per-candidate results. The param_* columns of cv_results_ may
# come back with object dtype, so max_depth is cast to int to guarantee a
# numeric x-axis that lines up with the numeric best-point marker below.
results = pd.DataFrame(grid_search.cv_results_).assign(
    param_max_depth=lambda frame: frame['param_max_depth'].astype(int)
)

# Mean cross-validated accuracy for each max_depth value
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(x='param_max_depth', y='mean_test_score', data=results, marker='o', ax=ax)

# Highlight the best max_depth value
ax.scatter(best_max_depth, grid_search.best_score_, color='red', marker='x', s=200, label='Best max_depth')

ax.set_title('Grid Search Results for Decision Tree Max Depth')
ax.set_xlabel('max_depth')
ax.set_ylabel('Mean Test Score (Accuracy)')
ax.legend()
ax.grid(True)
plt.show()

print("Best max_depth:", best_max_depth)