In [3]:
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Fetch the wine dataset and pull out the feature matrix and class labels
data = load_wine()
X = data.data
y = data.target

# Hold out 20% of the samples for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [4]:
# Baseline model: a decision tree left at its default hyperparameters.
# fit() returns the fitted estimator, so construction and training are chained.
dt_default = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Training split: predictions and accuracy
default_train_pred = dt_default.predict(X_train)
default_train_acc = accuracy_score(y_true=y_train, y_pred=default_train_pred)

# Test split: predictions and accuracy
default_test_pred = dt_default.predict(X_test)
default_test_acc = accuracy_score(y_true=y_test, y_pred=default_test_pred)


In [5]:
# Tuned model: exhaustive grid search over a few tree-size hyperparameters
param_grid = dict(
    max_depth=[3, 5, 7, None],       # depth cap (None = no cap)
    min_samples_split=[2, 5, 10],    # samples required before a node may split
    min_samples_leaf=[1, 2, 4],      # samples required in every leaf
)

# Every combination is scored by accuracy with 5-fold cross-validation on the
# training split only; n_jobs=-1 spreads the fits over all available cores.
dt = DecisionTreeClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=dt,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Estimator for the best-scoring parameter combination
dt_tuned = grid_search.best_estimator_

# Training split: predictions and accuracy
tuned_train_pred = dt_tuned.predict(X_train)
tuned_train_acc = accuracy_score(y_true=y_train, y_pred=tuned_train_pred)

# Test split: predictions and accuracy
tuned_test_pred = dt_tuned.predict(X_test)
tuned_test_acc = accuracy_score(y_true=y_test, y_pred=tuned_test_pred)

In [6]:
# Report: baseline metrics (the trailing "\n" in the last item leaves a blank line)
print(
    "Before Tuning (Default Decision Tree):",
    f"Training Accuracy: {default_train_acc:.4f}",
    f"Test Accuracy: {default_test_acc:.4f}\n",
    sep="\n",
)

# Report: tuned-model metrics together with the winning hyperparameters
print(
    "After Tuning (Tuned Decision Tree):",
    f"Best Parameters: {grid_search.best_params_}",
    f"Training Accuracy: {tuned_train_acc:.4f}",
    f"Test Accuracy: {tuned_test_acc:.4f}",
    sep="\n",
)

# Report: depth of each fitted tree, to show how tuning changed model size
print(
    f"\nDefault Tree Depth: {dt_default.tree_.max_depth}",
    f"Tuned Tree Depth: {dt_tuned.tree_.max_depth}",
    sep="\n",
)

Before Tuning (Default Decision Tree):
Training Accuracy: 1.0000
Test Accuracy: 0.9444

After Tuning (Tuned Decision Tree):
Best Parameters: {'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2}
Training Accuracy: 0.9930
Test Accuracy: 0.9444

Default Tree Depth: 4
Tuned Tree Depth: 3
