In [None]:
# Hyperparameter Tuning

In [1]:
# Step 1: Import Libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from scipy.stats import randint

In [3]:
# Step 2: Load the Iris Dataset
# Iris is a simple classification dataset; the loaded object exposes the
# feature matrix as `.data` and the class labels as `.target`
data = load_iris()
X, y = data.data, data.target  # X: features, y: target labels

In [5]:
# Step 3: Train a Base Model

# Hold out 30% of the samples for testing (70% stay in the training split);
# the fixed random_state keeps the split repeatable across runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Build a DecisionTreeClassifier with default hyperparameters and fit it on the
# training split (fit() returns the estimator itself, so the call can be chained)
model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Score the untuned baseline on the held-out test split
y_pred = model.predict(X_test)
base_accuracy = accuracy_score(y_test, y_pred)
print(f'Base Model Accuracy: {base_accuracy * 100:.2f}%')  # Baseline accuracy as a percentage

Base Model Accuracy: 100.00%


In [None]:
# Step 4: Apply Grid Search for Hyperparameter Tuning

In [7]:
# Candidate hyperparameter values for the decision tree
# (4 * 3 * 3 * 2 = 72 combinations in total)
param_grid = dict(
    max_depth=[3, 5, 10, None],     # Depth limits to try; None means no depth limit
    min_samples_split=[2, 5, 10],   # Samples a node needs before it may be split
    min_samples_leaf=[1, 2, 4],     # Samples that must remain in every leaf
    criterion=['gini', 'entropy'],  # Splitting criterion (Gini impurity or entropy)
)

# Evaluate every combination with 5-fold cross-validation on the training split.
# n_jobs=-1 runs the fits in parallel on all available cores; verbose=1 prints a
# short progress summary. GridSearchCV works on clones of `model`, so the fitted
# base model from the previous step is left unchanged.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=1,
).fit(X_train, y_train)

# Winning parameter combination and the estimator refit with it
best_grid_params = grid_search.best_params_
best_grid_model = grid_search.best_estimator_

# Evaluate the tuned model on the held-out test split
grid_y_pred = best_grid_model.predict(X_test)
grid_accuracy = accuracy_score(y_test, grid_y_pred)
print(f'Best Grid Search Accuracy: {grid_accuracy * 100:.2f}%')  # Test accuracy of the tuned model
print(f'Best Grid Search Parameters: {best_grid_params}')  # Selected hyperparameters

Fitting 5 folds for each of 72 candidates, totalling 360 fits
Best Grid Search Accuracy: 100.00%
Best Grid Search Parameters: {'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 1, 'min_samples_split': 10}
