# All about Iris

### **🔹 Step 1: Importing Libraries**
- **Data Handling:** `numpy`, `pandas`
- **Data Visualization & Reporting:** `matplotlib.pyplot`, `seaborn`, `plot_tree`, `Line2D`, `skim` (from `skimpy`)
- **Data Preprocessing:** `StandardScaler`, `RobustScaler`, `MinMaxScaler`, `MaxAbsScaler`, `Normalizer`
- **Machine Learning Models:** `DecisionTreeClassifier`, `RandomForestClassifier`, `ExtraTreesClassifier`, `LogisticRegression`, `KNeighborsClassifier`, `SVC`, `OneVsRestClassifier`
- **Dimensionality Reduction:** `PCA`, `LDA`, `t-SNE`, `KernelPCA`
- **Feature Selection:** `SelectKBest`, `RFE`, `SequentialFeatureSelector`
- **Hyperparameter Optimization:** `GridSearchCV`, `RandomizedSearchCV`, `Hyperopt`
- **Miscellaneous:** `pairwise_distances`

In [None]:
# Importing Required Libraries

# Numerical & Data Handling
import numpy as np
import pandas as pd
import scipy as sp
import math

# Visualization
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.lines import Line2D
%matplotlib inline
from sklearn.tree import plot_tree
from scipy.optimize import curve_fit

# Machine Learning Libraries
import torch
from torch import nn as nn
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier

# Feature Engineering
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler, MaxAbsScaler, Normalizer, LabelEncoder, OneHotEncoder
from sklearn.feature_selection import SelectKBest, chi2, f_classif, mutual_info_classif, RFE, SequentialFeatureSelector

# Dimensionality Reduction
from sklearn.decomposition import PCA, KernelPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.manifold import TSNE

# Hyperparameter Tuning with Bayesian Optimization
from hyperopt import hp, tpe, fmin, Trials

# Evaluation
from sklearn.metrics import pairwise_distances, classification_report, accuracy_score
import statsmodels.api as sm
from skimpy import skim

### **🔹 Step 2: Load and Prepare Data**
- Load the **Iris dataset** from `sklearn.datasets`
- Convert it into a **Pandas DataFrame** for easier processing
- Store **feature names** and **target class names**
- Convert target variable `y` to **NumPy array (`y_array`)** for compatibility
- Combine **features (`X`) and target (`y`) into a single DataFrame (`df`)** for EDA

<br>

- 150 samples of iris flowers (50 for each class)
- X: 4 features
    - sepal length (cm)
    - sepal width (cm)
    - petal length (cm)
    - petal width (cm)
- y: 3 classes
    - Setosa (0)
    - Versicolor (1)
    - Virginica (2)

In [None]:
# Load the Iris bunch and keep its metadata at hand
iris = load_iris()
feature_names, target_names = iris.feature_names, iris.target_names  # column names / ['setosa', 'versicolor', 'virginica']

# Wrap the raw arrays in labelled DataFrames
X = pd.DataFrame(iris.data, columns=feature_names)
y = pd.DataFrame(iris.target, columns=['target'])

# Independent 1-D copy of the labels for estimators that expect a flat array
y_array = y['target'].to_numpy(copy=True)

# Features and target side by side, used for the visualisations
df = X.join(y)

# 80/20 train/test split with a fixed seed, plus NumPy copies of the training part
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train_array = X_train.to_numpy(copy=True)
y_train_array = y_train.to_numpy(copy=True)

### **🔹 Step 3: Data Standardization**
- Prevents large-valued features from **dominating** the model.

In [None]:
# Standardise the training features.
# Drop-in alternatives for the scaler: MinMaxScaler(), RobustScaler(), MaxAbsScaler(), Normalizer()
scaler = StandardScaler()

# Fit on the training split and keep the result as a DataFrame so the columns stay labelled
X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=feature_names)

### **🔹 Step 4.1: Data Summary**

In [None]:
# Dataset shape: (rows, columns) of the feature frame X and of the target frame y
print(f"Feature Matrix Shape (X): {X.shape}")  # (150, 4)
print(f"Target Vector Shape (y): {y.shape}")   # (150, 1)

# Summary statistics for every column of df (features and target);
# as the last expression of the cell, the result is shown as the cell output
df.describe()

In [None]:
# Column dtypes and non-null counts
df.info()

# Missing values: count the NaNs in each column.
# The commented lines below are alternative ways to treat them if any show up.
df.isnull().sum() # Number of missing values per column
# df.fillna(0, inplace=True) # Fill NaN with 0
# df.fillna(df.mean(), inplace=True)  # Fill with the column mean
# df.interpolate(method = 'linear', inplace=True) # Fill by linear interpolation
# df.dropna(inplace=True) # Remove rows that contain NaN

In [None]:
# Report: run skimpy's `skim` on the full dataframe (features + target)
skim(df)

In [None]:
# Curse of Dimensionality

# Average pair-wise L2-distance
def average_pairwise_distance(data):
    """
    Return the mean Euclidean (L2) distance over all distinct pairs of rows.

    Parameters:
    - data: A NumPy array or Pandas DataFrame of numerical features, one sample per row.

    Returns:
    - The mean of the pairwise distances, each unordered pair counted once.
    """
    dist_matrix = pairwise_distances(data)  # Full symmetric distance matrix
    n_points = dist_matrix.shape[0]
    rows, cols = np.triu_indices(n_points, k=1)  # Strict upper triangle: each pair once, no diagonal
    return dist_matrix[rows, cols].mean()

# Dimensions: evaluate the average distance on the first 1, 2, ... feature columns
dimensions = list(range(1, X.shape[1] + 1))
average_distances = []
for i in dimensions:
    X_reduced = X.iloc[:, :i]  # Keep only the first i columns
    avg_distance = average_pairwise_distance(X_reduced)
    average_distances.append(avg_distance)
    print(f"Dimensions: {i}, Average Pairwise Distance: {avg_distance:.4f}")

# Plot the average distance against the number of dimensions
plt.figure(figsize=(8, 6))
plt.plot(dimensions, average_distances, color='b', marker='o')
plt.xlabel("Number of Dimensions")
plt.ylabel("Average Pairwise Distance")
plt.title("Curse of Dimensionality: Increasing Average Pairwise Distance")
plt.grid()
plt.show()

### **🔹 Step 4.2: Visualizing Feature Distributions**

In [None]:
# Histograms (DataFrame.hist creates its own figure, one subplot per feature)
X.hist(figsize=(10, 6), bins=10, edgecolor='black')
plt.suptitle("Feature Distribution - Iris Dataset")
plt.show()

# Boxplot for Outlier Detection
plt.figure(figsize=(10, 6))
sns.boxplot(data=X)
plt.title("Boxplot of Iris Features")
plt.xlabel("Features")
plt.ylabel("Value")
plt.show()

# Scatter plot of the first two features, coloured by class
plt.figure(figsize=(12, 8))
sns.scatterplot(x=df.columns[0], y=df.columns[1], data=df, hue='target')
plt.show()

# Pair Plot
# sns.pairplot is a figure-level function that builds its own figure, so calling
# plt.figure() first would only leave an empty extra figure in the output.
sns.pairplot(df, hue='target')
plt.show()

# Count Plot: number of samples per class
# Alternative: df['target'].value_counts().plot(kind='bar')
plt.figure(figsize=(12, 8))
sns.countplot(x='target', data=df)
plt.show()

# Heatmap of the pairwise correlations between the four feature columns
plt.figure(figsize=(12, 8))
sns.heatmap(df.iloc[:, :4].corr(), annot=True, cmap="coolwarm")
plt.title("Correlation - Iris")
plt.show()

In [None]:
def box_DESC(x):
    """
    Computes and prints the boxplot statistics including:
    - Maximum whisker value (largest data point within Q3 + 1.5 * IQR)
    - Q3 (75th percentile)
    - Q2 (Median)
    - Q1 (25th percentile)
    - Minimum whisker value (smallest data point within Q1 - 1.5 * IQR)
    - Number of outliers (points outside the two fences)

    Parameters:
    - x: A numerical list, array or Pandas Series (single feature column)

    Prints:
    - Boxplot statistics
    - Number of outliers in the data

    Raises:
    - ValueError: if x is empty
    """
    values = np.asarray(x, dtype=float).ravel()  # Accept lists / Series / arrays alike
    if values.size == 0:
        raise ValueError("box_DESC requires at least one value")

    Q1, Q2, Q3 = np.percentile(values, [25, 50, 75])
    IQR = Q3 - Q1
    upper_fence = Q3 + 1.5 * IQR
    lower_fence = Q1 - 1.5 * IQR

    # A whisker ends at an actual observation: the most extreme point that is
    # still inside the fence, not at the fence value itself.
    inside = values[(values >= lower_fence) & (values <= upper_fence)]
    whisker_upper = inside.max()
    whisker_lower = inside.min()

    labels = np.array(['max', 'Q3', 'Q2', 'Q1', 'min'])
    stats = np.array([whisker_upper, Q3, Q2, Q1, whisker_lower])
    outliers = values[(values < lower_fence) | (values > upper_fence)]
    print(labels, '\n', stats, '\n', "Number of outliers:", len(outliers))

# Print the boxplot statistics of the second column of df (positional index 1)
box_DESC(df.iloc[:, 1])

### **🔹 Step 5.1: Feature Selection - Filter Method**
- Select features based on their **statistical relevance to the target variable**

In [None]:
# Chi-square filter: score each feature against the class labels and keep the best two
chi_square = SelectKBest(chi2, k=2).fit(X, y_array.copy())
X_chi_square = chi_square.transform(X)
features_chi_square = X.columns[chi_square.get_support(indices=True)]
print("Selected Features:", features_chi_square)

In [None]:
# ANOVA F-test filter: keep the two features with the highest F-score
f_test = SelectKBest(f_classif, k=2).fit(X, y_array.copy())
X_selected = f_test.transform(X)
features_f_test = X.columns[f_test.get_support(indices=True)]
print("Selected Features:", features_f_test)

In [None]:
# Mutual information between each feature and the class labels
mi_scores = mutual_info_classif(X, y_array.copy())

# Parentheses (not brackets) carry the method chain over two lines, so df_mi is
# a DataFrame rather than a one-element list holding a DataFrame.
df_mi = (pd.DataFrame({'Feature': X.columns, 'MI Score': mi_scores})
           .sort_values(by='MI Score', ascending=False))
print(df_mi)

### **🔹 Step 5.2: Feature Selection - Wrapper Method**
- Select features by **iteratively testing different subsets** and evaluating their impact on model performance

In [None]:
# SFS: forward sequential selection of two features, scored with a random forest
model_rf = RandomForestClassifier()
model_sfs = SequentialFeatureSelector(model_rf, n_features_to_select=2, direction='forward')

# Stored under its own name (X_sfs) instead of the RFE variable X_rfe
X_sfs = model_sfs.fit_transform(X, y_array.copy())

# get_support() is the selector API used by the filter-method cells as well
features_sfs = X.columns[model_sfs.get_support()]
print("Selected Features by SFS:", features_sfs)

In [None]:
# RFE: drop one feature per round (step=1) until two remain, ranked by a decision tree
model_dt = DecisionTreeClassifier()
model_rfe = RFE(estimator=model_dt, n_features_to_select=2, step=1, verbose=2)
model_rfe.fit(X, y_array.copy())
X_rfe = model_rfe.transform(X)
features_rfe = X.columns[model_rfe.get_support()]
print("Selected Features by RFE:", features_rfe)

### **🔹 Step 5.3: Feature Selection - Embedded Method**
- Select features **during the model training** process by incorporating feature selection into the learning algorithm

In [None]:
# Extra tree
model_etc = ExtraTreesClassifier()
model_etc.fit(X, y_array.copy())

# Feature importance from tree-based model.
# Parentheses (not brackets) continue the expression, so the result is a
# DataFrame rather than a one-element list holding a DataFrame.
feature_importances = (pd.DataFrame({'Feature': X.columns, 'Importance': model_etc.feature_importances_})
                         .sort_values(by='Importance', ascending=False))
print("Feature Importance using ExtraTreesClassifier:\n", feature_importances)

### **🔹 Step 6.1: Feature Extraction - Linear**
- **Method:** `PCA(n_components=2)`
- Converts high-dimensional data into **lower-dimensional** space while preserving variance

<br>

- **Method:** `LDA(n_components=2)`
- Finds the best axes to **separate different classes**

In [None]:
# Re-initialise y (kept so the full-dataset target frame and y_copy stay available)
y = pd.DataFrame(iris.target, columns=['target'])
y_copy = y.copy().squeeze().to_numpy().astype(int) # DataFrame -> Array

# X_train_scaled holds only the training rows (in X_train order), so the labels
# must come from y_train; iris.target has one entry per sample of the full dataset.
train_labels = y_train.to_numpy().ravel()

# PCA
model_pca = PCA(n_components=2)
X_train_pca = model_pca.fit_transform(X_train_scaled)
df_pca = pd.DataFrame(X_train_pca, columns=['PC1', 'PC2'])
df_pca['target'] = train_labels

# Plot
plt.figure(figsize=(8, 6))
colors = ['navy', 'turquoise', 'darkorange']
for color, i, target_name in zip(colors, [0, 1, 2], target_names): # Plot PCA results
    class_rows = df_pca['target'] == i
    plt.scatter(df_pca.loc[class_rows, 'PC1'], df_pca.loc[class_rows, 'PC2'], color=color, lw=2, label=target_name, alpha=0.6)
loadings = model_pca.components_.T * np.sqrt(model_pca.explained_variance_) # Extract feature contributions
# Loop variables are named dx/dy so the module-level `y` is not overwritten
for feature_name, (dx, dy) in zip(feature_names, loadings): # Plot vectors (arrows) for original features
    plt.arrow(0, 0, dx, dy, color='black', width=0.02, head_width=0.1)
    plt.text(dx * 1.2, dy * 1.2, feature_name, color='black', fontsize=12)
plt.title('PCA of IRIS Dataset with Feature Vectors')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.legend(loc="best", shadow=False, scatterpoints=1)
plt.axhline(0, color='gray', linestyle='--', linewidth=0.5) # Add horizontal line
plt.axvline(0, color='gray', linestyle='--', linewidth=0.5) # Add vertical line
plt.grid()
plt.show()

In [None]:
# Re-initialise y (kept so the full-dataset target frame and y_copy stay available)
y = pd.DataFrame(iris.target, columns=['target'])
y_copy = y.copy().squeeze().to_numpy().astype(int) # DataFrame -> Array

# X_train_scaled holds only the training rows (in X_train order), so the labels
# must come from y_train rather than from the full-dataset target.
train_labels = y_train.to_numpy().ravel()

# Kernel PCA
model_kernel_pca = KernelPCA(n_components=2, kernel="rbf", gamma=15)
X_kernel_pca = model_kernel_pca.fit_transform(X_train_scaled)
df_kernel_pca = pd.DataFrame(X_kernel_pca, columns=['PC1', 'PC2'])
df_kernel_pca['target'] = train_labels

# Plot
plt.figure(figsize=(12, 8))
sns.scatterplot(x=df_kernel_pca["PC1"], y=df_kernel_pca["PC2"], hue=df_kernel_pca["target"], palette="viridis", alpha=0.8)
plt.title('Kernel PCA w/ Feature Vectors - Iris')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.axhline(0, color='gray', linestyle='--', linewidth=0.5)  # Add horizontal line
plt.axvline(0, color='gray', linestyle='--', linewidth=0.5)  # Add vertical line
plt.grid()
plt.legend(title="Classes")
plt.show()

In [None]:
# Re-initialise y (kept so the full-dataset target frame and y_copy stay available)
y = pd.DataFrame(iris.target, columns=['target'])
y_copy = y.copy().squeeze().to_numpy().astype(int) # DataFrame -> Array

# X_train_scaled holds only the training rows (in X_train order); LDA needs one
# label per row, so the labels must come from y_train.
train_labels = y_train.to_numpy().ravel()

# LDA
model_lda = LDA(n_components=2)
X_lda = model_lda.fit_transform(X_train_scaled, train_labels)
df_lda = pd.DataFrame(X_lda, columns=['LD1', 'LD2'])
df_lda['target'] = train_labels

# Plot
plt.figure(figsize=(8, 6))
colors = ['navy', 'turquoise', 'darkorange']
for color, i, target_name in zip(colors, [0, 1, 2], target_names): # Plot LDA results
    subset = df_lda[df_lda['target'] == i]
    plt.scatter(subset['LD1'], subset['LD2'], color=color, lw=2, label=target_name, alpha=0.8)
loadings = model_lda.scalings_[:, :2]
# Normalise for better visualisation. The division builds a new array: an
# in-place `/=` would write through the slice into model_lda.scalings_.
loadings = loadings / np.linalg.norm(loadings, axis=1, keepdims=True)
# Loop variables are named dx/dy so the module-level `y` is not overwritten
for feature_name, (dx, dy) in zip(feature_names, loadings):
    plt.arrow(0, 0, dx * 2, dy * 2, color='black', width=0.02, head_width=0.1)
    plt.text(dx * 2.2, dy * 2.2, feature_name, color='black', fontsize=12)
plt.title('LDA of IRIS Dataset with Feature Vectors')
plt.xlabel('Linear Discriminant 1')
plt.ylabel('Linear Discriminant 2')
plt.legend(loc="best", shadow=False, scatterpoints=1)
plt.axhline(0, color='gray', linestyle='--', linewidth=0.5)
plt.axvline(0, color='gray', linestyle='--', linewidth=0.5)
plt.grid()
plt.show()

### **🔹 Step 6.2: Feature Extraction - Non-Linear**
- **Method:** `TSNE(n_components=2)`
- Preserves the **local structure** of data.

In [None]:
# Re-initialise y and its flat copy y_array (both over the full dataset)
y = pd.DataFrame(iris.target, columns=['target'])
y_array = y.squeeze().to_numpy().astype(int) # Convert 2-D matrix to 1-D array

# X_train_scaled holds only the training rows (in X_train order). Assigning the
# full-dataset frame `y` would align on the 0..n-1 index and attach the wrong
# labels, so the labels are taken from y_train instead.
train_labels = y_train.to_numpy().ravel()

# t-SNE
model_t_sne = TSNE(n_components=2, perplexity=30, random_state=42)
X_t_sne = model_t_sne.fit_transform(X_train_scaled)
df_t_sne = pd.DataFrame(X_t_sne, columns=['component 1', 'component 2'])
df_t_sne['target'] = train_labels  # Add target labels

# Plot
plt.figure(figsize=(8, 6))
colors = ['pink', 'purple', 'yellow']
for class_id in np.unique(train_labels):
    subset = df_t_sne[df_t_sne['target'] == class_id]
    plt.scatter(subset['component 1'], subset['component 2'], color=colors[class_id], label=target_names[class_id], alpha=0.7)
plt.xlabel('Component 1')
plt.ylabel('Component 2')
plt.legend()
plt.title('t-SNE Visualization of Iris Dataset')
plt.grid(True)
plt.show()

### **🔹 Step 7: Model Training**

In [None]:
def plot_decision_boundary(model, X, y, title):
    """
    Plots the decision regions of a fitted classifier over two features.

    Parameters:
    - model: Trained model exposing `predict`
    - X: Pandas DataFrame of training/test features (only 2 features allowed)
    - y: Pandas Series or single-column DataFrame of target labels
         (assumed to be the integer codes 0 .. len(target_names) - 1)
    - title: Title for the plot

    Notes:
    - Legend entries are taken from the module-level `target_names`.
    - Regions, points and legend markers share one colormap and one
      normalisation, so a class has the same colour everywhere.
    """
    # Convert Pandas objects to NumPy arrays (labels flattened to 1-D)
    X_np = X.to_numpy()
    y_np = np.ravel(y.to_numpy())

    # Define grid boundaries with a small margin around the data
    x_min, x_max = X_np[:, 0].min() - 0.5, X_np[:, 0].max() + 0.5
    y_min, y_max = X_np[:, 1].min() - 0.5, X_np[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))

    # Predict class labels for each point in the grid (column names kept so the
    # model sees the same feature names it was fitted with)
    grid = pd.DataFrame(np.c_[xx.ravel(), yy.ravel()], columns=X.columns)
    Z = model.predict(grid).reshape(xx.shape)

    # One shared colour mapping: class code -> colour
    n_classes = len(target_names)
    cmap = plt.cm.Set1
    norm = plt.Normalize(vmin=0, vmax=max(n_classes - 1, 1))
    levels = np.arange(n_classes + 1) - 0.5  # One band per integer class code

    # Plot decision boundary
    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, levels=levels, alpha=0.3, cmap=cmap, norm=norm)
    plt.scatter(X_np[:, 0], X_np[:, 1], c=y_np, edgecolors='k', cmap=cmap, norm=norm)

    # Label axes using Pandas column names
    plt.xlabel(X.columns[0])
    plt.ylabel(X.columns[1])
    legend_labels = [Line2D([0], [0], marker='o', color='w',
                            markerfacecolor=cmap(float(norm(i))),
                            markersize=8, label=target_names[i])
                     for i in range(n_classes)]
    plt.legend(handles=legend_labels, title="Classes")
    plt.title(title)
    plt.show()
    
# Keep only the first two feature columns of each split (needed for the 2-D boundary plots)
X_train_2D, X_test_2D = (part.iloc[:, :2] for part in (X_train, X_test))

In [None]:
# Logistic Regression on 2 features (one-vs-rest over the three classes)
model_lr_2D = OneVsRestClassifier(LogisticRegression())
# Labels are passed as a flat 1-D array, as in the KNN and SVM cells, instead of
# a single-column DataFrame
model_lr_2D.fit(X_train_2D, y_train.values.ravel())

# Evaluation
accuracy = model_lr_2D.score(X_test_2D, y_test)
print(f"Accuracy: {accuracy:.4f}")

# Plot
plot_decision_boundary(model_lr_2D, X_train_2D, y_train, 'Logistic Regression Decision Boundary - Iris')

In [None]:
# K-Nearest Neighbour (k = 5) on the two selected features
model_knn = KNeighborsClassifier(n_neighbors=5)
model_knn = model_knn.fit(X_train_2D, y_train.to_numpy().ravel())  # fit returns the estimator itself

# Evaluation: mean accuracy on the held-out split
accuracy = model_knn.score(X_test_2D, y_test)
print(f"Accuracy: {accuracy:.4f}")

# Plot
plot_decision_boundary(model_knn, X_train_2D, y_train, title="K-Nearest Neighbour Decision Boundary - Iris")

In [None]:
# Support vector classifier with an RBF kernel on the two selected features
model_svm = SVC(kernel='rbf')
model_svm = model_svm.fit(X_train_2D, y_train.to_numpy().ravel())  # fit returns the estimator itself

# Evaluation: mean accuracy on the held-out split
accuracy = model_svm.score(X_test_2D, y_test)
print(f"Accuracy: {accuracy:.4f}")

# Plot
plot_decision_boundary(model_svm, X_train_2D, y_train, title="Support Vector Machine Decision Boundary - Iris")

In [None]:
# Decision Tree w/ Pruning
model_dt = DecisionTreeClassifier(criterion='gini',
                                  random_state=42,
                                  max_depth=4,         # Limits tree depth
                                  min_samples_split=5, # Minimum samples to split a node
                                  min_samples_leaf=2,  # Minimum samples in a leaf
                                  max_leaf_nodes=10)   # Limits total leaf nodes
model_dt.fit(X_train, y_train)

# Feature importance.
# Parentheses (not brackets) continue the expression, so the result is a
# DataFrame rather than a one-element list holding a DataFrame.
feature_importance_df = (pd.DataFrame({'Feature': X_train.columns,
                                       'Importance': model_dt.feature_importances_})
                           .sort_values(by='Importance', ascending=False))
print(feature_importance_df)

# Evaluation
accuracy = model_dt.score(X_test, y_test)
print(f"Accuracy: {accuracy:.4f}")

# Plot
plt.figure(figsize=(15, 8))
class_names = [str(cls) for cls in model_dt.classes_]  # Convert class labels to strings
# feature_names is passed as a plain list: some scikit-learn releases reject a pandas Index here
plot_tree(model_dt, filled=True, feature_names=list(X_train.columns), class_names=class_names)
plt.show()


### **🔹 Step 8: Hyperparameter Tuning**

In [None]:
# Base estimator tuned by the grid search and the random search
model_rf = RandomForestClassifier()

# Candidate values shared by both searches
hyperparams = dict(
    n_estimators=[10, 50, 100],    # Number of trees
    max_depth=[None, 10, 20],      # Maximum depth of trees
    min_samples_split=[2, 5, 10],  # Minimum number of samples to split an internal node
    min_samples_leaf=[1, 2, 4],    # Minimum number of samples to be in a leaf node
    bootstrap=[True, False],       # Bootstrapping
)

In [None]:
# Grid search (Exhaustive search): every combination in `hyperparams`, 5-fold CV
grid_search = GridSearchCV(estimator=model_rf,
                           param_grid=hyperparams,
                           cv=5)
grid_search.fit(X, y_array)

# Best hyperparameter
best_params = grid_search.best_params_
print("Best hyperparameters:", best_params)
print("Best cross-validation score:", grid_search.best_score_)

# Best model: with the default refit=True the search has already refitted
# best_estimator_ on the whole of (X, y_array), so no second fit is needed.
best_model = grid_search.best_estimator_
best_model

In [None]:
# Random search (Random trials): 10 sampled combinations from `hyperparams`, 5-fold CV
random_search = RandomizedSearchCV(estimator=model_rf,
                                   param_distributions=hyperparams,
                                   n_iter=10,
                                   cv=5,
                                   random_state=42)
random_search.fit(X, y_array)

# Best hyperparameter
best_params = random_search.best_params_
print("Best hyperparameters:", best_params)
print("Best cross-validation score:", random_search.best_score_)

# Best model: with the default refit=True the search has already refitted
# best_estimator_ on the whole of (X, y_array), so no second fit is needed.
best_model = random_search.best_estimator_
best_model

In [None]:
# Hyperparameter search space for Bayesian optimisation.
# NOTE(review): hp.choice parameters appear to be reported by fmin as the index
# of the chosen option (the criterion result is mapped back through a list after
# the search) — confirm the same holds for 'max_depth'.
space = {'max_depth': hp.choice('max_depth', np.arange(1, 20, dtype=int)),
         'min_samples_split': hp.quniform('min_samples_split', 2, 20, 1), # Range 2..20 in steps of 1; cast to int before use
         'min_samples_leaf': hp.quniform('min_samples_leaf', 1, 10, 1), # Range 1..10 in steps of 1; cast to int before use
         'criterion': hp.choice('criterion', ['gini', 'entropy'])}

In [None]:
def objective(params):
    """
    Objective function for hyperparameter optimization of a Decision Tree Classifier.

    Parameters:
    - params: A dictionary of DecisionTreeClassifier keyword arguments. It must
      contain 'min_samples_split' and 'min_samples_leaf' (cast to int here);
      every other entry is passed to the classifier unchanged. The dictionary
      itself is not modified.

    Returns:
    - Negative mean 5-fold cross-validated accuracy on the module-level
      X / y_array (to convert maximization to minimization).
    """
    tree_params = dict(params)  # Work on a copy so the caller's dict stays untouched
    for key in ('min_samples_split', 'min_samples_leaf'):
        tree_params[key] = int(tree_params[key])  # Ensure integer type
    clf = DecisionTreeClassifier(**tree_params)  # Create a decision tree w/ given parameters
    accuracy = cross_val_score(clf, X, y_array, cv=5).mean()  # Compute cross-validated accuracy
    return -accuracy  # Return negative accuracy (convert maximisation problem to minimisation problem)

In [None]:
# Bayesian optimisation
# 1. Select an initial random set of hyperparameters.
# 2. Evaluate them using the objective function.
# 3. Use past evaluations to build a probability model of which hyperparameters are likely to perform best.
# 4. Prioritise testing the most promising hyperparameters in the next iteration.
# 5. Repeat this for 50 evaluations (max_evals=50).
# 6. Find the best hyperparameter combination.
from hyperopt import space_eval  # Cell-local import: translates fmin's result back into real values

trials = Trials()
best_indices = fmin(fn=objective,
                    space=space,
                    algo=tpe.suggest,
                    max_evals=50,
                    trials=trials)

# fmin reports hp.choice parameters as the *index* of the chosen option. Using
# that index directly as max_depth is off by one (and index 0 is not a valid
# depth), so space_eval maps the whole result back through the search space.
best_values = space_eval(space, best_indices)

# Convert to proper data type
best_params = {'max_depth': int(best_values['max_depth']),
               'min_samples_split': int(best_values['min_samples_split']),
               'min_samples_leaf': int(best_values['min_samples_leaf']),
               'criterion': best_values['criterion']}

# Best hyperparameters
print("Best hyperparameters:", best_params)

# Best model
best_model = DecisionTreeClassifier(**best_params)
best_model.fit(X, y_array)