In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the CSV dataset
data = pd.read_csv("modified_dataset.csv")

# Prepare the data
X = data.drop('target', axis=1)  # Features
y = data['target']  # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the Decision Tree model
dt_model = DecisionTreeClassifier(random_state=42)

# Define the grid of hyperparameters to search
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [None, 10, 20, 30, 40],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['auto', 'sqrt', 'log2', None]
}

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=dt_model, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# Perform grid search to find the best hyperparameters
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_

# Train the model with the best hyperparameters
best_dt_model = DecisionTreeClassifier(**best_params, random_state=42)
best_dt_model.fit(X_train, y_train)

# Make predictions using the best model
best_dt_y_pred = best_dt_model.predict(X_test)

# Evaluate the best model
best_dt_accuracy = accuracy_score(y_test, best_dt_y_pred)
print("Best Decision Tree Accuracy:", best_dt_accuracy)
print("Best Decision Tree Classification Report:")
print(classification_report(y_test, best_dt_y_pred))


450 fits failed out of a total of 1800.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
237 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\arkad\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\arkad\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\Users\arkad\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\arkad\AppData\Local\Programs\Python\Python312\

Best Decision Tree Accuracy: 0.7692
Best Decision Tree Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.87      0.84      3437
           1       0.66      0.54      0.59      1563

    accuracy                           0.77      5000
   macro avg       0.73      0.71      0.72      5000
weighted avg       0.76      0.77      0.76      5000

