In [59]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder

In [61]:
# Read the raw records from CKD.csv (path is resolved against the working directory).
dataset = pd.read_csv(filepath_or_buffer="./CKD.csv")

In [63]:
# Target column first, then every remaining column as the feature matrix.
dependent = dataset["classification"]
independent = dataset.drop(columns=["classification"])

In [65]:
# Expand categorical feature columns into indicator columns.
# drop_first=True omits the first level of each categorical, so a column with
# k levels yields k-1 indicators.
independent = pd.get_dummies(
    data=independent,
    drop_first=True,
)

In [67]:
# Map the target's class labels to integer codes; the fitted encoder is kept
# so the codes can be translated back to the original labels later.
label_encoder = LabelEncoder().fit(dependent)
dependent = label_encoder.transform(dependent)

In [69]:
# Hold out one third of the rows for evaluation; the fixed seed keeps the
# partition identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    independent,
    dependent,
    test_size=1 / 3,
    random_state=0,
)

In [71]:
# Baseline: a single entropy-criterion decision tree, seeded for repeatability,
# fitted on the training split and scored on the held-out split.
classifier = DecisionTreeClassifier(
    criterion="entropy",
    random_state=0,
).fit(x_train, y_train)

y_predict = classifier.predict(x_test)

# Per-class precision / recall / F1 on the test set.
report = classification_report(y_test, y_predict)
print(report)

              precision    recall  f1-score   support

           0       0.86      0.96      0.91        51
           1       0.97      0.90      0.94        82

    accuracy                           0.92       133
   macro avg       0.92      0.93      0.92       133
weighted avg       0.93      0.92      0.93       133



In [73]:
# Hyperparameter search over the tree's split strategy and impurity criterion.
# GridSearchCV comes from the notebook's import cell, so it is not re-imported here.
param_grid = {
    'splitter': ['best', "random"],
    'criterion': ['gini', 'entropy', 'log_loss']
}

# Seed the estimator: with splitter="random" (and random feature permutation /
# tie-breaking under "best") an unseeded tree makes the cross-validation scores,
# and therefore best_params_, vary from run to run. random_state=0 matches the
# seed used for the baseline DecisionTreeClassifier fitted earlier.
grid = GridSearchCV(
    DecisionTreeClassifier(random_state=0),
    param_grid,
    refit=True,             # refit the winning candidate on all of x_train so grid.predict works
    verbose=3,
    n_jobs=-1,              # run the fits on all available cores
    scoring='f1_weighted',  # rank candidates by support-weighted F1
)
grid.fit(x_train, y_train)

print("Best Parameters:", grid.best_params_)

# Evaluate the refitted best estimator on the held-out split.
y_predict = grid.predict(x_test)
report = classification_report(y_test, y_predict)
print(report)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
Best Parameters: {'criterion': 'gini', 'splitter': 'random'}
              precision    recall  f1-score   support

           0       0.86      0.98      0.92        51
           1       0.99      0.90      0.94        82

    accuracy                           0.93       133
   macro avg       0.92      0.94      0.93       133
weighted avg       0.94      0.93      0.93       133

