In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
dataset = pd.read_csv('CKD.csv')
dataset=pd.get_dummies(dataset,drop_first=True)

In [3]:
indep=dataset[['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes']] 
dep=dataset[['classification_yes']]

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(indep, dep, test_size = 1/3, random_state = 0)

In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [6]:
from sklearn.tree import DecisionTreeClassifier

In [7]:
from sklearn.model_selection import GridSearchCV

In [8]:
param_grid = {'criterion':['gini','entropy'], 'max_features': ['auto','sqrt','log2'],'splitter':['best','random']} 

In [9]:
grid = GridSearchCV(DecisionTreeClassifier(), param_grid, refit = True, verbose = 3,n_jobs=-1,scoring='f1_weighted')

In [10]:
grid.fit(X_train, y_train) 

Fitting 5 folds for each of 12 candidates, totalling 60 fits


20 fits failed out of a total of 60.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
17 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\USER\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\USER\anaconda3\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\Users\USER\anaconda3\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\Users\USER\anaconda3\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterErro

In [11]:
re=grid.cv_results_

In [12]:
grid_predictions = grid.predict(X_test)

In [13]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, grid_predictions)

In [14]:
from sklearn.metrics import classification_report
clf_report = classification_report(y_test, grid_predictions)

In [15]:
from sklearn.metrics import f1_score
f1_macro=f1_score(y_test,grid_predictions,average='weighted')
print("The f1_macro value for best parameter {}:".format(grid.best_params_),f1_macro)

The f1_macro value for best parameter {'criterion': 'entropy', 'max_features': 'sqrt', 'splitter': 'random'}: 0.9400566944426594


In [16]:
print("The confusion Matrix:\n",cm)

The confusion Matrix:
 [[48  3]
 [ 5 77]]


In [17]:
print("The report:\n",clf_report)

The report:
               precision    recall  f1-score   support

       False       0.91      0.94      0.92        51
        True       0.96      0.94      0.95        82

    accuracy                           0.94       133
   macro avg       0.93      0.94      0.94       133
weighted avg       0.94      0.94      0.94       133



In [18]:
from sklearn.metrics import roc_auc_score

roc_auc_score(y_test,grid.predict_proba(X_test)[:,1])

0.9401004304160688

In [19]:
table=pd.DataFrame.from_dict(re)

In [20]:
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002602,0.00116,0.0,0.0,gini,auto,best,"{'criterion': 'gini', 'max_features': 'auto', ...",,,,,,,,9
1,0.001303,0.000401,0.0,0.0,gini,auto,random,"{'criterion': 'gini', 'max_features': 'auto', ...",,,,,,,,9
2,0.005398,0.001199,0.01719,0.002314,gini,sqrt,best,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.963284,0.924528,1.0,0.923652,0.943093,0.950911,0.028506,5
3,0.005996,0.001999,0.016997,0.002972,gini,sqrt,random,"{'criterion': 'gini', 'max_features': 'sqrt', ...",1.0,0.92351,0.962573,0.981217,0.925146,0.958489,0.030304,3
4,0.004603,0.001868,0.016294,0.002037,gini,log2,best,"{'criterion': 'gini', 'max_features': 'log2', ...",0.908877,0.90361,1.0,0.943651,0.943093,0.939846,0.034397,8
5,0.005197,0.000979,0.019389,0.002939,gini,log2,random,"{'criterion': 'gini', 'max_features': 'log2', ...",0.9451,0.981014,0.981217,0.944023,0.923652,0.955001,0.022651,4
6,0.0012,0.000979,0.0,0.0,entropy,auto,best,"{'criterion': 'entropy', 'max_features': 'auto...",,,,,,,,9
7,0.002597,0.002245,0.0,0.0,entropy,auto,random,"{'criterion': 'entropy', 'max_features': 'auto...",,,,,,,,9
8,0.005999,0.001264,0.016396,0.001018,entropy,sqrt,best,"{'criterion': 'entropy', 'max_features': 'sqrt...",1.0,0.981233,0.962573,0.922492,0.962264,0.965712,0.025715,2
9,0.004798,0.000747,0.019112,0.0031,entropy,sqrt,random,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.9451,0.981014,1.0,0.925524,0.981217,0.966571,0.027153,1
