In [1]:
# Prediction of Chronic Kidney Disease
import pandas as pd  
import numpy as np 
import matplotlib.pyplot as plt
dataset=pd.read_csv("CKD.csv") 
print("dataset available")
df = pd.DataFrame(dataset)
num_rows = len(df.index)
num_columns = len(df.columns)
print(f"Number of rows: {num_rows}")
print(f"Number of columns: {num_columns}")


dataset available
Number of rows: 399
Number of columns: 25


In [2]:
dataset.columns

Index(['age', 'bp', 'sg', 'al', 'su', 'rbc', 'pc', 'pcc', 'ba', 'bgr', 'bu',
       'sc', 'sod', 'pot', 'hrmo', 'pcv', 'wc', 'rc', 'htn', 'dm', 'cad',
       'appet', 'pe', 'ane', 'classification'],
      dtype='object')

In [3]:
dataset=pd.get_dummies(dataset,drop_first=True)


In [4]:
dataset.columns

Index(['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes', 'classification_yes'],
      dtype='object')

In [5]:
dataset=dataset.replace({True:1,False:0}) 

  dataset=dataset.replace({True:1,False:0})


In [6]:
independent=dataset[['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes']]
dependent=dataset[['classification_yes']]

In [7]:
from sklearn.model_selection import train_test_split
x_train2,x_test2,y_train2,y_test2 = train_test_split(independent,dependent, test_size=0.30,random_state=0)

In [8]:
dataset["classification_yes"].value_counts()

classification_yes
1    249
0    150
Name: count, dtype: int64

In [9]:
x_train2.shape,x_test2.shape,y_train2.shape,y_test2.shape

((279, 27), (120, 27), (279, 1), (120, 1))

In [10]:
from sklearn.preprocessing import StandardScaler   #Standard input data
sc = StandardScaler()
x_train2 = sc.fit_transform(x_train2)
x_test2 = sc.transform(x_test2)

In [None]:
#from sklearn.preprocessing import StandardScaler  #Standard output data
#scy = StandardScaler()
#y_train1 = scy.fit_transform(y_train1)
#y_test1 = scy.transform(y_test1)

In [11]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier as DTC
param_grid = {'criterion':['entropy','gini'],'max_features':[1,'log2','sqrt'],'splitter':['best','random']}
grid = GridSearchCV(DTC(), param_grid, refit = True, verbose = 3,cv=5,n_jobs = -1 )
grid.fit(x_train2,y_train2)
print("Model Created")

Fitting 5 folds for each of 12 candidates, totalling 60 fits
Model Created


In [12]:
result = grid.cv_results_
grid_predictions =  grid.predict(x_test2)
from sklearn.metrics import confusion_matrix 
cm = confusion_matrix(y_test2,grid_predictions) 
print("The cm vaule for best parameter {} is:'\n'".format(grid.best_params_),cm) 

The cm vaule for best parameter {'criterion': 'gini', 'max_features': 1, 'splitter': 'best'} is:'
' [[44  1]
 [ 2 73]]


In [21]:
from sklearn.metrics import classification_report
cls_report = classification_report(y_test2,grid_predictions)
print ("The report for Decision Tree Classification is: '\n'", cls_report)

The report for Decision Tree Classification is: '
'               precision    recall  f1-score   support

           0       0.96      0.98      0.97        45
           1       0.99      0.97      0.98        75

    accuracy                           0.97       120
   macro avg       0.97      0.98      0.97       120
weighted avg       0.98      0.97      0.98       120



In [14]:
#result
table = pd.DataFrame.from_dict(result)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.007802,0.002349,0.006763,0.000301,entropy,1,best,"{'criterion': 'entropy', 'max_features': 1, 's...",0.928571,0.892857,0.964286,0.964286,0.981818,0.946364,0.031871,7
1,0.006432,0.00193,0.006099,0.000752,entropy,1,random,"{'criterion': 'entropy', 'max_features': 1, 's...",0.910714,0.982143,0.928571,0.946429,0.963636,0.946299,0.025163,8
2,0.005087,0.000813,0.005704,0.000457,entropy,log2,best,"{'criterion': 'entropy', 'max_features': 'log2...",0.964286,0.982143,0.892857,0.875,0.945455,0.931948,0.041277,12
3,0.004182,0.000545,0.006417,0.000767,entropy,log2,random,"{'criterion': 'entropy', 'max_features': 'log2...",0.946429,0.964286,0.982143,0.982143,0.981818,0.971364,0.014238,3
4,0.004597,0.000804,0.005469,0.000306,entropy,sqrt,best,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.964286,0.964286,0.946429,0.875,0.945455,0.939091,0.03308,9
5,0.004889,0.001229,0.00706,0.001398,entropy,sqrt,random,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.946429,0.946429,1.0,0.928571,0.945455,0.953377,0.024283,5
6,0.004682,0.000403,0.006107,0.000622,gini,1,best,"{'criterion': 'gini', 'max_features': 1, 'spli...",0.982143,0.964286,0.982143,0.982143,0.963636,0.97487,0.00891,1
7,0.004234,0.000383,0.006016,0.00061,gini,1,random,"{'criterion': 'gini', 'max_features': 1, 'spli...",0.946429,0.946429,0.928571,0.928571,0.945455,0.939091,0.008596,9
8,0.004348,0.000238,0.005661,0.000451,gini,log2,best,"{'criterion': 'gini', 'max_features': 'log2', ...",0.946429,0.946429,0.928571,0.964286,1.0,0.957143,0.024223,4
9,0.004793,0.000832,0.008847,0.003803,gini,log2,random,"{'criterion': 'gini', 'max_features': 'log2', ...",0.946429,0.946429,1.0,0.946429,0.927273,0.953312,0.024495,6


In [16]:
#F1 SCORE
from sklearn.metrics import f1_score
f1_macro =  f1_score(y_test2,grid_predictions,average='weighted')
print("The f1 score vaule for best parameter {} is:'\n'".format(grid.best_params_),f1_macro) 

The f1 score vaule for best parameter {'criterion': 'gini', 'max_features': 1, 'splitter': 'best'} is:'
' 0.975053470019913


In [15]:
#roc_auc Curve  1 is the best
from sklearn.metrics import roc_auc_score
roc_auc_score(y_test2,grid.predict_proba(x_test2)[:,1])

np.float64(0.9755555555555556)

In [17]:
grid

In [18]:
dataset.columns

Index(['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes', 'classification_yes'],
      dtype='object')

In [19]:
age_input=float(input("age"))
bp_input=float(input("bp"))
al_input=float(input("al"))
su_input=float(input("su"))
bgr_input=float(input("bgr"))
bu_input=float(input("bu"))
sc_input=float(input("sc"))
sod_input=float(input("sod"))
pot_input=float(input("pot"))
hrmo_input=float(input("hrmo"))
pcv_input=float(input("pcv"))
wc_input=float(input("wc"))
rc_input=float(input("rc"))
sg_b_input=float(input("sg_b"))
sg_c_input=float(input("sg_c"))
sg_d_input=float(input("sg_d"))
sg_e_input=float(input("sg_e"))
rbc_input=float(input("rbc_normal"))
pc_input=float(input("pc_normal"))
pcc_input=float(input("pcc_present"))
ba_input=float(input("ba_present"))
htn_input=float(input("htn_yes"))
dm_input=float(input("dm_yes"))
cad_input=float(input("cad_yes"))
appet_input=float(input("appet_yes"))
pe_input=float(input("pe_yes"))
ane_input=float(input("ane_yes"))

age 35
bp 120
al 4
su 0
bgr 94
bu 67
sc 0.7
sod 137.528754
pot 4.62724359
hrmo 10.7
pcv 34
wc 12300
rc 4.705597015
sg_b 1
sg_c 0
sg_d 0
sg_e 0
rbc_normal 1
pc_normal 1
pcc_present 0
ba_present 0
htn_yes 0
dm_yes 0
cad_yes 0
appet_yes 0
pe_yes 0
ane_yes 1


In [20]:
Future_Prediction=grid.predict([[age_input,bp_input,al_input,su_input,bgr_input,bu_input,sc_input,sod_input,pot_input,hrmo_input,pcv_input,wc_input,
                                 rc_input,sg_b_input,sg_c_input,sg_d_input,sg_e_input,rbc_input,pc_input,pcc_input,ba_input,htn_input,dm_input,
                                 cad_input,appet_input,pe_input,ane_input]])
print("Future_Prediction={}".format(Future_Prediction))
print("The possibility if Kidney Disease is:",Future_Prediction)


Future_Prediction=[1]
The possibility if Kidney Disease is: [1]
