In [94]:
import pandas as pd
import seaborn as ans 
import numpy as np

In [95]:
data = pd.read_csv("heart failure.csv")

In [96]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   age                       299 non-null    float64
 1   anaemia                   299 non-null    int64  
 2   creatinine_phosphokinase  299 non-null    int64  
 3   diabetes                  299 non-null    int64  
 4   ejection_fraction         299 non-null    int64  
 5   high_blood_pressure       299 non-null    int64  
 6   platelets                 299 non-null    float64
 7   serum_creatinine          299 non-null    float64
 8   serum_sodium              299 non-null    int64  
 9   sex                       299 non-null    int64  
 10  smoking                   299 non-null    int64  
 11  time                      299 non-null    int64  
 12  DEATH_EVENT               299 non-null    int64  
dtypes: float64(3), int64(10)
memory usage: 30.5 KB


In [97]:
data.isnull().sum()

age                         0
anaemia                     0
creatinine_phosphokinase    0
diabetes                    0
ejection_fraction           0
high_blood_pressure         0
platelets                   0
serum_creatinine            0
serum_sodium                0
sex                         0
smoking                     0
time                        0
DEATH_EVENT                 0
dtype: int64

In [98]:
data.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


In [99]:
data = data.rename({"DEATH_EVENT": "death"},axis = 1)

In [100]:
data.death.head()

0    1
1    1
2    1
3    1
4    1
Name: death, dtype: int64

# Seperate x and y

In [101]:
x = data.drop("death",axis = 1)
y = data.death

# Train and Testing data --- Apply K Neighbors Classifier

In [102]:
from sklearn.model_selection import train_test_split as tts
from sklearn.neighbors import KNeighborsClassifier

In [103]:
xtrain,xtest,ytrain,ytest = tts(x,y,test_size = .30 ,random_state = 1)

In [187]:
KNC = KNeighborsClassifier()
KNC.fit(xtrain,ytrain)

KNeighborsClassifier()

In [None]:
KNC.predict(xtest)

In [None]:
data["Predicted Death"] = KNC.predict(x)

In [None]:
data.head()

# Evaluate KNeighborsClassifier Model Score before Tuning

In [188]:
KNC.score(xtest,ytest)

0.6111111111111112

# Tuning KNeighborsClassifier using Randomized Search CV

In [120]:
from sklearn.model_selection import RandomizedSearchCV

n_neighbors = [int(item) for item in np.linspace(10,100,25)]
weight = ['uniform', 'distance']
algorithm = ['auto', 'ball_tree', 'kd_tree', 'brute']

In [121]:
parameters = {
    "n_neighbors":n_neighbors,
    "weights" : weight ,
    "algorithm": algorithm
    }

In [122]:
rscv = RandomizedSearchCV(KNC,parameters,n_iter=200, cv = 4)

In [123]:
rscv.fit(xtrain,ytrain)

RandomizedSearchCV(cv=4, estimator=KNeighborsClassifier(), n_iter=200,
                   param_distributions={'algorithm': ['auto', 'ball_tree',
                                                      'kd_tree', 'brute'],
                                        'n_neighbors': [10, 13, 17, 21, 25, 28,
                                                        32, 36, 40, 43, 47, 51,
                                                        55, 58, 62, 66, 70, 73,
                                                        77, 81, 85, 88, 92, 96,
                                                        100],
                                        'weights': ['uniform', 'distance']})

In [124]:
result = pd.DataFrame(rscv.cv_results_)

In [125]:
result

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_weights,param_n_neighbors,param_algorithm,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001245,4.361819e-04,0.002000,2.665601e-07,uniform,10,auto,"{'weights': 'uniform', 'n_neighbors': 10, 'alg...",0.660377,0.634615,0.653846,0.596154,0.636248,0.025011,185
1,0.001126,2.182457e-04,0.001126,2.182801e-04,distance,10,auto,"{'weights': 'distance', 'n_neighbors': 10, 'al...",0.641509,0.711538,0.557692,0.615385,0.631531,0.055258,193
2,0.001000,0.000000e+00,0.002001,1.976862e-07,uniform,13,auto,"{'weights': 'uniform', 'n_neighbors': 13, 'alg...",0.641509,0.634615,0.634615,0.615385,0.631531,0.009738,193
3,0.001250,4.330502e-04,0.001250,4.330158e-04,distance,13,auto,"{'weights': 'distance', 'n_neighbors': 13, 'al...",0.641509,0.692308,0.576923,0.634615,0.636339,0.040904,181
4,0.001000,0.000000e+00,0.002001,1.032383e-07,uniform,17,auto,"{'weights': 'uniform', 'n_neighbors': 17, 'alg...",0.660377,0.634615,0.711538,0.596154,0.650671,0.041919,113
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,0.001000,1.032383e-07,0.001250,4.330502e-04,distance,92,brute,"{'weights': 'distance', 'n_neighbors': 92, 'al...",0.622642,0.711538,0.576923,0.653846,0.641237,0.048947,149
196,0.001000,1.032383e-07,0.001750,4.330502e-04,uniform,96,brute,"{'weights': 'uniform', 'n_neighbors': 96, 'alg...",0.660377,0.673077,0.673077,0.653846,0.665094,0.008310,5
197,0.001000,1.192093e-07,0.001000,1.032383e-07,distance,96,brute,"{'weights': 'distance', 'n_neighbors': 96, 'al...",0.622642,0.711538,0.576923,0.653846,0.641237,0.048947,149
198,0.000750,4.331534e-04,0.001750,4.331190e-04,uniform,100,brute,"{'weights': 'uniform', 'n_neighbors': 100, 'al...",0.660377,0.673077,0.673077,0.653846,0.665094,0.008310,5


# Evaluate Model After Randomized Search CV Tuning

In [127]:
rscv.best_score_

0.6699020319303338

In [128]:
new_rscv = rscv.best_estimator_

In [129]:
print(new_rscv)

KNeighborsClassifier(n_neighbors=25)


# Tuning KNeighborsClassifier using Grid Search CV


In [116]:
from sklearn.model_selection import GridSearchCV

In [179]:
gscv = GridSearchCV(KNC,parameters)
gscv.fit(xtrain,ytrain)

GridSearchCV(estimator=KNeighborsClassifier(),
             param_grid={'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
                         'n_neighbors': [10, 13, 17, 21, 25, 28, 32, 36, 40, 43,
                                         47, 51, 55, 58, 62, 66, 70, 73, 77, 81,
                                         85, 88, 92, 96, 100],
                         'weights': ['uniform', 'distance']})

In [183]:
result2 = pd.DataFrame(gscv.cv_results_)

In [184]:
result2

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_algorithm,param_n_neighbors,param_weights,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001200,3.998995e-04,0.001805,3.990733e-04,auto,10,uniform,"{'algorithm': 'auto', 'n_neighbors': 10, 'weig...",0.642857,0.642857,0.642857,0.619048,0.585366,0.626597,0.022584,193
1,0.001101,4.880514e-04,0.001199,4.006204e-04,auto,10,distance,"{'algorithm': 'auto', 'n_neighbors': 10, 'weig...",0.642857,0.714286,0.642857,0.595238,0.609756,0.640999,0.041105,177
2,0.001000,1.784161e-07,0.002000,1.907349e-07,auto,13,uniform,"{'algorithm': 'auto', 'n_neighbors': 13, 'weig...",0.642857,0.642857,0.619048,0.642857,0.585366,0.626597,0.022584,193
3,0.001200,3.999949e-04,0.001200,4.000664e-04,auto,13,distance,"{'algorithm': 'auto', 'n_neighbors': 13, 'weig...",0.642857,0.714286,0.642857,0.642857,0.609756,0.650523,0.034363,113
4,0.001000,9.536743e-08,0.001400,4.900766e-04,auto,17,uniform,"{'algorithm': 'auto', 'n_neighbors': 17, 'weig...",0.642857,0.642857,0.619048,0.690476,0.585366,0.636121,0.034388,189
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,0.001000,1.168008e-07,0.001000,1.507891e-07,brute,92,distance,"{'algorithm': 'brute', 'n_neighbors': 92, 'wei...",0.619048,0.690476,0.642857,0.619048,0.658537,0.645993,0.026825,137
196,0.001000,9.536743e-08,0.001600,4.899987e-04,brute,96,uniform,"{'algorithm': 'brute', 'n_neighbors': 96, 'wei...",0.666667,0.666667,0.666667,0.666667,0.658537,0.665041,0.003252,5
197,0.001000,1.168008e-07,0.001200,4.000902e-04,brute,96,distance,"{'algorithm': 'brute', 'n_neighbors': 96, 'wei...",0.619048,0.690476,0.642857,0.619048,0.658537,0.645993,0.026825,137
198,0.000800,4.001141e-04,0.001600,4.900182e-04,brute,100,uniform,"{'algorithm': 'brute', 'n_neighbors': 100, 'we...",0.666667,0.666667,0.666667,0.666667,0.658537,0.665041,0.003252,5


# Evaluate Model After  GridSearchCV Tuning

In [185]:
gscv.best_score_

0.6651567944250871

In [186]:
gscv.best_estimator_

KNeighborsClassifier(n_neighbors=25)

# For Decision Tree Classifier Algorithm
Train and Testing data --- Apply Decision Tree Classifier

In [111]:
from sklearn.tree import DecisionTreeClassifier

In [110]:
Dtc = DecisionTreeClassifier()
Dtc.fit(xtrain,ytrain)

DecisionTreeClassifier()

In [109]:
Dtc.predict(xtest)

array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 1], dtype=int64)

In [108]:
data["Predicted Death_DTC"] = Dtc.predict(x)

In [107]:
data.tail()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,death
294,62.0,0,61,1,38,1,155000.0,1.1,143,1,1,270,0
295,55.0,0,1820,0,38,0,270000.0,1.2,139,0,0,271,0
296,45.0,0,2060,1,60,0,742000.0,0.8,138,0,0,278,0
297,45.0,0,2413,0,38,0,140000.0,1.4,140,1,1,280,0
298,50.0,0,196,0,45,0,395000.0,1.6,136,1,1,285,0


# Decision Tree Classifier Model Score before Tuning

In [106]:
Dtc.score(xtest,ytest)

0.8

# Tuning Decision Tree Classifier using Randomized Search CV

In [162]:
criterion = ["gini", "entropy"]
splitter = ["best", "random"]

Dtc_parameters = {
    "criterion" : criterion,
    "splitter" : splitter
    }

In [168]:
rscv_Dtc = RandomizedSearchCV(Dtc,Dtc_parameters,n_iter=150, cv = 3)

In [169]:
import warnings
warnings.filterwarnings('ignore')

rscv_Dtc.fit(xtrain,ytrain)


RandomizedSearchCV(cv=3, estimator=DecisionTreeClassifier(), n_iter=150,
                   param_distributions={'criterion': ['gini', 'entropy'],
                                        'splitter': ['best', 'random']})

In [170]:
pd.DataFrame(rscv_Dtc.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_splitter,param_criterion,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001504,0.0007016607,0.000501,0.0004056273,best,gini,"{'splitter': 'best', 'criterion': 'gini'}",0.757143,0.885714,0.73913,0.793996,0.06527,1
1,0.00117,0.0002433356,0.000667,0.000471539,random,gini,"{'splitter': 'random', 'criterion': 'gini'}",0.657143,0.671429,0.724638,0.684403,0.029042,4
2,0.0015,0.000410469,0.000503,0.000408339,best,entropy,"{'splitter': 'best', 'criterion': 'entropy'}",0.714286,0.8,0.594203,0.70283,0.084406,3
3,0.001,1.123916e-07,0.001,1.94668e-07,random,entropy,"{'splitter': 'random', 'criterion': 'entropy'}",0.8,0.742857,0.826087,0.789648,0.034758,2


# Evaluate Model After Randomized Search CV Tuning

In [171]:
rscv_Dtc.best_score_

0.7939958592132506

In [172]:
rscv_Dtc.best_estimator_

DecisionTreeClassifier()

# Tuning Decision Tree Classifier using Grid Search CV


In [173]:
gscv_Dtc = GridSearchCV(Dtc,Dtc_parameters)

In [174]:
gscv_Dtc.fit(xtrain,ytrain)

GridSearchCV(estimator=DecisionTreeClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'splitter': ['best', 'random']})

In [175]:
pd.DataFrame(gscv_Dtc.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_splitter,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001204,0.0005084324,0.000899,0.0001946524,gini,best,"{'criterion': 'gini', 'splitter': 'best'}",0.809524,0.738095,0.785714,0.738095,0.829268,0.780139,0.036996,1
1,0.001301,0.0004004156,0.000501,0.0004473964,gini,random,"{'criterion': 'gini', 'splitter': 'random'}",0.714286,0.738095,0.738095,0.642857,0.707317,0.70813,0.034914,3
2,0.0014,0.0004900376,0.0008,0.0004002334,entropy,best,"{'criterion': 'entropy', 'splitter': 'best'}",0.833333,0.738095,0.809524,0.714286,0.756098,0.770267,0.044471,2
3,0.001,1.907349e-07,0.001,9.536743e-08,entropy,random,"{'criterion': 'entropy', 'splitter': 'random'}",0.714286,0.619048,0.738095,0.714286,0.731707,0.703484,0.04326,4


# Evaluate Model After  GridSearchCV Tuning

In [176]:
gscv_Dtc.best_score_

0.7801393728222996

In [177]:
gscv_Dtc.best_estimator_

DecisionTreeClassifier()