In [1]:
# Importing the libraries
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [5]:
# Importing the dataset
dataset = pd.read_csv('kidney_disease.csv')
dataset.head()

Unnamed: 0,id,age,bp,sg,al,su,rbc,pc,pcc,ba,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,0,48.0,80.0,1.02,1.0,0.0,,normal,notpresent,notpresent,...,44.0,7800.0,5.2,yes,yes,no,good,no,no,ckd
1,1,7.0,50.0,1.02,4.0,0.0,,normal,notpresent,notpresent,...,38.0,6000.0,,no,no,no,good,no,no,ckd
2,2,62.0,80.0,1.01,2.0,3.0,normal,normal,notpresent,notpresent,...,31.0,7500.0,,no,yes,no,poor,no,yes,ckd
3,3,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,...,32.0,6700.0,3.9,yes,no,no,poor,yes,yes,ckd
4,4,51.0,80.0,1.01,2.0,0.0,normal,normal,notpresent,notpresent,...,35.0,7300.0,4.6,no,no,no,good,no,no,ckd


In [6]:
# removing row that have empty or no values for specific columns
dataset=dataset[pd.notnull(dataset['age'])]
dataset=dataset[pd.notnull(dataset['pcc'])]
dataset=dataset[pd.notnull(dataset['appet'])]
dataset=dataset[pd.notnull(dataset['cad'])]
dataset.head()

Unnamed: 0,id,age,bp,sg,al,su,rbc,pc,pcc,ba,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,0,48.0,80.0,1.02,1.0,0.0,,normal,notpresent,notpresent,...,44.0,7800.0,5.2,yes,yes,no,good,no,no,ckd
1,1,7.0,50.0,1.02,4.0,0.0,,normal,notpresent,notpresent,...,38.0,6000.0,,no,no,no,good,no,no,ckd
2,2,62.0,80.0,1.01,2.0,3.0,normal,normal,notpresent,notpresent,...,31.0,7500.0,,no,yes,no,poor,no,yes,ckd
3,3,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,...,32.0,6700.0,3.9,yes,no,no,poor,yes,yes,ckd
4,4,51.0,80.0,1.01,2.0,0.0,normal,normal,notpresent,notpresent,...,35.0,7300.0,4.6,no,no,no,good,no,no,ckd


In [9]:
# replacing null values with string normal 
dataset['rbc'].fillna('normal',inplace=True)
dataset['pc'].fillna('normal',inplace=True)
dataset.fillna(dataset.mean(),inplace=True)

# removing some entries in the classification column that had a tab pressed unintentionally while entering data
dataset=dataset[~dataset.classification.str.contains('ckd\t')]
dataset.head()

Unnamed: 0,id,age,bp,sg,al,su,rbc,pc,pcc,ba,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,0,48.0,80.0,1.02,1.0,0.0,normal,normal,notpresent,notpresent,...,44.0,7800.0,5.2,yes,yes,no,good,no,no,ckd
1,1,7.0,50.0,1.02,4.0,0.0,normal,normal,notpresent,notpresent,...,38.0,6000.0,4.687645,no,no,no,good,no,no,ckd
2,2,62.0,80.0,1.01,2.0,3.0,normal,normal,notpresent,notpresent,...,31.0,7500.0,4.687645,no,yes,no,poor,no,yes,ckd
3,3,48.0,70.0,1.005,4.0,0.0,normal,abnormal,present,notpresent,...,32.0,6700.0,3.9,yes,no,no,poor,yes,yes,ckd
4,4,51.0,80.0,1.01,2.0,0.0,normal,normal,notpresent,notpresent,...,35.0,7300.0,4.6,no,no,no,good,no,no,ckd


In [12]:
dataset.dtypes

id                  int64
age               float64
bp                float64
sg                float64
al                float64
su                float64
rbc                object
pc                 object
pcc                object
ba                 object
bgr               float64
bu                float64
sc                float64
sod               float64
pot               float64
hemo              float64
pcv               float64
wc                float64
rc                float64
htn                object
dm                 object
cad                object
appet              object
pe                 object
ane                object
classification     object
dtype: object

In [18]:
#Data Encoding

from sklearn import preprocessing

data_encoder = preprocessing.LabelEncoder()
dataset['rbc'] = data_encoder.fit_transform(dataset['rbc'])
dataset['pc'] = data_encoder.fit_transform(dataset['pc'])
dataset['pcc'] = data_encoder.fit_transform(dataset['pcc'])
dataset['ba'] = data_encoder.fit_transform(dataset['ba'])
dataset['htn'] = data_encoder.fit_transform(dataset['htn'])
dataset['dm'] = data_encoder.fit_transform(dataset['dm'])
dataset['cad'] = data_encoder.fit_transform(dataset['cad'])
dataset['appet'] = data_encoder.fit_transform(dataset['appet'])
dataset['pe'] = data_encoder.fit_transform(dataset['pe'])
dataset['ane'] = data_encoder.fit_transform(dataset['ane'])
dataset['classification'] = data_encoder.fit_transform(dataset['classification'])

dataset.dtypes

id                  int64
age               float64
bp                float64
sg                float64
al                float64
su                float64
rbc                 int64
pc                  int64
pcc                 int64
ba                  int64
bgr               float64
bu                float64
sc                float64
sod               float64
pot               float64
hemo              float64
pcv               float64
wc                float64
rc                float64
htn                 int64
dm                  int64
cad                 int64
appet               int64
pe                  int64
ane                 int64
classification      int64
dtype: object

In [81]:
dataset.head()
dataset.to_csv('cleanData.csv')

In [88]:
# splitting of data into X - variables & attributes and y - output (whether chronic kidney disease or not)
X = dataset.iloc[:, 1:24].values
Y = dataset.iloc[:, 25].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
print(X.shape, Y.shape)

(382, 23) (382,)


In [33]:
#Machine Lerning

#Support Vector Machine(SVM)
from sklearn import svm
from sklearn.metrics import accuracy_score

clf = svm.SVC(gamma='scale')
clf.fit(X_train, Y_train)

pred = clf.predict(X_test)
print('Accuracy using Support Vector Machine(SVM): ', 100 * accuracy_score(Y_test, pred))

Accuracy using Support Vector Machine(SVM):  65.21739130434783


In [65]:
#Multilayer Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

clf = MLPClassifier(activation='relu', solver='adam', alpha=1e-5, hidden_layer_sizes=(20, 5), learning_rate='adaptive', random_state=1)
clf.fit(X_train, Y_train)

pred = clf.predict(X_test)
print('Accuracy using Multilayer Perceptron: ', 100 * accuracy_score(Y_test, pred))

Accuracy using Multilayer Perceptron:  93.04347826086956


In [41]:
#Gaussian Naive Bayes
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

clf = GaussianNB()
clf.fit(X_train, Y_train)

pred = clf.predict(X_test)
print('Accuracy using Gaussian Naive Bayes: ', 100 * accuracy_score(Y_test, pred))

Accuracy using Gaussian Naive Bayes:  99.1304347826087


In [58]:
#K-Neighbors Classifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

clf = KNeighborsClassifier(n_neighbors=10)
clf.fit(X_train, Y_train)

pred = clf.predict(X_test)
print('Accuracy using K-Neighbors Classifier: ', 100 * accuracy_score(Y_test, pred))

Accuracy using K-Neighbors Classifier:  68.69565217391305


In [59]:
#Decision Trees
from sklearn import tree
from sklearn.metrics import accuracy_score

clf = tree.DecisionTreeClassifier()
clf.fit(X_train, Y_train)

pred = clf.predict(X_test)
print('Accuracy using Decision Trees: ', 100 * accuracy_score(Y_test, pred))

Accuracy using Decision Trees:  96.52173913043478


In [60]:
#Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

clf = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
clf.fit(X_train, Y_train)

pred = clf.predict(X_test)
print('Accuracy using Random Forest Classifier: ', 100 * accuracy_score(Y_test, pred))

Accuracy using Random Forest Classifier:  98.26086956521739


In [136]:
import pickle

filename = 'RandomForestModel.sav'
pickle.dump(clf, open(filename, 'wb'))

In [126]:
arr = np.array([48, 80, 1.02, 1, 0, 1, 1, 0, 0, 121, 36, 1.2, 137.4816667, 4.63277592, 15.4, 44, 7800, 5.2, 1, 3, 1, 0, 0])
arr

array([4.80000000e+01, 8.00000000e+01, 1.02000000e+00, 1.00000000e+00,
       0.00000000e+00, 1.00000000e+00, 1.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 1.21000000e+02, 3.60000000e+01, 1.20000000e+00,
       1.37481667e+02, 4.63277592e+00, 1.54000000e+01, 4.40000000e+01,
       7.80000000e+03, 5.20000000e+00, 1.00000000e+00, 3.00000000e+00,
       1.00000000e+00, 0.00000000e+00, 0.00000000e+00])

In [108]:
test = np.reshape(arr, (1, -1))
test.shape

(1, 23)

In [138]:
print(clf.predict(test))

[1]


In [171]:
arr = np.array(X[358])
arr.shape

test = np.reshape(arr, (1, -1))
test.shape

x = clf.predict(test)[0]
if x == 1:
    print("YES")
else:
    print("NO")

YES


In [127]:
lst = [1,2,3]
nparr = np.asarray(lst)

In [128]:
nparr

array([1, 2, 3])

In [130]:
model = pickle.load(open(filename, 'rb'))
print(model.predict(test))

[1]


In [134]:
input_list = [48, 80, 1.02, 1, 0, 1, 1, 0, 0, 121, 36, 1.2, 137.4816667, 4.63277592, 15.4, 44, 7800, 5.2, 1, 3, 1, 0, 0]
np_input = np.asarray(input_list)
np_input = np.reshape(np_input, (1, -1))

filename = 'RandomForestModel.sav'
model = pickle.load(open(filename, 'rb'))
print(model.predict(np_input))

[1]


In [133]:
model = pickle.load(open(filename, 'rb'))
print(model.predict(np_input))

[1]


In [161]:
lst = []
lst.append(float(input("Enter: ")))
lst.append(float(input("Enter: ")))

Enter: 10.55
Enter: 10.978


In [162]:
print(lst)

[10.55, 10.978]
