Importing the Dependencies

In [1]:
# Numerical arrays and tabular data handling.
import numpy as np
import pandas as pd
# Intel Extension for Scikit-learn: patch_sklearn() is invoked here, ahead of the
# `sklearn.metrics` import further down in this cell.
# NOTE(review): patching is expected to happen before scikit-learn itself is
# imported -- confirm against the sklearnex documentation.
from sklearnex import patch_sklearn
patch_sklearn()
from sklearnex.model_selection import train_test_split
from sklearnex import svm
from sklearn.metrics import accuracy_score

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


First, we load the data and analyze it

In [2]:
# Load the kidney-stone measurements; read_csv already splits on commas by default.
kidney_stone_dataset = pd.read_csv('kidney_stone.csv')

In [3]:
# Preview the first rows to check that the columns were parsed as expected.
kidney_stone_dataset.head()

Unnamed: 0,gravity,ph,osmo,cond,urea,calc,target
0,1.021,4.91,725,14.0,443,2.45,0
1,1.017,5.74,577,20.0,296,4.49,0
2,1.008,7.2,321,14.9,101,2.36,0
3,1.011,5.51,408,12.6,224,2.15,0
4,1.005,6.52,187,7.5,91,1.16,0


In [4]:
# (rows, columns) of the loaded table.
kidney_stone_dataset.shape

(79, 7)

In [5]:
# Summary statistics (count, mean, std, quartiles, min/max) for each column.
kidney_stone_dataset.describe()

Unnamed: 0,gravity,ph,osmo,cond,urea,calc,target
count,79.0,79.0,79.0,79.0,79.0,79.0,79.0
mean,1.018114,6.028481,612.848101,20.813924,266.405063,4.138987,0.43038
std,0.007239,0.724307,237.514755,7.938994,131.25455,3.260051,0.498293
min,1.005,4.76,187.0,5.1,10.0,0.17,0.0
25%,1.012,5.53,413.0,14.15,160.0,1.46,0.0
50%,1.018,5.94,594.0,21.4,260.0,3.16,0.0
75%,1.0235,6.385,792.0,26.55,372.0,5.93,1.0
max,1.04,7.94,1236.0,38.0,620.0,14.34,1.0


In [6]:
# Class balance: how many rows carry each value of the label column.
kidney_stone_dataset['target'].value_counts()

0    45
1    34
Name: target, dtype: int64

0 --> Does not have a kidney stone

1 --> Has a kidney stone

In [7]:
# Per-class mean of every feature, to see which measurements differ between the two target groups.
kidney_stone_dataset.groupby('target').mean()

Unnamed: 0_level_0,gravity,ph,osmo,cond,urea,calc
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1.015489,6.098667,565.288889,20.404444,237.111111,2.624889
1,1.021588,5.935588,675.794118,21.355882,305.176471,6.142941


In [8]:
# Separate the label vector from the feature matrix (every column except the label).
Y = kidney_stone_dataset['target']
X = kidney_stone_dataset.drop(columns=['target'])

In [9]:
# Plain-text dump of the feature frame; pandas abbreviates the middle rows.
print(X)

    gravity    ph  osmo  cond  urea   calc
0     1.021  4.91   725  14.0   443   2.45
1     1.017  5.74   577  20.0   296   4.49
2     1.008  7.20   321  14.9   101   2.36
3     1.011  5.51   408  12.6   224   2.15
4     1.005  6.52   187   7.5    91   1.16
..      ...   ...   ...   ...   ...    ...
74    1.025  7.90   721  23.6   301   9.04
75    1.017  4.81   410  13.3   195   0.58
76    1.024  5.40   803  21.8   394   7.82
77    1.016  6.81   594  21.4   255  12.20
78    1.015  6.03   416  12.8   178   9.39

[79 rows x 6 columns]


In [10]:
# Plain-text dump of the label series; pandas abbreviates the middle rows.
print(Y)

0     0
1     0
2     0
3     0
4     0
     ..
74    1
75    1
76    1
77    1
78    1
Name: target, Length: 79, dtype: int64


Train Test Split

In [11]:
# Hold out 20% of the rows for evaluation. Stratifying on the label keeps the class
# ratio in both splits, and the fixed seed makes the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    stratify=Y,
    random_state=2,
)

In [12]:
# Sanity check: shapes of the full, training and test feature matrices.
print(X.shape, X_train.shape, X_test.shape)

(79, 6) (63, 6) (16, 6)


Training the Model

In [13]:
# Support-vector classifier with a linear kernel; every other hyperparameter is left at its default.
classifier = svm.SVC(kernel='linear')

In [14]:
# Fit on the training split only; the test split is kept aside for evaluation.
classifier.fit(X_train, Y_train)

Model Evaluation

Accuracy Score

In [15]:
# Score the model on the rows it was trained on.
X_train_prediction = classifier.predict(X_train)
# accuracy_score expects (y_true, y_pred): ground truth first, predictions second.
training_data_accuracy = accuracy_score(Y_train, X_train_prediction)

In [16]:
# Report the accuracy measured on the training split.
print('Accuracy score of the training data : ', training_data_accuracy)

Accuracy score of the training data :  0.8253968253968254


In [17]:
# Score the model on the held-out rows it has not seen during fitting.
X_test_prediction = classifier.predict(X_test)
# accuracy_score expects (y_true, y_pred): ground truth first, predictions second.
test_data_accuracy = accuracy_score(Y_test, X_test_prediction)

In [18]:
# Report the accuracy on the held-out split. That split has only 16 rows (see the
# shapes printed after the train/test split), so this figure is a coarse estimate.
print('Accuracy score of the test data : ', test_data_accuracy)

Accuracy score of the test data :  0.6875


Making a Predictive System

In [19]:
# Classify a single new sample with the trained classifier.
# Values follow the training column order: gravity, ph, osmo, cond, urea, calc.
input_data = (1.021, 5.53, 775, 31.2, 302, 6.19)

input_data_as_numpy_array = np.asarray(input_data)

# One sample -> shape (1, n_features), since predict() takes a 2-D input.
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The classifier was fitted on a DataFrame, so attach the training column names to
# the sample instead of passing a bare array; this keeps the feature order explicit.
input_features = pd.DataFrame(input_data_reshaped, columns=X.columns)

prediction = classifier.predict(input_features)
print(prediction)

# Predicted label 0 -> no kidney stone; otherwise (label 1) -> kidney stone.
if prediction[0] == 0:
    print('The person dont have kidney stone')
else:
    print('The person have kidney stone')

[1]
The person have kidney stone




Saving the trained model

In [20]:
import pickle

In [21]:
# Persist the fitted classifier to disk. The context manager guarantees the file
# handle is flushed and closed even if pickling raises.
filename = 'kidney_stone.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

In [22]:
# Reload the classifier from disk, closing the file handle once it has been read.
# Only unpickle files you created yourself: pickle.load can execute arbitrary code
# embedded in a tampered file.
with open('kidney_stone.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

In [23]:
# Run a sample through the model that was reloaded from disk, to confirm the
# pickle round-trip yields a usable classifier.
# Values follow the training column order: gravity, ph, osmo, cond, urea, calc.
input_data = (1.021, 5.53, 775, 31.2, 302, 6.19)

input_data_as_numpy_array = np.asarray(input_data)

# One sample -> shape (1, n_features), since predict() takes a 2-D input.
input_data_reshaped = input_data_as_numpy_array.reshape(1, -1)

# The model was fitted on a DataFrame, so attach the training column names to the
# sample instead of passing a bare array; this keeps the feature order explicit.
input_features = pd.DataFrame(input_data_reshaped, columns=X.columns)

# Use the reloaded model here (not the in-memory `classifier`), otherwise the
# saved file is never actually exercised.
prediction = loaded_model.predict(input_features)
print(prediction)

# Predicted label 0 -> no kidney stone; otherwise (label 1) -> kidney stone.
if prediction[0] == 0:
    print('The person dont have kidney stone')
else:
    print('The person have kidney stone')

[1]
The person have kidney stone




In [24]:
# List the feature names in the order the model expects them.
for feature_name in X.columns.tolist():
    print(feature_name)

gravity
ph
osmo
cond
urea
calc
