In [190]:
# Import Dependencies
import pandas as pd
from sklearn.model_selection import train_test_split

from scipy.io import arff
from sklearn.preprocessing import LabelEncoder

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from sklearn import svm
from sklearn.svm import SVC

from sklearn import metrics
from sklearn.metrics import f1_score

In [191]:
#Preprocessing and loading data by filling in empty spaces
file = arff.loadarff('chronic_kidney_disease_full.arff')
df = pd.DataFrame(file[0])

for i in df.columns:
        if df[i].dtype !=  'float64':
            lab = LabelEncoder()
            lab.fit(df[i])
            df[i] = lab.transform(df[i])

#Fill empty values of dataset
df.fillna(method = 'ffill', inplace = True)
df.fillna(method = 'bfill', inplace = True)


X = df.iloc[:, :-1].values
Y = df['class'].values

In [192]:
# Dividing the dataset for training and testing such that 20 % is used for testing and 80% is used for training and random_state is set as 33 to shuffle the data
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2,random_state = 33)

In [193]:
# Printing the training and testing data for reference

print(x_train)
print(x_test)

[[60. 60.  3. ...  1.  1.  1.]
 [74. 60.  3. ...  1.  1.  1.]
 [56. 70.  4. ...  1.  1.  1.]
 ...
 [64. 70.  1. ...  1.  1.  1.]
 [36. 80.  4. ...  1.  1.  1.]
 [61. 80.  2. ...  2.  2.  2.]]
[[34. 90.  2. ...  1.  1.  1.]
 [70. 60.  5. ...  1.  1.  1.]
 [70. 80.  3. ...  1.  1.  1.]
 ...
 [50. 80.  3. ...  1.  1.  1.]
 [62. 80.  3. ...  1.  1.  1.]
 [30. 80.  3. ...  1.  1.  1.]]


In [194]:
# Create a SVM Classifier with rbf kernel and default parameters
svm_rbf = make_pipeline(StandardScaler(), SVC())

In [195]:
#Train the model using the training sets
svm_rbf.fit(x_train, y_train)

Pipeline(steps=[('standardscaler', StandardScaler()), ('svc', SVC())])

In [196]:
# Predict the response for test dataset and train dataset

y_test_pred = svm_rbf.predict(x_test)
y_train_pred = svm_rbf.predict(x_train)


In [197]:
# Print F measure of the algorithm's performance on the training and test sets

#print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

print("f-measure of test set:", f1_score(y_test, y_test_pred))
print("f-measure of train set:", f1_score(y_train, y_train_pred))


f-measure of test set: 0.9879518072289156
f-measure of train set: 1.0
