In [1]:
#import the libraries 
import numpy as np 
import pandas as pd 
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [2]:
# Load the dataset. Column labels are supplied explicitly through `names`
# instead of being taken from the file.
column_names = [
    'pregnancies',
    'glucose',
    'bpressure',
    'skinfold',
    'insulin',
    'bmi',
    'pedigree',
    'age',
    'class',
]
df = pd.read_csv('medical.csv', names=column_names)

In [3]:
# Preview the first rows of the loaded frame as a quick sanity check.
df.head()

Unnamed: 0,pregnancies,glucose,bpressure,skinfold,insulin,bmi,pedigree,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(768, 9)

In [5]:
# Feature matrix: every column of df except the 'class' label.
X = df.drop(columns=['class'])
X.head()

Unnamed: 0,pregnancies,glucose,bpressure,skinfold,insulin,bmi,pedigree,age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [6]:
# Target vector: the 'class' column of df.
y = df.loc[:, 'class']
y.head()

0    1
1    0
2    1
3    0
4    1
Name: class, dtype: int64

In [7]:
# Hold out 25% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)
# Print the shape of each of the four pieces on one line.
print(*(part.shape for part in (X_train, y_train, X_test, y_test)))

(576, 8) (576,) (192, 8) (192,)


In [8]:
# Standardize the features. The scaler is fitted on the training split only
# and the same fitted scaler is reused later for new samples.
# NOTE(review): X_train is overwritten with the scaled array, so running this
# cell a second time would fit the scaler on already-scaled data.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

In [9]:
# Train a support vector classifier with a sigmoid kernel on the scaled
# training data; fit() returns the estimator itself, so it can be chained.
clf = svm.SVC(kernel='sigmoid').fit(X_train, y_train)
clf

SVC(kernel='sigmoid')

In [10]:
# Predict the labels of the training rows and report the training accuracy
# (the fraction of training labels the fitted model reproduces).
y_pred = clf.predict(X_train)
print(y_pred, accuracy_score(y_train, y_pred), sep='\n')

[0 0 0 0 1 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0
 0 1 1 0 0 1 0 0 0 0 1 1 1 0 0 0 1 0 1 1 1 0 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0
 1 1 0 0 1 0 0 1 1 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0
 0 1 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0
 1 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 1 1 1 0 1 1 1 0 0 1 1 0 0 0 0 1 1
 0 0 1 1 0 0 1 1 1 1 0 1 0 0 0 1 0 1 1 0 0 1 0 1 0 0 1 1 0 0 0 0 0 0 1 0 1
 0 0 0 1 1 0 1 0 1 1 0 0 0 1 0 0 0 1 0 1 1 0 1 0 0 1 0 1 1 0 0 1 0 0 0 1 1
 0 0 1 0 0 1 1 1 1 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 0 0 1 0 0 1 1 1 0 0 1 1 1
 0 1 1 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0
 0 1 0 1 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0
 0 1 0 1 0 0 0 1 0 1 0 1 0 0 0 1 0 1 0 0 0 1 1 0 0 0 0 0 0 1 0 1 1 0 0 0 0
 1 1 0 0 0 1 1 1 0 1 0 1 0 1 0 0 0 1 1 1 0 1 0 1 0 0 0 1 1 0 0 1 0 1 1 0 0
 0 0 1 0 0 0 0 0 0 0 1 0 1 1 1 1 0 0 1 1 1 1 1 0 0 1 0 0 0 1 0 0 0 0 0 0 0
 1 0 1 0 1 0 1 0 0 1 0 0 

In [11]:
# Fit one SVC per kernel and print the kernel name followed by its accuracy
# on the training set.
# NOTE(review): these scores are measured on the same rows the models were
# fitted on; consider a cross-validated comparison before choosing a kernel.
for kernel in ('linear', 'poly', 'rbf', 'sigmoid'):
    clf = svm.SVC(kernel=kernel).fit(X_train, y_train)
    y_pred = clf.predict(X_train)
    print(kernel, accuracy_score(y_train, y_pred), sep='\n')

linear
0.7638888888888888
poly
0.7934027777777778
rbf
0.8246527777777778
sigmoid
0.6510416666666666


In [12]:
# Re-create and fit the RBF-kernel classifier that is used as the final model
# for all predictions below.
clf = svm.SVC(kernel='rbf').fit(X_train, y_train)
clf

SVC()

In [14]:
# Making a single prediction for one hand-entered patient record.
# The scaler was fitted on a DataFrame, so the record is built as a one-row
# DataFrame carrying the same column labels as X. A bare ndarray has no
# feature names: recent scikit-learn versions warn about that, and nothing
# would check that the values are in the right column order.
patient_raw = pd.DataFrame(
    [[1., 200., 75., 40., 0., 45., 1.5, 20.]],
    columns=X.columns,
)
# Apply the training-set scaling before handing the sample to the classifier.
patient = scaler.transform(patient_raw)
clf.predict(patient)

array([1], dtype=int64)

In [15]:
# Testing set prediction: classify the first held-out row and compare the
# result with its true label.
# iloc[[0]] (list indexer) returns a one-row DataFrame that keeps the column
# labels, so the scaler receives the same feature names it was fitted with;
# wrapping iloc[0] in np.array() would discard them.
patient_raw = X_test.iloc[[0]]
patient = scaler.transform(patient_raw)
print(clf.predict(patient))
print(y_test.iloc[0])

[1]
1


In [16]:
# Accuracy on the testing set.
# The scaled copy is stored under its own name so X_test stays the raw
# DataFrame. Overwriting X_test made this cell non-idempotent: a second run
# would scale the already-scaled data, and any cell indexing X_test with
# .iloc would fail once it had become an ndarray.
X_test_scaled = scaler.transform(X_test)
y_pred = clf.predict(X_test_scaled)
print(accuracy_score(y_test, y_pred))

0.7760416666666666


In [18]:
# Baseline for comparison: the accuracy obtained by predicting class 0 for
# every row of the test set.
y_zero = np.zeros(len(y_test))
print(accuracy_score(y_test, y_zero))

0.6770833333333334


In [20]:
# Per-class precision, recall and F1 for the test-set predictions in y_pred.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.80      0.90      0.84       130
           1       0.71      0.52      0.60        62

    accuracy                           0.78       192
   macro avg       0.75      0.71      0.72       192
weighted avg       0.77      0.78      0.77       192

