# [Medical Diagnosis with Support Vector Machines](https://www.coursera.org/learn/medical-diagnosis-support-vector-machines/)

## Task 1: Import Libraries


In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

## Task 1: Get Data

In [4]:
# Labels applied to the CSV columns, in file order.
column_names = [
    "pregnancies", "glucose", "bpressure", "skinfold",
    "insulin", "bmi", "pedigree", "age", "class",
]

# Passing `names=` without `header=` makes pandas treat every line of the
# file, including the first, as a data row.
df = pd.read_csv("data.csv", names=column_names)

# Shape followed by one blank line, ahead of the preview table.
print(df.shape, end="\n\n")

df.head()

(768, 9)



Unnamed: 0,pregnancies,glucose,bpressure,skinfold,insulin,bmi,pedigree,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


## Task 1: Extract Features

In [5]:
# Features are every column except the last one.
n_features = df.shape[1] - 1
X = df.iloc[:, :n_features]
X.head()

Unnamed: 0,pregnancies,glucose,bpressure,skinfold,insulin,bmi,pedigree,age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


## Task 1: Extract Class Labels

In [6]:
# Target vector: the "class" column as a Series.
y = df.loc[:, "class"]
y.head()

0    1
1    0
2    1
3    0
4    1
Name: class, dtype: int64

## Task 2: Split Dataset

In [8]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
split = train_test_split(X, y, test_size=0.25, random_state=42)
X_train, X_test, y_train, y_test = split

# Shapes in the order: train features, train labels, test features, test
# labels, followed by one blank line.
print(*(part.shape for part in (X_train, y_train, X_test, y_test)), end="\n\n")

X_test.head()

(576, 8) (576,) (192, 8) (192,)



Unnamed: 0,pregnancies,glucose,bpressure,skinfold,insulin,bmi,pedigree,age
668,6,98,58,33,190,34.0,0.43,43
324,2,112,75,32,0,35.7,0.148,21
624,2,108,64,0,0,30.8,0.158,21
690,8,107,80,0,0,24.6,0.856,34
473,7,136,90,0,0,29.9,0.21,50


## Task 2: Normalize Features

In [9]:
# Fit the scaler on the training rows only.
scaler = StandardScaler().fit(X_train)

# NOTE: X_train is rebound to the scaled array here, so running this cell a
# second time would fit the scaler on already-scaled data.
X_train = scaler.transform(X_train)

# Preview the first five scaled rows.
X_train[:5]

array([[ 2.80346794,  0.25977903, -3.78077929,  0.61677038, -0.69205168,
         1.03974028,  0.29608546,  0.96352088],
       [ 0.07832678,  0.25977903,  0.89724451, -0.03210586,  1.63307692,
         0.40945373, -0.70087555, -0.86295593],
       [-0.22446668, -1.85825286,  0.67966201,  0.48699513, -0.69205168,
         0.31753694, -0.66548048,  1.13747105],
       [-0.52726014, -1.2353023 ,  0.13570575, -0.35654397, -0.03757104,
        -0.24709476,  0.2311945 , -0.68900576],
       [-1.13284707, -0.58120422,  0.29889263,  0.16255702, -0.69205168,
        -4.19951667,  0.30493422, -1.03690611]])

## Task 3: Training a Support Vector Machine

In [10]:
# Fit a support vector classifier with a sigmoid kernel on the scaled
# training data; `fit` returns the estimator itself.
clf = svm.SVC(kernel="sigmoid").fit(X_train, y_train)
clf

SVC(kernel='sigmoid')

## Task 3: Decision Boundary

In [11]:
# Predict on the same rows the model was fitted on, so the score printed
# below is training accuracy.
y_pred = clf.predict(X_train)
train_accuracy = accuracy_score(y_train, y_pred)

# Predicted labels on the first line(s), accuracy on the next.
print(y_pred, train_accuracy, sep="\n")

[0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 1 1 0 0 0 1 0 1 0 0 1 1 0 0 0 0 1 1 0 1
 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0
 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 1 1 0 0 0 1 1 0 1 0 0 0 0 1 0 0 1 1 0 0 0 1
 0 0 1 0 1 1 1 0 1 1 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0
 0 0 1 1 1 1 0 1 0 0 0 0 1 0 1 1 1 0 0 1 1 1 1 0 0 0 0 0 1 0 0 0 1 1 1 0 1
 0 0 0 0 1 1 1 0 0 1 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0
 0 1 1 0 1 1 0 1 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 1 1 0 1 0 1 0 1 0 1 0 0 1 0
 0 1 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 0 1 1 0
 1 1 0 1 0 1 0 0 1 0 0 0 1 0 0 1 1 0 0 1 1 0 0 0 1 0 0 0 0 0 1 1 1 0 0 1 0
 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 1 0 1 0 0 0 0 1 0 1 0 0 0 0 0 1 1 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 1 1 0 0 1 1 0 0 0 1 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0
 1 0 0 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 0 1 0 0 0 0 0
 0 0 0 1 0 1 0 0 1 0 0 0 

## Task 3: SVM Kernels

In [12]:
# Fit one classifier per kernel and report its accuracy on the training data.
# `clf` and `y_pred` are left holding the results for the last kernel tried.
for kernel in ["linear", "poly", "rbf", "sigmoid"]:
    clf = svm.SVC(kernel=kernel).fit(X_train, y_train)
    y_pred = clf.predict(X_train)
    # Kernel name, its accuracy, then an empty line as a separator.
    print(kernel, accuracy_score(y_train, y_pred), "", sep="\n")

linear
0.78125

poly
0.7951388888888888

rbf
0.8315972222222222

sigmoid
0.6875



## Task 4: Instantiating the Best Model

In [13]:
# Rebuild the classifier with the RBF kernel and fit it on the scaled
# training data; `clf` is rebound to this fitted model.
rbf_model = svm.SVC(kernel="rbf")
clf = rbf_model.fit(X_train, y_train)
clf

SVC()

## Task 4: Making a Single Prediction

In [14]:
# Feature order of the vector below (the "class" label is not an input):
#   pregnancies, glucose, bpressure, skinfold, insulin, bmi, pedigree, age
raw_patient = np.array([[1.0, 200.0, 75.0, 40.0, 0.0, 45.0, 1.5, 20.0]])

# Apply the same scaling the classifier was trained with, then predict.
patient = scaler.transform(raw_patient)
clf.predict(patient)

array([1])

In [15]:
# Same patient as the previous cell except that glucose (second value) is
# 50.0 instead of 200.0.
patient = scaler.transform(
    np.array([[1.0, 50.0, 75.0, 40.0, 0.0, 45.0, 1.5, 20.0]])
)
clf.predict(patient)

array([0])

## Task 4: Testing Set Prediction

In [16]:
# Take the first held-out row as a single-row 2-D array and scale it.
first_row = X_test.iloc[0].to_numpy().reshape(1, -1)
patient = scaler.transform(first_row)

# Predicted label next to the true label for that row.
print(clf.predict(patient), y_test.iloc[0])

[0] 0


## Task 5: Accuracy on Testing Set

In [17]:
# Scale the held-out features with the scaler fitted on the training data.
# The result goes into a new name instead of overwriting X_test: rebinding
# X_test would make a second run of this cell scale the data twice (silently
# changing the reported accuracy) and would replace the DataFrame with an
# array that no longer supports `.iloc`.
X_test_scaled = scaler.transform(X_test)
y_pred = clf.predict(X_test_scaled)

# Fraction of held-out rows whose predicted label matches the true label.
print(accuracy_score(y_test, y_pred))

0.7291666666666666


## Task 5: Comparison to All-Zero Prediction

In [18]:
# Baseline: predict class 0 for every test row and score it the same way as
# the model.
y_zero = np.zeros(len(y_test))
baseline_accuracy = accuracy_score(y_test, y_zero)

print(baseline_accuracy)

0.640625


## Task 5: Precision and Recall

In [19]:
# Per-class precision, recall, F1 and support for the test-set predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.77      0.82      0.80       123
           1       0.64      0.57      0.60        69

    accuracy                           0.73       192
   macro avg       0.71      0.69      0.70       192
weighted avg       0.72      0.73      0.73       192

