In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import learning_curve
from sklearn.model_selection import ShuffleSplit
from sklearn.cluster import KMeans
from sklearn.compose import make_column_transformer
from sklearn import svm
from sklearn import metrics

## Preprocessing

In [3]:
df = pd.read_csv('UCI_Heart_Disease.csv',index_col=False)
# get label from dataframe
label = df['target'].to_numpy()
df = df.drop(columns ='target')

# locate the catagotical columns (ie.feature described by words, not numbers)
catagorical_col = []
columns = df.columns[:]
for col in columns:
    if type(df[col][0])==type('str'):
        catagorical_col.append(col)
        
# transform str feature to numerical feature
column_transformer = make_column_transformer(
    (OneHotEncoder(), catagorical_col), remainder = 'passthrough')

data = column_transformer.fit_transform(df)

# transform the data into n clusters
KM_data = KMeans(n_clusters=300, random_state=0).fit_transform(data)

X = KM_data
y = label

x_train, x_val, y_train, y_val = train_test_split(KM_data,label,stratify=label,test_size = 0.2,random_state=555,shuffle=True)
print(x_train.shape)
print(x_val.shape)

(820, 300)
(205, 300)


## SVM Classification

In [4]:
clf = svm.SVC(kernel='linear')
clf.fit(x_train, y_train)
predictions = clf.predict(x_val)
acc = metrics.accuracy_score(y_val,predictions)
prec = metrics.precision_score(y_val,predictions)
recall = metrics.recall_score(y_val,predictions)
print('Accuracy:', acc)
print('Precision:', prec)
print('Recall:', recall)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0


In [5]:
clf = svm.SVC(kernel='poly', coef0=15)
clf.fit(x_train, y_train)
predictions = clf.predict(x_val)
acc = metrics.accuracy_score(y_val,predictions)
prec = metrics.precision_score(y_val,predictions)
recall = metrics.recall_score(y_val,predictions)
print('Accuracy:', acc)
print('Precision:', prec)
print('Recall:', recall)

Accuracy: 0.8926829268292683
Precision: 0.8878504672897196
Recall: 0.9047619047619048


In [6]:
clf = svm.SVC(kernel='sigmoid', gamma = 1/13)
clf.fit(x_train, y_train)
predictions = clf.predict(x_val)
acc = metrics.accuracy_score(y_val,predictions)
prec = metrics.precision_score(y_val,predictions)
recall = metrics.recall_score(y_val,predictions)
print('Accuracy:', acc)
print('Precision:', prec)
print('Recall:', recall)

Accuracy: 0.5121951219512195
Precision: 0.5121951219512195
Recall: 1.0


In [7]:
clf = svm.SVC(kernel='rbf', gamma = 1/13)
clf.fit(x_train, y_train)
predictions = clf.predict(x_val)
acc = metrics.accuracy_score(y_val,predictions)
prec = metrics.precision_score(y_val,predictions)
recall = metrics.recall_score(y_val,predictions)
print('Accuracy:', acc)
print('Precision:', prec)
print('Recall:', recall)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
