Load the data

In [22]:
import pandas as pd

# Load the CSV into a DataFrame; later cells expect it to contain a
# 'HeartDisease' column, which is used as the prediction target.
data = pd.read_csv(filepath_or_buffer='../data/heart_2020_cleaned.csv')

Normalize the float-valued variables (min-max scaling)

In [23]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every float64 column of `data`, overwriting the original
# values; columns of any other dtype are left untouched.
# NOTE(review): the scaler is fit on the full table, before the train/test
# split done in a later cell, so test rows influence the scaling range.
# Consider fitting on the training rows only.
scaler = MinMaxScaler()
columns_to_normalize = data.select_dtypes(include='float64').columns
data[columns_to_normalize] = scaler.fit_transform(
    data.loc[:, columns_to_normalize]
)


Separate the target variable from the independent variables (features)

In [24]:
# Target vector: the 'HeartDisease' column.
obj = data['HeartDisease']
# Feature matrix: every remaining column of `data`.
char = data.drop('HeartDisease', axis='columns')


Select the 14 best features with the ANOVA F-test (`f_classif`)

In [25]:
from sklearn.feature_selection import SelectKBest, f_classif
 
# Rank every feature against the target with the ANOVA F-statistic
# (f_classif) and keep the 14 highest-scoring ones. Swap in
# mutual_info_classif if mutual information is what is actually intended.
# NOTE(review): the selector is fit on all rows, before the train/test split
# done in a later cell, so the test rows take part in choosing the features.
selector = SelectKBest(score_func=f_classif, k=14)
char_new = selector.fit(char, obj).transform(char)

# Map the positions of the retained columns back to their names.
selected_features_indices = selector.get_support(indices=True)
selected_features = char.columns[selected_features_indices]
print("Selected Features:", selected_features)

Selected Features: Index(['BMI', 'Smoking', 'Stroke', 'PhysicalHealth', 'DiffWalking', 'Sex',
       'AgeCategory', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma',
       'KidneyDisease', 'SkinCancer', 'Race_White'],
      dtype='object')


Split the data into training and test sets

In [26]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified on `obj`; consider passing
# stratify=obj so both sets keep the same class proportions.
char_train, char_test, obj_train, obj_test = train_test_split(
    char_new,
    obj,
    random_state=42,
    test_size=0.2,
)


In [27]:
from sklearn.svm import SVC

# Support vector classifier with an RBF kernel and regularization parameter
# C=1; it is only configured here and is fitted in the next cell.
svm_model = SVC(C=1, kernel='rbf')

In [28]:
from sklearn.metrics import accuracy_score, recall_score, f1_score

# Train on the training split, then predict labels for the held-out rows
# (fit() returns the estimator itself, so the calls can be chained).
predictions = svm_model.fit(char_train, obj_train).predict(char_test)

# Score the predictions against the true test labels. No pos_label/average
# arguments are passed, so recall and F1 use scikit-learn's default binary
# settings.
accuracy, recall, f1 = (
    metric(obj_test, predictions)
    for metric in (accuracy_score, recall_score, f1_score)
)

# Report each metric rounded to two decimals.
print(f'Accuracy: {accuracy:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1-Score: {f1:.2f}')


Accuracy: 0.76
Recall: 0.84
F1-Score: 0.79
