Load the data

In [16]:
import pandas as pd

data = pd.read_csv('../data/heart_2020_cleaned.csv')

Separate between objective and independent variables

In [17]:
char = data.drop(columns=['HeartDisease'])
obj = data['HeartDisease']

Dividing between test and train data

In [18]:
from sklearn.model_selection import train_test_split

char_train, char_test, obj_train, obj_test = train_test_split(char, obj, test_size=0.2, random_state=42)

Normalize variables

In [19]:
from sklearn.preprocessing import MinMaxScaler

columns_to_normalize = data.select_dtypes(include=['float64']).columns
scaler = MinMaxScaler()
data[columns_to_normalize] = scaler.fit_transform(data[columns_to_normalize])

In [20]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

selected_features = []

k = 50
knn_model = KNeighborsClassifier(n_neighbors=k)

# Evaluate the features chosen
def evaluate_model(features):
    knn_model.fit(char_train.iloc[:, features], obj_train)
    predictions = knn_model.predict(char_test.iloc[:, features])
    accuracy = accuracy_score(obj_test, predictions)
    return accuracy

# Bucle Forward Selection
while len(selected_features) < char_train.shape[1]:
    best_accuracy = 0
    best_feature = None
    
    # Iterate over the non selected features
    for feature_index in range(char_train.shape[1]):
        if feature_index not in selected_features:
            current_features = selected_features + [feature_index]
            accuracy = evaluate_model(current_features)
            
            # If the new precision is better than the last precision, updates it
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_feature = feature_index
    
    # Adds the best feature to the other best ones
    selected_features.append(best_feature)
    
    # Progress
    print(f"Selected Features: {selected_features}, Accuracy: {best_accuracy:.2f}")

print("Final Selected Features:", selected_features)


Selected Features: [8], Accuracy: 0.69
Selected Features: [8, 11], Accuracy: 0.74
Selected Features: [8, 11, 7], Accuracy: 0.75
Selected Features: [8, 11, 7, 20], Accuracy: 0.76
Selected Features: [8, 11, 7, 20, 9], Accuracy: 0.76
Selected Features: [8, 11, 7, 20, 9, 1], Accuracy: 0.77
Selected Features: [8, 11, 7, 20, 9, 1, 16], Accuracy: 0.77
Selected Features: [8, 11, 7, 20, 9, 1, 16, 6], Accuracy: 0.77
Selected Features: [8, 11, 7, 20, 9, 1, 16, 6, 13], Accuracy: 0.77
Selected Features: [8, 11, 7, 20, 9, 1, 16, 6, 13, 2], Accuracy: 0.77
Selected Features: [8, 11, 7, 20, 9, 1, 16, 6, 13, 2, 14], Accuracy: 0.77
Selected Features: [8, 11, 7, 20, 9, 1, 16, 6, 13, 2, 14, 17], Accuracy: 0.77
Selected Features: [8, 11, 7, 20, 9, 1, 16, 6, 13, 2, 14, 17, 19], Accuracy: 0.77
Selected Features: [8, 11, 7, 20, 9, 1, 16, 6, 13, 2, 14, 17, 19, 18], Accuracy: 0.77
Selected Features: [8, 11, 7, 20, 9, 1, 16, 6, 13, 2, 14, 17, 19, 18, 10], Accuracy: 0.77
Selected Features: [8, 11, 7, 20, 9, 1, 16,