Load the data

In [None]:
import pandas as pd

# Read the CSV file into a DataFrame; the path is relative to the notebook's
# working directory.
csv_path = '../data/heart_2020_cleaned.csv'
data = pd.read_csv(csv_path)

Separate the target variable from the independent variables

In [None]:
# 'HeartDisease' is the column to predict; every other column is a feature.
target_column = 'HeartDisease'
obj = data[target_column]
char = data.drop(columns=[target_column])

Split the data into training and test sets

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical across runs.
split_parts = train_test_split(char, obj, test_size=0.2, random_state=42)
char_train, char_test, obj_train, obj_test = split_parts
print(char_train)

Normalize variables

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Scale the float-valued feature columns to the [0, 1] range.
#
# The scaler is fitted on the training split only and then applied to both
# splits. Previously the full `data` frame was scaled after the split, which
# (a) never changed char_train / char_test, so the model saw unscaled values,
# and (b) would have leaked test-set minima/maxima into the fit.
# `data` itself is intentionally left unscaled.
columns_to_normalize = char_train.select_dtypes(include=['float64']).columns
scaler = MinMaxScaler()

# Work on copies so that assigning the scaled columns can never write through
# to `char` / `data`.
char_train = char_train.copy()
char_test = char_test.copy()

# Skip scaling when there is no float column: the scaler rejects empty input.
if len(columns_to_normalize) > 0:
    char_train[columns_to_normalize] = scaler.fit_transform(char_train[columns_to_normalize])
    # Test values outside the training min/max will map outside [0, 1];
    # that is expected when reusing the training-fitted scaler.
    char_test[columns_to_normalize] = scaler.transform(char_test[columns_to_normalize])

RIPPER algorithm with sequential forward feature selection

In [None]:
from wittgenstein import RIPPER
from sklearn.metrics import accuracy_score, recall_score, f1_score

# Positional column indices chosen so far, in the order they were selected.
selected_features = []

# Single RIPPER instance that is refitted for every candidate feature set.
# Seeded so that repeated runs produce the same rules and therefore the same
# selection order (42 matches the seed used for the train/test split).
ripper = RIPPER(random_state=42)

# Evaluate the features chosen
def evaluate_model(features):
    """Refit the shared RIPPER model on a column subset and score it.

    Args:
        features: list of positional column indices into char_train /
            char_test selecting the columns to train and predict on.

    Returns:
        The accuracy of the model's predictions for char_test, measured
        against obj_test.

    NOTE(review): the score is computed on the test split, so using it to
    pick features makes the reported accuracy optimistic; consider scoring
    on a separate validation split.
    NOTE(review): assumes the labels in obj_test are directly comparable to
    what ripper.predict returns; confirm the label encoding.
    """
    train_subset = char_train.iloc[:, features]
    test_subset = char_test.iloc[:, features]

    # Fitting replaces whatever ruleset the shared model held before.
    ripper.fit(train_subset, obj_train)
    return accuracy_score(obj_test, ripper.predict(test_subset))

# Forward selection loop: each round tries every not-yet-selected feature on
# top of the current selection and keeps the one with the highest accuracy.
# NOTE(review): evaluate_model scores on the test split, so the accuracies
# printed here are optimistically biased for the chosen subset.
n_features = char_train.shape[1]

# Best-scoring prefix of the selection order seen so far. The loop always ends
# with every feature selected, so this is what identifies the useful subset.
best_subset = []
best_subset_accuracy = float('-inf')

while len(selected_features) < n_features:
    # Start below any attainable accuracy so a feature is always picked, even
    # when every candidate scores 0.0 (starting at 0 would leave best_feature
    # as None and append None to the selection).
    best_accuracy = float('-inf')
    best_feature = None

    # Iterate over the features that have not been selected yet
    for feature_index in range(n_features):
        if feature_index in selected_features:
            continue

        accuracy = evaluate_model(selected_features + [feature_index])

        # Keep the candidate if its accuracy beats the best one of this round
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_feature = feature_index

    # Add this round's best feature to the selection
    selected_features.append(best_feature)

    # Strict comparison: on ties the earlier, smaller subset is kept.
    if best_accuracy > best_subset_accuracy:
        best_subset_accuracy = best_accuracy
        best_subset = list(selected_features)

    # Progress
    print(f"Selected Features: {selected_features}, Accuracy: {best_accuracy:.2f}")

print("Final Selected Features:", selected_features)
if best_subset:
    print(f"Best Subset: {best_subset}, Accuracy: {best_subset_accuracy:.2f}")