In [2]:

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import train_test_split

In [3]:
# Read the WDBC table from the UCI repository; header=None means no row is
# consumed as column names, so columns are addressed by integer position.
data = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data',
    header=None,
)
# Column 1 supplies the labels; every column from position 2 onward is a feature.
y = data.iloc[:, 1].to_numpy()
X = data.iloc[:, 2:].to_numpy()

In [4]:

# Encode the labels: 1 where the raw label is 'M', 0 otherwise.
# The encoding is derived from `data` rather than from `y` itself so the cell
# is idempotent: re-running it yields the same array instead of comparing the
# already-encoded integers against 'M'.
y = (data.iloc[:, 1].to_numpy() == 'M').astype(int)

In [5]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.2,
)

In [6]:
# Empty container that collects one test-set accuracy per iteration
accuracies = list()

In [7]:
# The starting feature count is the number of columns in the training matrix
current_features = np.shape(X_train)[1]

In [8]:
# Define the number of features to remove at each iteration
n_remove = 1
if n_remove < 1:
    # A zero or negative step would never reach the stopping condition.
    raise ValueError("n_remove must be a positive integer")

# Rebuild the results list in this cell so that re-running it replaces the
# previous scores instead of appending to them.
accuracies = []

# Step the feature count down from current_features towards 1.
# Iterating over a range leaves current_features untouched, so a second run of
# this cell repeats the full sweep rather than silently doing nothing.
for n_selected in range(current_features, 0, -n_remove):
    # Keep the n_selected highest-scoring features according to f_classif,
    # fitting the selector on the training split only
    selector = SelectKBest(f_classif, k=n_selected)
    selector.fit(X_train, y_train)
    X_train_selected = selector.transform(X_train)
    X_test_selected = selector.transform(X_test)

    # Train a Random Forest classifier on the selected features
    # (fixed random_state so each run produces the same forest)
    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(X_train_selected, y_train)

    # Evaluate the accuracy of the classifier on the test set
    accuracies.append(clf.score(X_test_selected, y_test))

In [None]:
# Plot test accuracy against the number of features kept by SelectKBest.
# The sweep starts from every column of X_train and drops n_remove features
# per iteration, so the i-th score belongs to shape[1] - i * n_remove features.
# NOTE(review): this assumes the sweep started at X_train.shape[1]; adjust if
# the starting feature count is changed.
n_features_kept = [X_train.shape[1] - i * n_remove for i in range(len(accuracies))]

fig, ax = plt.subplots()
ax.plot(n_features_kept, accuracies)
ax.set_xlabel("Number of Selected Features")
ax.set_ylabel("Accuracy")
ax.set_title("Test Accuracy vs. Number of Features Kept by SelectKBest")
# Reverse the axis so the curve still reads left-to-right in iteration order
# (most features first, fewest last).
ax.invert_xaxis()
plt.show()