In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.metrics import accuracy_score

# Load the iris dataset into a DataFrame so the feature columns keep their names.
iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Base classifier used both inside the selectors and for the final evaluation.
clf = LogisticRegression(max_iter=200)


def run_sequential_selection(estimator, X_train, X_test, y_train, y_test,
                             forward, label, k_features=3, cv=5):
    """Run one sequential feature selection pass and score the reduced model.

    Fits an mlxtend ``SequentialFeatureSelector`` on the training split,
    reduces both splits to the chosen columns, refits ``estimator`` on the
    reduced training data and reports its accuracy on the reduced test data.

    Parameters
    ----------
    estimator : classifier exposing ``fit`` / ``predict``.
        It is refitted in place on the reduced training data.
    X_train, X_test : feature matrices for the training and test splits.
    y_train, y_test : target labels for the training and test splits.
    forward : bool
        ``True`` for forward selection, ``False`` for backward selection.
    label : str
        Short tag inserted into the printed messages (e.g. ``"SFS"``).
    k_features : int, default 3
        Number of features the selector should keep.
    cv : int, default 5
        Number of cross-validation folds used while scoring candidate subsets.

    Returns
    -------
    tuple
        ``(selector, selected_idx, X_train_sel, X_test_sel, y_pred, accuracy)``
    """
    selector = SFS(estimator,
                   k_features=k_features,
                   forward=forward,
                   floating=False,  # plain step-wise search, no floating variant
                   scoring='accuracy',
                   cv=cv)
    selector = selector.fit(X_train, y_train)

    # Indices of the columns the selector decided to keep.
    selected_idx = selector.k_feature_idx_
    print(f"Selected Features ({label}): {selected_idx}")

    # Reduce both splits to the selected columns.
    X_train_sel = selector.transform(X_train)
    X_test_sel = selector.transform(X_test)

    # Refit on the reduced training data and score on the untouched test split.
    estimator.fit(X_train_sel, y_train)
    y_pred = estimator.predict(X_test_sel)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model accuracy with selected features ({label}): {accuracy}")

    return selector, selected_idx, X_train_sel, X_test_sel, y_pred, accuracy


# Sequential Forward Selection (SFS)
(sfs, selected_features_sfs, X_train_sfs, X_test_sfs,
 y_pred_sfs, accuracy_sfs) = run_sequential_selection(
    clf, X_train, X_test, y_train, y_test, forward=True, label="SFS")

# Sequential Backward Selection (SBS).
# NOTE: `clf` was just fitted on the SFS subset; per the mlxtend docs the
# selector clones its estimator by default (clone_estimator=True), so that
# fitted state should not leak into this second search.
(sbs, selected_features_sbs, X_train_sbs, X_test_sbs,
 y_pred_sbs, accuracy_sbs) = run_sequential_selection(
    clf, X_train, X_test, y_train, y_test, forward=False, label="SBS")


Selected Features (SFS): (0, 2, 3)
Model accuracy with selected features (SFS): 1.0
Selected Features (SBS): (0, 2, 3)
Model accuracy with selected features (SBS): 1.0


In [None]:
# prompt: print dataset

# Leave the DataFrame as the cell's last expression so the notebook renders it
# with its rich (truncated) table display instead of a plain-text print().
X


     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                  5.1               3.5                1.4               0.2
1                  4.9               3.0                1.4               0.2
2                  4.7               3.2                1.3               0.2
3                  4.6               3.1                1.5               0.2
4                  5.0               3.6                1.4               0.2
..                 ...               ...                ...               ...
145                6.7               3.0                5.2               2.3
146                6.3               2.5                5.0               1.9
147                6.5               3.0                5.2               2.0
148                6.2               3.4                5.4               2.3
149                5.9               3.0                5.1               1.8

[150 rows x 4 columns]
