# Experiment 6

#### Problem Statement: 
Applying wrapper methods for feature selection on the dataset.

#### Code: 

In [42]:
# import the requied libraries
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest, f_regression, RFE, SequentialFeatureSelector
from sklearn.linear_model import LogisticRegression, Lasso
from sklearn.preprocessing import LabelEncoder

In [43]:
# import the datatset
iris_data = pd.read_csv('iris.csv')

# print the dataset
print(iris_data.head().to_markdown())

|    |   Id |   SepalLengthCm |   SepalWidthCm |   PetalLengthCm |   PetalWidthCm | Species     |
|---:|-----:|----------------:|---------------:|----------------:|---------------:|:------------|
|  0 |    1 |             5.1 |            3.5 |             1.4 |            0.2 | Iris-setosa |
|  1 |    2 |             4.9 |            3   |             1.4 |            0.2 | Iris-setosa |
|  2 |    3 |             4.7 |            3.2 |             1.3 |            0.2 | Iris-setosa |
|  3 |    4 |             4.6 |            3.1 |             1.5 |            0.2 | Iris-setosa |
|  4 |    5 |             5   |            3.6 |             1.4 |            0.2 | Iris-setosa |


In [44]:
# ecnode the categorical data into numerical data
iris_data['Species'] = iris_data['Species'].replace(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], [0, 1, 2])

In [45]:
# print the datatype of the dataset
print(iris_data.dtypes)

Id                 int64
SepalLengthCm    float64
SepalWidthCm     float64
PetalLengthCm    float64
PetalWidthCm     float64
Species            int64
dtype: object


In [46]:
X = iris_data.drop(columns=['Id', 'Species'])
y = iris_data['Species']

In [47]:
# Filter Method
fs = SelectKBest(score_func=f_regression, k=2)
X_selected = fs.fit_transform(X, y)
feature_scores = pd.DataFrame(fs.scores_, index=X.columns, columns=['Score'])
print("Filter Method:")
print(feature_scores)
print("\n")

Filter Method:
                     Score
SepalLengthCm   233.838996
SepalWidthCm     31.597508
PetalLengthCm  1342.159189
PetalWidthCm   1589.559204




In [48]:
# Wrapper Method: RFE with Logistic Regression
model = LogisticRegression(max_iter=1000)
rfe = RFE(estimator=model, n_features_to_select=3)
fit = rfe.fit(X, y)
feature_ranking = pd.DataFrame(
    {'Feature': X.columns, 'Ranking': fit.ranking_}).sort_values(by='Ranking')
print("Wrapper Method: RFE")
print(feature_ranking)
print("\n")

Wrapper Method: RFE
         Feature  Ranking
1   SepalWidthCm        1
2  PetalLengthCm        1
3   PetalWidthCm        1
0  SepalLengthCm        2




In [49]:
# Wrapper Method: Backward Elimination
model = LogisticRegression(max_iter=1000)
sfs_backward = SequentialFeatureSelector(estimator=model, n_features_to_select=3, direction='backward')
sfs_backward.fit(X, y)
feature_ranking = pd.DataFrame(
    {'Feature': X.columns, 'Ranking': sfs_backward.get_support()}).sort_values(by='Ranking')
print("Wrapper Method: Backward Elimination")
print(feature_ranking)

Wrapper Method: Backward Elimination
         Feature  Ranking
0  SepalLengthCm    False
1   SepalWidthCm     True
2  PetalLengthCm     True
3   PetalWidthCm     True


In [51]:
# Wrapper Method: Forward Selection
model = LogisticRegression(max_iter=1000)
sfs_forward = SequentialFeatureSelector(estimator=model, n_features_to_select=3, direction='forward')
sfs_forward.fit(X, y)
feature_ranking = pd.DataFrame(
    {'Feature': X.columns, 'Ranking': sfs_forward.get_support()}).sort_values(by='Ranking')
print("Wrapper Method: Forward Selection")
print(feature_ranking)

Wrapper Method: Forward Selection
         Feature  Ranking
0  SepalLengthCm    False
1   SepalWidthCm     True
2  PetalLengthCm     True
3   PetalWidthCm     True
