<a href="https://colab.research.google.com/github/mehdiabbasidev/darsman-machine-learning/blob/main/WrapperMethods_FeatureSelection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

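### Dataset download link:
https://drive.google.com/file/d/1cbqKrMUQxbJ3McHGely29H6rPOBu41Xa/view?usp=sharing

Save the file to Google Drive as `MyDrive/datasets/paribas.csv`; the notebook reads it from that path after mounting the drive.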

In [None]:
# Attach Google Drive to the Colab filesystem so the dataset under
# /content/drive/ can be read by the cells below.
from google.colab import drive

drive.mount(mountpoint='/content/drive/')

### Step Forward and Backward Feature Selection

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
%matplotlib inline

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read only the first 2000 rows of the dataset from the mounted drive,
# then preview the first five of them.
data = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/datasets/paribas.csv',
    nrows=2000,
)
data.head(n=5)

In [None]:
# Show the (rows, columns) dimensions of the loaded sample.
data.shape

In [None]:
# Keep only the columns whose dtype is one of the listed integer/float types;
# every other column is discarded from `data`.
numerics = [
    'int16', 'int32', 'int64',
    'float16', 'float32', 'float64',
]
numerical_vars = data.select_dtypes(include=numerics).columns.tolist()
data = data.loc[:, numerical_vars]
data.shape

In [None]:
# Features are every remaining column except the label and the row identifier.
X = data.drop(columns=['target', 'ID'])
y = data['target']

# Hold out 30% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)
X_train.shape, X_test.shape

In [None]:
def correlation(dataset, threshold):
  """Return the columns that are strongly correlated with an earlier column.

  The absolute pairwise correlation matrix of ``dataset`` is computed and
  only the entries strictly above the diagonal are kept, so each pair of
  columns is inspected once and a column is never compared with itself.

  Parameters
  ----------
  dataset : pandas.DataFrame
      Frame whose columns are compared pairwise via ``DataFrame.corr()``.
  threshold : float
      A column is reported when its absolute correlation with any column
      positioned before it is strictly greater than this value.

  Returns
  -------
  list
      Column labels, in their original order, that exceed the threshold.
      Undefined (NaN) correlations never exceed the threshold.
  """
  abs_corr = dataset.corr().abs()
  # Boolean mask that is True only strictly above the main diagonal.
  above_diagonal = np.triu(np.ones(abs_corr.shape, dtype=bool), k=1)
  upper = abs_corr.where(above_diagonal)
  # One flag per column: does any entry above the diagonal exceed the threshold?
  flagged = upper.gt(threshold).any(axis=0).to_numpy(dtype=bool)
  return list(upper.columns[flagged])

# Collect the training-set columns whose absolute correlation with an
# earlier column is above 0.7, then report them and how many there are.
corr_features = correlation(dataset=X_train, threshold=0.7)

print(f"corr_features : {corr_features}")
print(f"corr_features len : {len(corr_features)} ")

In [None]:
# Remove the highly correlated columns from both splits.
# The frames are reassigned under the same names, so without
# errors='ignore' running this cell a second time would raise KeyError
# (the columns are already gone). Ignoring missing labels makes the cell
# safe to re-run.
X_train = X_train.drop(columns=corr_features, errors='ignore')
X_test = X_test.drop(columns=corr_features, errors='ignore')
X_train.shape, X_test.shape

In [None]:
# Step-forward selection: starting from no features, repeatedly add the one
# that most improves the ROC-AUC score until 2 features are selected.
# The forest is seeded (same seed as the train/test split) so the selected
# features are reproducible across runs; an unseeded forest can select
# different features each time the cell is executed.
rfc = RandomForestClassifier(random_state=0)
sfs1 = SFS(rfc, k_features=2, forward=True, verbose=2, scoring='roc_auc')

# The selector is fitted on a plain array with missing values replaced by 0.
sfs1 = sfs1.fit(np.array(X_train.fillna(0)), y_train)

In [None]:
# Map the positional indices chosen by the selector back to column names.
selected_features = X_train.columns.take(list(sfs1.k_feature_idx_))
selected_features

### Recursive Feature Elimination (RFE)

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import RFE

In [None]:
# Build a DataFrame from the iris measurements, one column per feature name,
# and append the class labels as a 'target' column.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

In [None]:
# Separate the feature columns from the label column.
X = df.drop(columns=['target'])
y = df['target']

In [None]:
# Recursive feature elimination: refit the logistic regression and discard
# one feature per iteration (step=1) until 3 features remain.
lr = LogisticRegression(max_iter=1000)
selector = RFE(
    estimator=lr,
    n_features_to_select=3,
    step=1,
)
selector.fit(X, y)

In [None]:
# support_ is the boolean mask of kept features; ranking_ gives each
# feature's rank, with 1 marking the selected ones.
print(selector.support_)
print(selector.ranking_)

# Translate the boolean mask into the corresponding column names.
selected_features = X.columns[selector.get_support()]
print("Selected features:", selected_features)