## Forward and Backward Feature selection algorithms

## importing needed libraries

In [27]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression

## importing data 

In [28]:
# Load the diabetes dataset object from scikit-learn; its `.data` and
# `.target` attributes are read in the cells below.
diabetes = load_diabetes() 

In [29]:
# Feature matrix; the selection functions below index it as X[:, columns].
X = diabetes.data 

In [30]:
# Regression target that every candidate model is fitted against.
y = diabetes.target 

## normalizing data

In [32]:
from sklearn.preprocessing import MinMaxScaler
# Scaler instance with default settings; it is fitted and applied to X below.
mmscaler = MinMaxScaler()

In [39]:
# Re-bind X and y from the dataset object (same assignments as the earlier
# cells). NOTE(review): presumably re-run so that scaling starts from the
# unscaled data -- confirm whether this duplicate cell is still needed.
X = diabetes.data
y = diabetes.target

In [43]:
# Fit the scaler on X and replace X with the transformed array
# (MinMaxScaler rescales each column to the [0, 1] range by default).
X = mmscaler.fit_transform(X)

## implementing the forward selection function

In [44]:
def forward_feature_selection(X, y):
    """Order all columns of X by greedy forward selection on in-sample MSE.

    Starting from an empty selection, each round fits one
    ``LinearRegression`` per not-yet-selected column (current selection plus
    that column) and keeps the column whose model has the lowest mean squared
    error on the same data it was fitted on. Rounds continue until every
    column has been selected, so the result is an ordering of all columns,
    not a reduced subset.

    Parameters
    ----------
    X : 2-D array
        Feature matrix; must support ``X.shape[1]`` and ``X[:, columns]``.
    y : array-like
        Target values, passed to ``LinearRegression.fit``.

    Returns
    -------
    selected_features : list of int
        Column indices in the order they were selected.
    final_mse : float
        MSE of the model chosen in the last round (the one using every
        column), or ``np.inf`` when X has no columns.
    """
    n_features = X.shape[1]
    # An ordered list (instead of a set) makes the candidate order explicit,
    # so ties in MSE are always broken in favour of the lowest column index.
    remaining_features = list(range(n_features))
    selected_features = []
    final_mse = np.inf  # returned unchanged when there is nothing to select

    while remaining_features:
        candidate_mse = []
        for feature in remaining_features:
            columns = selected_features + [feature]
            X_subset = X[:, columns]
            model = LinearRegression().fit(X_subset, y)
            residuals = y - model.predict(X_subset)
            candidate_mse.append(np.mean(residuals ** 2))

        # Position within remaining_features, not the column index itself.
        best_position = int(np.argmin(candidate_mse))
        final_mse = candidate_mse[best_position]
        selected_features.append(remaining_features.pop(best_position))

    return selected_features, final_mse

## implementing the backward elimination function

In [59]:
def backward_feature_selection(X, y):
    """Greedy backward feature elimination scored by in-sample MSE.

    Starts from the model that uses every column of X. Each round fits one
    ``LinearRegression`` per currently selected column with that column left
    out, and drops the column whose removal gives the lowest mean squared
    error -- but only if that error is strictly lower than the current best.
    Elimination stops at the first round that brings no improvement, or when
    a single column is left.

    Note: the error is measured on the same data the model is fitted on.
    Dropping a column from a least-squares fit normally cannot reduce that
    error, so with this criterion the full column set is usually returned.

    Parameters
    ----------
    X : 2-D array
        Feature matrix; must support ``X.shape[1]`` and ``X[:, columns]``.
    y : array-like
        Target values, passed to ``LinearRegression.fit``.

    Returns
    -------
    selected_features : list of int
        Column indices that were kept, in ascending order.
    best_mse : float
        MSE of the model fitted on ``selected_features``, or ``np.inf`` when
        X has no columns.
    """
    num_features = X.shape[1]
    selected_features = list(range(num_features))
    if not selected_features:
        return selected_features, np.inf

    def subset_mse(columns):
        # Same scoring as forward_feature_selection: fit and evaluate on (X, y).
        X_subset = X[:, columns]
        model = LinearRegression().fit(X_subset, y)
        return np.mean((y - model.predict(X_subset)) ** 2)

    # Baseline: the full model, so a removal must actually beat it.
    best_mse = subset_mse(selected_features)

    # Keep at least one column; a model cannot be fitted on zero columns.
    while len(selected_features) > 1:
        candidate_mse = [
            subset_mse([f for f in selected_features if f != candidate])
            for candidate in selected_features
        ]
        # Position within selected_features of the cheapest column to drop.
        best_position = int(np.argmin(candidate_mse))
        if not candidate_mse[best_position] < best_mse:
            break
        best_mse = candidate_mse[best_position]
        del selected_features[best_position]

    return selected_features, best_mse

## testing

In [60]:
# Run forward selection on the scaled features and print the selection order
# together with the MSE it returns.
selected_features, best_mse = forward_feature_selection(X, y)
print("Forward features = ", selected_features)
print("MSE = ", best_mse)

Forward features =  [2, 8, 3, 4, 1, 5, 7, 9, 6, 0]
MSE =  2859.6903987680657


In [61]:
# NOTE(review): this cell repeats the forward-selection call and label from
# the previous cell; presumably backward_feature_selection was meant to be
# exercised here -- confirm and update the call and the printed label together.
selected_features, best_mse = forward_feature_selection(X, y)
print("Forward features = ", selected_features)
print("MSE = ", best_mse)

Forward features =  [2, 8, 3, 4, 1, 5, 7, 9, 6, 0]
MSE =  2859.6903987680657
