In [1]:
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier

In [2]:
import pandas as pd

In [3]:
from sklearn.linear_model import Lasso

In [4]:
from sklearn.ensemble import RandomForestRegressor

In [5]:
import numpy as np
import statsmodels.api as sm

In [6]:
import statsmodels.api as sm

In [7]:
# Load the measurements table from the CSV file next to the notebook.
df = pd.read_csv(filepath_or_buffer='BMI.csv')

In [8]:
# Response: the 'fatpctg' column. Predictors: every other column of df.
y = df['fatpctg']
x = df.drop(columns='fatpctg')

In [9]:
def forward_stepwise_selection(X, y, initial_list=[], threshold_in=0.01):
    """Greedy forward feature selection driven by OLS p-values.

    Starting from ``initial_list``, every round fits one OLS model (with an
    added intercept) per column of ``X`` that is not selected yet, and records
    the p-value of that candidate's coefficient. The candidate with the
    smallest p-value is added when that p-value is below ``threshold_in``;
    otherwise the search stops.

    Parameters
    ----------
    X : pandas.DataFrame
        Candidate predictor columns.
    y : array-like
        Response handed to ``sm.OLS`` as the dependent variable.
    initial_list : list, optional
        Column names to start from. It is copied, never mutated, so the
        shared default list is safe.
    threshold_in : float, optional
        A candidate is only added while its p-value is strictly below this.

    Returns
    -------
    list
        Selected column names: the initial ones first, then the added ones in
        the order they were picked.
    """
    included = list(initial_list)

    while True:
        # Walk the columns in DataFrame order (not through a set) so that ties
        # between equal p-values are always broken the same way.
        candidates = [col for col in X.columns if col not in included]
        if not candidates:
            break

        pvalue_by_column = {}
        for new_column in candidates:
            model = sm.OLS(y, sm.add_constant(X[included + [new_column]])).fit()
            pvalue_by_column[new_column] = model.pvalues[new_column]

        # Explicit float dtype: a Series created without one can end up
        # object-typed on recent pandas, where min/idxmin are not dependable.
        new_pval = pd.Series(pvalue_by_column, dtype=float)

        best_pval = new_pval.min()
        # A NaN p-value compares False here, so it can never be selected.
        if not (best_pval < threshold_in):
            break
        included.append(new_pval.idxmin())

    return included


In [10]:
def backward_stepwise_selection(X, y, initial_list=[], threshold_out=0.01):
    """Greedy backward feature elimination driven by OLS p-values.

    Each round fits a single OLS model (with an added intercept) on the
    currently included columns and removes the one with the largest p-value,
    as long as that p-value exceeds ``threshold_out``.

    Parameters
    ----------
    X : pandas.DataFrame
        Candidate predictor columns.
    y : array-like
        Response handed to ``sm.OLS`` as the dependent variable.
    initial_list : list, optional
        Column names to start from. When empty (the default) the search
        starts from every column of ``X``. It is copied, never mutated.
    threshold_out : float, optional
        A column is removed while its p-value is strictly above this.

    Returns
    -------
    list
        The surviving column names, in their starting order.
    """
    # Use the X argument, not a notebook-level global, so the function works
    # on whatever frame the caller passes in.
    included = list(initial_list) if len(initial_list) else X.columns.tolist()

    # Stop as soon as nothing is left rather than fitting an empty design.
    while included:
        model = sm.OLS(y, sm.add_constant(X[included])).fit()
        # The intercept is not a removable feature. errors='ignore' covers the
        # case where no 'const' entry is present in the fitted p-values.
        pvalues = model.pvalues.drop('const', errors='ignore')

        worst_pval = pvalues.max()
        # A NaN maximum compares False here and ends the search.
        if not (worst_pval > threshold_out):
            break
        included.remove(pvalues.idxmax())

    return included


In [11]:
# Forward selection over all predictors, using the function's default threshold.
result = forward_stepwise_selection(X=x, y=y)
print("Selected features:", result)

Selected features: ['Abdomen', 'Weight', 'Wrist']


In [12]:
# Backward elimination over all predictors, using the function's default threshold.
result = backward_stepwise_selection(X=x, y=y)
print("Selected features:", result)

Selected features: ['Age', 'Neck', 'Abdomen', 'Forearm', 'Wrist']


In [13]:
# Correlation of each predictor column with the response.
correlations = x.corrwith(other=y)

In [14]:
# Rank the predictors by the magnitude of their correlation with the response,
# strongest first, and show the resulting table.
abs_correlations = correlations.abs()
correlation_df = pd.DataFrame(
    {'Feature': x.columns, 'Correlation': abs_correlations}
).sort_values(by='Correlation', ascending=False)

print(correlation_df)

         Feature  Correlation
Abdomen  Abdomen     0.813432
Chest      Chest     0.702620
Hip          Hip     0.625201
Weight    Weight     0.612925
Thigh      Thigh     0.559608
Knee        Knee     0.508665
Biceps    Biceps     0.493271
Neck        Neck     0.490592
Forearm  Forearm     0.361387
Wrist      Wrist     0.346575
Age          Age     0.291458
Ankle      Ankle     0.265970
Height    Height     0.133211


In [15]:
# Fit an L1-regularised linear model; alpha sets the regularisation strength.
# NOTE(review): the predictors are passed in unscaled, so coefficient sizes
# depend on each column's units - confirm this is intended before ranking on them.
lasso = Lasso(alpha=0.5)
lasso.fit(X=x, y=y)

In [16]:
# Label each fitted Lasso coefficient with its column name.
feature_coefficients = pd.Series(data=lasso.coef_, index=x.columns)

# Rank by coefficient magnitude, largest first, and show the ranking.
coefficient_magnitudes = feature_coefficients.abs()
feature_ranking_lasso = coefficient_magnitudes.sort_values(ascending=False)
print(feature_ranking_lasso)

Abdomen    0.931107
Neck       0.309097
Thigh      0.131093
Weight     0.100067
Hip        0.090851
Forearm    0.072192
Height     0.052464
Age        0.017125
Wrist      0.009993
Chest      0.000000
Knee       0.000000
Ankle      0.000000
Biceps     0.000000
dtype: float64


In [17]:
# Fit a random forest on all predictors. The seed is fixed so that the
# importances (and therefore the ranking) are the same on every
# Restart & Run All; change random_state to check how stable the ranking is.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(x, y)

# Label each importance with its column name.
feature_importances = pd.Series(rf.feature_importances_, index=x.columns)

# Rank the features by importance, most important first.
feature_ranking_rf = feature_importances.sort_values(ascending=False)

# Output the ranking
print(feature_ranking_rf)

Abdomen    0.722864
Height     0.033021
Weight     0.032006
Knee       0.026639
Chest      0.024176
Neck       0.023463
Wrist      0.022668
Age        0.022626
Ankle      0.020640
Hip        0.020024
Biceps     0.018914
Thigh      0.018499
Forearm    0.014460
dtype: float64
