In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE, chi2, SelectKBest
from xgboost import XGBClassifier

# Collapse the target into a binary label: values above 0 become 1, everything
# else (including anything that does not compare greater than 0) becomes 0.
# A vectorized comparison replaces the per-row Python lambda; re-running this
# line is harmless because 0 and 1 map onto themselves.
df["num"] = (df["num"] > 0).astype("int64")

# Separate predictors from the target.
X = df.drop(columns="num")  # adjust if your target column has a different name
y = df["num"]

# One-hot encode the predictors; drop_first=True drops the first level of each
# encoded column so the dummy columns are not perfectly collinear.
X = pd.get_dummies(X, drop_first=True)

# Fit a random forest on every row of X / y and rank the columns by the
# impurity-based importance the fitted model reports.
# NOTE(review): importances are computed on the full data set; train_test_split
# is imported but not used in the visible cells — confirm whether feature
# selection should be restricted to a training split.
rf = RandomForestClassifier(random_state=42).fit(X, y)

rf_importances = pd.Series(data=rf.feature_importances_, index=X.columns)
rf_importances = rf_importances.sort_values(ascending=False)  # most important first

print("Random Forest Feature Importance:\n", rf_importances)


In [None]:
# Fit a gradient-boosted classifier on the same X / y and rank the columns by
# the importance the fitted model reports.
# `use_label_encoder` is intentionally not passed: it is deprecated, and newer
# XGBoost releases no longer recognize it and only emit an unused-parameter
# warning. The target is already encoded as 0/1 by an earlier cell.
xgb = XGBClassifier(eval_metric="logloss", random_state=42)
xgb.fit(X, y)
xgb_importances = pd.Series(xgb.feature_importances_, index=X.columns).sort_values(ascending=False)

print("\nXGBoost Feature Importance:\n", xgb_importances)


In [None]:
def plot_feature_importances(importances, title):
    """Draw one labelled bar chart of feature importances and show it.

    Parameters
    ----------
    importances : pandas.Series
        Importance values indexed by feature name, plotted in the order given.
    title : str
        Title placed above the chart.

    Returns
    -------
    The matplotlib Axes the bars were drawn on.
    """
    # Explicit figure/axes instead of the implicit pyplot state, so each chart
    # is guaranteed to land on its own figure.
    fig, ax = plt.subplots()
    importances.plot(kind="bar", ax=ax, title=title)
    ax.set_xlabel("Feature")
    ax.set_ylabel("Importance")
    fig.tight_layout()  # keep the rotated feature names inside the figure
    plt.show()
    return ax


plot_feature_importances(rf_importances, "Random Forest Feature Importance")
plot_feature_importances(xgb_importances, "XGBoost Feature Importance");

In [None]:
# Recursive feature elimination wrapped around a random forest, configured to
# keep 8 features. fit() returns the selector itself, so construction and
# fitting are chained into a single assignment.
rfe_selector = RFE(
    estimator=RandomForestClassifier(random_state=42),
    n_features_to_select=8,
).fit(X, y)

# support_ is the boolean mask over X's columns marking the retained features.
rfe_mask = rfe_selector.support_
rfe_features = X.columns[rfe_mask]

print("\nSelected Features by RFE:", list(rfe_features))


In [None]:
# Chi-square scoring needs non-negative inputs, so every column of X is
# rescaled with a min-max scaler before scoring.
# NOTE(review): MinMaxScaler is not imported in the visible cells — confirm an
# earlier cell imports it from sklearn.preprocessing. This also rebinds the
# notebook-level name `scaler`; verify no later cell expects a previous scaler.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Keep the 8 columns with the highest chi-square score against the target.
# fit() returns the selector itself, so it is chained onto the constructor.
chi2_selector = SelectKBest(score_func=chi2, k=8).fit(X_scaled, y)

# X_scaled is a bare array, so the selected names are looked up on X's columns
# via the selector's boolean support mask.
chi2_mask = chi2_selector.get_support()
chi2_features = X.columns[chi2_mask]

print("\nSelected Features by Chi-Square Test:", list(chi2_features))

In [None]:
# Union of the features nominated by any of the four methods: the top 8 by
# random-forest importance, the top 8 by XGBoost importance, the RFE picks and
# the chi-square picks. The result stays a set, as before.
final_features = set().union(
    rf_importances.head(8).index,
    xgb_importances.head(8).index,
    rfe_features,
    chi2_features,
)

# Iteration order of a set of strings changes between interpreter sessions
# (string hash randomization), so sort for display to make the printed list
# identical after every Restart & Run All. key=str keeps the sort well-defined
# even if some column labels are not strings.
print("\nFinal Selected Features for Modeling:", sorted(final_features, key=str))