In [1]:
!pip install pandas matplotlib seaborn scikit-learn

Collecting pandas
  Downloading pandas-2.3.3-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting matplotlib
  Downloading matplotlib-3.10.6-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting scikit-learn
  Downloading scikit_learn-1.7.2-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting numpy>=1.26.0 (from pandas)
  Downloading numpy-2.3.3-cp312-cp312-win_amd64.whl.metadata (60 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.3-cp312-cp312-win_amd64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.60.1-cp312-cp312-

In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import mutual_info_classif, RFE
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgb

# === STEP 1: Load your data ===
# Adjust the two settings below: the label column name and the CSV file name.
target_col = "label"   # <- change to your label column name
df = pd.read_csv("your_data.csv")

# Target vector is the label column; the feature matrix is every other column.
y = df[target_col]
X = df.drop(target_col, axis=1)

# === STEP 2: Mutual Information Ranking ===
# Score each feature column against the target, label the scores by column name,
# then rank them so that the highest score receives rank 1.
mi = mutual_info_classif(X, y, random_state=42)
mi_scores = pd.Series(mi, index=X.columns)
mi_rank = mi_scores.rank(ascending=False)

# === STEP 3: Recursive Feature Elimination (RFE) ===
# With step=1 and n_features_to_select=1, RFE drops one feature per round and
# refits the forest each time, so this is by far the most expensive step.
# n_jobs=-1 lets the forest use all CPU cores; the seed is still fixed via
# random_state, so the resulting ranking is expected to be unchanged.
estimator = RandomForestClassifier(random_state=42, n_estimators=100, n_jobs=-1)
rfe = RFE(estimator, n_features_to_select=1, step=1)
rfe.fit(X, y)

# rfe.ranking_ already uses "1 = best", so an ascending rank keeps that
# orientation and matches the other two rankings (rank 1 is the top feature).
rfe_rank = pd.Series(rfe.ranking_, index=X.columns).rank(ascending=True)

# === STEP 4: LightGBM Feature Importance ===
# Fit a LightGBM classifier with a fixed seed on the full feature matrix.
lgb_model = lgb.LGBMClassifier(random_state=42)
lgb_model.fit(X, y)

# Label the fitted importances by column name; highest importance gets rank 1.
lgb_importance = pd.Series(lgb_model.feature_importances_, index=X.columns)
lgb_rank = lgb_importance.rank(ascending=False)

# === STEP 5: Combine all rankings ===
# One row per feature, one column per ranking method.
method_ranks = pd.DataFrame({
    "MI_Rank": mi_rank,
    "RFE_Rank": rfe_rank,
    "LGBM_Rank": lgb_rank
})

# Average the three method ranks per feature and order features by that mean
# (lowest mean rank first).
rank_df = (
    method_ranks
    .assign(Mean_Rank=method_ranks.mean(axis=1))
    .sort_values("Mean_Rank")
)

# === STEP 6: Show Top Features ===
top_n = 10   # change this to how many you want

# rank_df is already sorted by Mean_Rank, so the first top_n rows are the best ones.
top_features = rank_df.head(top_n)

# Single print call: heading, newline, then the table.
print("\nTop Ranked Features:", top_features, sep="\n")

# === STEP 7: Save ranked features to CSV ===
# The index holds the feature names, so it is written out with the ranks.
rank_df.to_csv(path_or_buf="feature_ranking.csv", index=True)
print("\nFull feature ranking saved to feature_ranking.csv")

# === STEP 8 (Optional): Reduce dataset ===
# Keep only the top-ranked feature columns, in ranking order; the row index is not written.
X_selected = X.loc[:, top_features.index]
X_selected.to_csv(path_or_buf="reduced_features.csv", index=False)
print(f"\nReduced dataset with top {top_n} features saved to reduced_features.csv")


KeyboardInterrupt: 