In [10]:
import pandas as pd
import numpy as np
import glob

In [14]:
import glob
import os

import pandas as pd

def save_selected_features(dfs):
    """
    Write, for each dataframe in ``dfs``, the features that never have zero importance.

    A feature is dropped as soon as *any* of its rows has ``Importance == 0``;
    every other distinct value of the ``Feature`` column is kept, in order of
    first appearance, and written one per line to a text file.

    The output path is derived from the file-name part of the key only, so the
    directory part is never rewritten: ``Feature_importances`` in the name
    becomes ``Specificfeatures`` and a ``.csv`` extension (any letter case)
    becomes ``.txt``. A key without a ``.csv`` extension gets ``.txt``
    appended instead, so the output path can never equal the input path.

    Parameters
    ----------
    dfs : dict
        Dictionary of dataframes keyed by filename (``str`` or path-like).
        Each dataframe must provide ``Feature`` and ``Importance`` columns.

    Raises
    ------
    KeyError
        If a dataframe lacks the ``Feature`` or ``Importance`` column. The
        check runs before that dataframe's output file is opened.
    """
    required_cols = ("Feature", "Importance")

    for fname, df in dfs.items():
        missing = [col for col in required_cols if col not in df.columns]
        if missing:
            raise KeyError(f"{fname}: missing required column(s) {missing}")

        # Features that have at least one row with Importance == 0
        zero_feats = df.loc[df["Importance"] == 0, "Feature"].unique().tolist()

        # Everything else, de-duplicated in order of first appearance
        selected_feats = df.loc[~df["Feature"].isin(zero_feats), "Feature"].unique().tolist()

        # Rewrite only the base name; a plain str.replace on the whole path
        # would also alter matching directory names.
        folder, base = os.path.split(os.fspath(fname))
        stem, ext = os.path.splitext(base)
        if ext.lower() != ".csv":
            # Not a CSV name: keep it whole and append ".txt" rather than
            # risk writing over the input file.
            stem = base
        out_name = stem.replace("Feature_importances", "Specificfeatures") + ".txt"
        out_txt = os.path.join(folder, out_name)

        # Explicit encoding so non-ASCII feature names are written the same
        # way on every platform.
        with open(out_txt, "w", encoding="utf-8") as f:
            f.writelines(f"{feat}\n" for feat in selected_feats)

        print(f"✅ Saved {len(selected_feats)} features to {out_txt}")

# --- Usage: see the cells below, one per file pattern ---


In [15]:
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the processing order and the printed log reproducible across runs.
files = sorted(glob.glob("Feature_importances_LightGBM*_*_healthycontrol.csv"))
# Load each matched CSV, keyed by its path, then write the selected features.
dfs = {f: pd.read_csv(f) for f in files}
save_selected_features(dfs)


✅ Saved 75 features to Specificfeatures_LightGBM_balanced_Training_residualstraining_healthycontrol.txt
✅ Saved 72 features to Specificfeatures_LightGBM_balanced_Training_residualshealthycontrol_healthycontrol.txt


In [16]:

# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the processing order and the printed log reproducible across runs.
files = sorted(glob.glob("Feature_importances_LightGBM*_*_neurodegenerative.csv"))
# Load each matched CSV, keyed by its path, then write the selected features.
dfs = {f: pd.read_csv(f) for f in files}
save_selected_features(dfs)


✅ Saved 2 features to Specificfeatures_LightGBM_balanced_Training_residualstraining_neurodegenerative.txt
✅ Saved 61 features to Specificfeatures_LightGBM_balanced_Training_residualshealthycontrol_neurodegenerative.txt


In [17]:
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the processing order and the printed log reproducible across runs.
files = sorted(glob.glob("Feature_importances_LASSORegression*_*_healthycontrol.csv"))
# Load each matched CSV, keyed by its path, then write the selected features.
dfs = {f: pd.read_csv(f) for f in files}
save_selected_features(dfs)

✅ Saved 304 features to Specificfeatures_LASSORegression_Training_residualstraining_healthycontrol.txt
✅ Saved 297 features to Specificfeatures_LASSORegression_Training_residualshealthycontrol_healthycontrol.txt


In [18]:
# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the processing order and the printed log reproducible across runs.
files = sorted(glob.glob("Feature_importances_LASSORegression*_*_neurodegenerative.csv"))
# Load each matched CSV, keyed by its path, then write the selected features.
dfs = {f: pd.read_csv(f) for f in files}
save_selected_features(dfs)

✅ Saved 253 features to Specificfeatures_LASSORegression_Training_residualstraining_neurodegenerative.txt
✅ Saved 253 features to Specificfeatures_LASSORegression_Training_residualshealthycontrol_neurodegenerative.txt
