# In [None]:
import pandas as pd

# === STEP 1: Category-Alias Mapping ===
def get_feature_alias_by_category():
    """Build the category -> {raw feature name -> display alias} lookup.

    A new two-level dict is constructed on every call, so callers may mutate
    the result freely. Categories and the features within them keep the
    insertion order written below. The trailing comments carry the source
    question text or derivation note recorded for each raw feature.
    """
    demographics = {
        "Age": "Age (Yrs)",  # R1. What is your age?
        "BirthGender": "Biological Sex",  # On your original birth certificate, were you listed as male or female?
    }

    clinical_health = {
        "BMI": "Body Mass Index",  # BMI: Body Mass Index (Weight*703)/(Height in inches**2) (See History Document for more information)
        "Deaf": "Hearing Impaired",  # H4. Are you deaf or do you have serious difficulty hearing?
        "EverHadCancer": "Ever Diagnosed with Cancer",  # Q1. Have you ever been diagnosed as having cancer?
        # H1. In general, would you say your health is...
        "GeneralHealth_Excellent": "Excellent General Health",
        "GeneralHealth_Poor": "Poor General Health",
        "GeneralHealth_Fair": "Fair General Health",
        "GeneralHealth_Good": "Good General Health",
        "GeneralHealth_VeryGood": "Very Good General Health",
        "MedConditions_Depression": "Diagnosed Depression",  # H6e. Has a doctor or other health professional ever told you that you had depression or anxiety disorder?
        "MedConditions_Diabetes": "Diagnosed Diabetes",  # H6a. Has a doctor or other health professional ever told you that you had diabetes or high blood sugar?
        "MedConditions_HighBP": "Diagnosed High Blood Pressure",  # H6b. Has a doctor or other health professional ever told you that you had high blood pressure or hypertension?
        "MedConditions_LungDisease": "Diagnosed Lung Disease",  # H6d. Has a doctor or other health professional ever told you that you had chronic lung disease, asthma, emphysema, or chronic bronchitis?
        "PHQ4": "PHQ-4 (Anxiety/Depression Score)",  # PHQ4: PHQ-4 total score (Derived composite from LittleInterest, Hopeless, Nervous, and Worrying; see History Document for more information)
        "MedConditions_HeartCondition": "Diagnosed Heart Condition",  # H6c. Has a doctor or other health professional ever told you that you had heart disease or coronary artery disease?
    }

    health_behaviors = {
        "AverageSleepNight": "Average Sleep (hrs/night)",  # H9. During the past 7 days, how many hours of sleep did you get on average per night?
        "AverageTimeSitting": "Avg Daily Sitting Time",  # M4. During the past 7 days, how much time did you spend sitting on a typical day at home or at work?
        "FreqGoProvider": "Number of Doctor Visits",  # C1. In the past 12 months, not counting times you went to an emergency room, how many times did you go to a doctor, nurse, or other health professional to get care for yourself?
        "WeeklyMinutesModerateExercise": "Moderate Exercise Minutes/Week",  # Minutes per week of at least moderate intensity exercise (Derived from TimesModerateExercise and HowLongModerateExerciseMinutes; see History Document for more information)
    }

    substance_use = {
        "AvgDrinksPerWeek": "Average Drinks/Week",  # Average number of drinks per week (Derived from DrinkDaysPerWeek and DrinksPerDay; see History Document for more information)
        # eCigUse: Electronic Cigarette Use (Derived from UsedECigEver and UseECigNow; see History Document for more information)
        "eCigUse_Current": "Currently Uses E-Cigarettes",
        "eCigUse_Former": "Former E-Cigarette User",
        "eCigUse_Never": "Never Used E-Cigarettes",
        # SmokeStat: Smoking Status (Derived from Smoke100 and SmokeNow; see History Document for more information)
        "smokeStat_Current": "Currently Smokes (Cigarettes)",
        "smokeStat_Former": "Former Smoker",
        "smokeStat_Never": "Never Smoked",
    }

    return {
        "Demographics": demographics,
        "Clinical Health": clinical_health,
        "Health Behaviors": health_behaviors,
        "Substance Use": substance_use,
    }

# === STEP 2: Add alias and category columns ===
def enrich_with_alias_and_category(df, feature_alias_by_category):
    """Return a copy of ``df`` with ``alias`` and ``category`` columns added.

    Args:
        df: DataFrame with a ``feature`` column holding raw feature names.
        feature_alias_by_category: Two-level mapping
            ``{category: {raw_feature_name: alias}}``.

    Returns:
        A new DataFrame; the input is left untouched. Features missing from
        the mapping keep their raw name as ``alias`` and get the category
        ``"Other"``.
    """
    # Flatten the nested mapping into two per-feature lookups in one pass.
    alias_lookup = {}
    category_lookup = {}
    for category, members in feature_alias_by_category.items():
        for raw_name, alias in members.items():
            alias_lookup[raw_name] = alias
            category_lookup[raw_name] = category

    enriched = df.copy()
    features = enriched["feature"]
    # Unmapped features come back as NaN from ``map``; fill with fallbacks.
    enriched["alias"] = features.map(alias_lookup).fillna(features)
    enriched["category"] = features.map(category_lookup).fillna("Other")
    return enriched

# === STEP 3: Reorder columns ===
def reorder_columns(df):
    """Return ``df`` restricted to the known report columns, in report order.

    Identification columns come first, followed by the XGBoost, TabNet and
    logistic-regression SHAP columns. Preferred columns absent from ``df``
    are skipped; columns of ``df`` that are not listed here are dropped.
    """
    preferred_order = (
        "feature",
        "category",
        "alias",
        "mean_abs_shap_xgb",
        "mean_shap_xgb",
        "mean_abs_shap_tabnet",
        "mean_shap_tabnet",
        "mean_abs_shap_lr",
        "mean_shap_lr",
    )
    present = [name for name in preferred_order if name in df.columns]
    return df[present]

# === STEP 4: Save to Excel ===
def save_to_excel(df, file_path, sheet_name="importance_matrix_with_category"):
    """Write ``df`` to one sheet of an Excel workbook and return ``file_path``.

    If the workbook already exists it is opened in append mode and any sheet
    named ``sheet_name`` is replaced, leaving other sheets alone. If
    ``file_path`` is a filesystem path that does not exist yet, a new workbook
    is created instead of failing.

    Args:
        df: DataFrame to export; the index is not written.
        file_path: Destination workbook (path string, path-like, or any other
            target accepted by ``pd.ExcelWriter``).
        sheet_name: Name of the sheet to create or replace.

    Returns:
        ``file_path``, unchanged.
    """
    import os  # local import: the file's import header only brings in pandas

    writer_kwargs = {"engine": "openpyxl", "mode": "a", "if_sheet_exists": "replace"}
    is_path = isinstance(file_path, (str, os.PathLike))
    if is_path and not os.path.exists(file_path):
        # Append mode loads the existing workbook and therefore cannot create
        # one. ``if_sheet_exists`` is only accepted in append mode, so it is
        # dropped when falling back to a fresh write.
        writer_kwargs = {"engine": "openpyxl", "mode": "w"}

    with pd.ExcelWriter(file_path, **writer_kwargs) as writer:
        df.to_excel(writer, sheet_name=sheet_name, index=False)
    return file_path

