In [2]:
# inspect_pipeline.py
from preprocessors import (
    LowerCaseStrings,
    StringConverter,
    YearExtractor,
    IQRCapper,
    ConstructionYearTransformer,
    ObjectToNumericConverter,
    AgeCalculator,
    FrequencyEncoder,
    RegionCodeCombiner,
    ColumnDropper,
    AgePipeline,
    GeoContextImputer
)
import joblib
import numpy as np

# --- Load the persisted pipeline ---------------------------------------------
# NOTE(review): joblib.load unpickles the file, so only load artifacts that
# come from a trusted source. Presumably the custom classes imported from
# `preprocessors` must stay importable for unpickling to succeed -- confirm
# before pruning those imports.
pipeline = joblib.load("full_pipeline.joblib")

# --- Fetch the two steps of interest by their registered step names ----------
steps = pipeline.named_steps
preprocessor = steps["preprocessor"]
selector = steps["feature_selection"]

# --- Feature names produced by the preprocessing step ------------------------
# The names carry a "<transformer>__" prefix, e.g. "age__age",
# "num__amount_tsh", "cat__source_class_surface".
all_proc_feature_names = preprocessor.get_feature_names_out()

# --- Keep only the names the selector retained -------------------------------
# `mask` is a boolean mask aligned with `all_proc_feature_names`; indexing with
# it leaves just the surviving feature names.
mask = selector.get_support()
important_features = np.array(all_proc_feature_names)[mask]

# --- Report ------------------------------------------------------------------
n_total = len(all_proc_feature_names)
preview = all_proc_feature_names[:10]
n_kept = important_features.shape[0]

print("\n🚨 Total features after preprocessing (before selection):", n_total)
print("   Example:", preview, "…\n")

print("🚨 Number of features kept by SelectFromModel:", n_kept)
print("🚨 These are the features that the RandomForest sees:\n")
for feat in important_features:
    print("   •", feat)



🚨 Total features after preprocessing (before selection): 116
   Example: ['age__age' 'num__amount_tsh' 'cat__source_class_surface'
 'cat__source_class_unknown' 'cat__water_quality_fluoride'
 'cat__water_quality_fluoride abandoned' 'cat__water_quality_milky'
 'cat__water_quality_salty' 'cat__water_quality_salty abandoned'
 'cat__water_quality_soft'] …

🚨 Number of features kept by SelectFromModel: 88
🚨 These are the features that the RandomForest sees:

   • age__age
   • num__amount_tsh
   • cat__source_class_surface
   • cat__water_quality_milky
   • cat__water_quality_salty
   • cat__water_quality_salty abandoned
   • cat__water_quality_soft
   • cat__water_quality_unknown
   • cat__payment_type_monthly
   • cat__payment_type_never pay
   • cat__payment_type_on failure
   • cat__payment_type_other
   • cat__payment_type_per bucket
   • cat__payment_type_unknown
   • cat__extraction_type_class_handpump
   • cat__extraction_type_class_motorpump
   • cat__extraction_type_class_other
  