In [5]:
from sklearn.ensemble import ExtraTreesClassifier, HistGradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import StandardScaler
import pandas as pd
from mlcomp.data.load import load_classification_train
from mlcomp.data.preprocess import drop_ft2, remove_outliers

In [6]:
algos = [
    ExtraTreesClassifier,
    HistGradientBoostingClassifier,
    XGBClassifier,
]

df = load_classification_train()
df = drop_ft2(df)
df = remove_outliers(df, random_state=42)

X = df.drop(columns='label')
X = StandardScaler().fit_transform(X)
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
results = pd.DataFrame(columns=["model_name", "perm_imp_result", "model"])

for index, alg in enumerate(algos):
    model = alg()
    model_name = model.__class__.__name__

    print(f"Fitting {model_name}...")

    model.fit(X_train, y_train)

    p_res = permutation_importance(model, X_test, y_test, n_repeats=5, random_state=42, scoring='f1_macro')

    results.loc[index] = [model_name, p_res, model]

Fitting ExtraTreesClassifier...
Fitting HistGradientBoostingClassifier...
Fitting XGBClassifier...


  if is_sparse(data):


In [8]:
for _, row in results.iterrows():
    res = row['perm_imp_result']
    print(f"Features to drop for {row['model_name']}")
    features = []
    for ft_nr, mean in enumerate(res.importances_mean):
        if mean < 0:
            features.append(f'feature_{ft_nr}')
    print(features)

Features to drop for ExtraTreesClassifier
['feature_0', 'feature_7', 'feature_20', 'feature_22', 'feature_28']
Features to drop for HistGradientBoostingClassifier
['feature_7', 'feature_10', 'feature_29']
Features to drop for XGBClassifier
['feature_2', 'feature_4', 'feature_5', 'feature_6', 'feature_7', 'feature_8', 'feature_12', 'feature_14', 'feature_16', 'feature_20', 'feature_22', 'feature_24', 'feature_26', 'feature_27', 'feature_29']
