This script initializes two FeatureBagging models, one with IForest and the other with HBOS as the base detector. Both are fitted on the scaled data, and each model returns one outlier score per sample (row). The final outlier scores are the average of these two sets of scores. The top-scoring outliers are then used to select the corresponding features from the original dataframe; because the scores index samples rather than columns, the outliers must first be translated into a per-feature ranking before any columns are selected.

Remember, this approach focuses on finding the features most related to the outliers detected by the feature bagging approach, not on selecting the most representative features for the dataset.

In [None]:
from pyod.models.feature_bagging import FeatureBagging
from pyod.models.iforest import IForest
from pyod.models.hbos import HBOS
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Purpose: rank the features of `df` by how strongly they deviate on the
# samples that a FeatureBagging ensemble (IForest + HBOS) flags as outliers,
# and keep the N most outlier-related features in `df_reduced`.
# NOTE(review): `df` is expected to be defined earlier in the notebook and to
# be fully numeric without missing values -- confirm against the caller.

# Scaling the data (zero mean / unit variance per feature)
scaler = StandardScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)

# Define the number of top features to select
N = 10

# Expected share of outliers; shared by both detectors and by the
# outlier-row selection below so the two stay consistent.
CONTAMINATION = 0.01

# Initialize the base detectors
iforest = IForest()
hbos = HBOS()

# Initialize the feature bagging detectors
fb_iforest = FeatureBagging(iforest, contamination=CONTAMINATION, check_estimator=False, random_state=10)
fb_hbos = FeatureBagging(hbos, contamination=CONTAMINATION, check_estimator=False, random_state=10)

# Fit the models
fb_iforest.fit(df_scaled)
fb_hbos.fit(df_scaled)

# Get the outlier scores (one score per SAMPLE/row, higher = more abnormal)
outlier_scores_iforest = fb_iforest.decision_function(df_scaled)
outlier_scores_hbos = fb_hbos.decision_function(df_scaled)

# Combine the outlier scores.
# The two detectors score on different scales, so each score vector is
# z-scored first; otherwise the plain average is dominated by one detector.
score_frame = pd.DataFrame(
    {"iforest": outlier_scores_iforest, "hbos": outlier_scores_hbos}
)
# A detector with constant scores has std 0; divide by 1 instead to avoid NaN.
score_std = score_frame.std(ddof=0).replace(0.0, 1.0)
combined_outlier_scores = (
    ((score_frame - score_frame.mean()) / score_std).mean(axis=1).to_numpy()
)

# Positions of the most anomalous rows (top CONTAMINATION share, at least one).
# These are ROW positions and must not be used to index columns directly.
n_samples = len(df_scaled)
n_outliers = min(n_samples, max(1, int(round(CONTAMINATION * n_samples))))
outlier_row_indices = combined_outlier_scores.argsort()[-n_outliers:]

# Per-feature relevance: mean absolute z-score over the outlier rows.
# Because df_scaled is standardized, a large value means the outliers sit far
# from the bulk of the data on that feature.
feature_relevance = df_scaled.iloc[outlier_row_indices].abs().mean(axis=0)

# Get the indices of the top N features (most relevant first); N is clamped
# to the number of available columns.
n_features = min(N, df_scaled.shape[1])
top_N_indices = feature_relevance.to_numpy().argsort()[::-1][:n_features]

# Select these features from the original dataframe
df_reduced = df.iloc[:, top_N_indices]
