In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from load_data.my_conn import load_sensor_ete_df

df_sensor_data = load_sensor_ete_df()

features = df_sensor_data.drop(["subject_id", "activity_id", "description"], axis=1)
labels = df_sensor_data["activity_id"]


X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Fit a random forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

importances = model.feature_importances_

# Map features to their importance scores
feature_importance_mapping = {
    feature: importance for feature, importance in zip(features.columns, importances)
}

# Sort features by importance
sorted_features = sorted(
    feature_importance_mapping.items(), key=lambda x: x[1], reverse=True
)

# Now you can decide how many sensors (features) to keep based on their importance scores
print(sorted_features)

[('acceleration_chest_z', 0.08117117146498286), ('acceleration_left_ankle_z', 0.06890162566102627), ('gyro_right_lower_arm_y', 0.0629035135676737), ('acceleration_right_lower_arm_z', 0.061125657400155346), ('acceleration_right_lower_arm_y', 0.05818681324928775), ('magnetometer_left_ankle_x', 0.05691819887980814), ('acceleration_right_lower_arm_x', 0.05513908303937116), ('acceleration_chest_x', 0.054440455784208405), ('gyro_right_lower_arm_x', 0.051707857394928954), ('acceleration_left_ankle_y', 0.05057159746504472), ('session_id', 0.05010267313271848), ('gyro_left_ankle_z', 0.047793567438118766), ('gyro_left_ankle_x', 0.047196008588112584), ('gyro_left_ankle_y', 0.04664810975521091), ('gyro_right_lower_arm_z', 0.044080753654051355), ('magnetometer_right_lower_arm_x', 0.028201282435873023), ('magnetometer_right_lower_arm_z', 0.027380045508084164), ('magnetometer_left_ankle_y', 0.022102013303115193), ('acceleration_chest_y', 0.02195390601111899), ('acceleration_left_ankle_x', 0.019529200