In [4]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# Load your data from a CSV file
# Replace 'your_file.csv' with the path to your CSV file
data = pd.read_csv('4_Raw_data_csv.csv')

# Select the output feature (label)
y = data['St1_Angle_Hip_X']

# Select all other features except 'St1_Angle_Hip_X' and 'Time'
X = data.drop(columns=['St1_Angle_Hip_X', 'Time'])
# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test = train_test_split(X_scaled, test_size=0.2, random_state=42)
# Define the number of input features
input_dim = X_train.shape[1]

# Set the number of dimensions for the latent space (compressed representation)
latent_dim = 10  # Adjust according to how much you want to reduce the features

# Define the encoder
input_layer = Input(shape=(input_dim,))
encoded = Dense(64, activation='relu')(input_layer)
encoded = Dense(32, activation='relu')(encoded)
latent = Dense(latent_dim, activation='relu')(encoded)

# Define the decoder
decoded = Dense(32, activation='relu')(latent)
decoded = Dense(64, activation='relu')(decoded)
output_layer = Dense(input_dim, activation='sigmoid')(decoded)

# Combine the encoder and decoder into an autoencoder model
autoencoder = Model(inputs=input_layer, outputs=output_layer)

# Compile the autoencoder
autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# Train the autoencoder
autoencoder.fit(X_train, X_train,
                epochs=5,
                batch_size=32,
                validation_data=(X_test, X_test),
                verbose=2)
# Create a separate model for the encoder to extract the latent features
encoder = Model(inputs=input_layer, outputs=latent)

# Use the encoder to transform the data
X_train_encoded = encoder.predict(X_train)
X_test_encoded = encoder.predict(X_test)

# Extract the weights of the first layer in the encoder
encoder_weights = encoder.layers[1].get_weights()[0]

# Analyze which original features contribute most to each latent feature
feature_importance = np.mean(np.abs(encoder_weights), axis=1)

# Sort features by importance
important_features_indices = np.argsort(feature_importance)[::-1]

# Map the indices to feature names
important_features = data.columns[important_features_indices]

print("Most important features (by name):", important_features)
# Select the most important features from the original data
# You can choose a threshold for the number of features you want to keep
# Using .iloc for index-based selection
selected_features = X.iloc[:, important_features_indices[:10]]  # Keep top 10 features, adjust as needed


# Convert selected features back to a DataFrame if you need to save or further process
selected_features_df = pd.DataFrame(selected_features, columns=important_features[:10])

# Save the selected features to a new CSV file
selected_features_df.to_csv('selected_featuresSt1_Angle_Hip_X.csv', index=False)


Epoch 1/5
2262/2262 - 7s - loss: 0.7462 - val_loss: 0.7274 - 7s/epoch - 3ms/step
Epoch 2/5
2262/2262 - 5s - loss: 0.6982 - val_loss: 0.7162 - 5s/epoch - 2ms/step
Epoch 3/5
2262/2262 - 5s - loss: 0.6915 - val_loss: 0.7126 - 5s/epoch - 2ms/step
Epoch 4/5
2262/2262 - 5s - loss: 0.6887 - val_loss: 0.7106 - 5s/epoch - 2ms/step
Epoch 5/5
2262/2262 - 5s - loss: 0.6869 - val_loss: 0.7089 - 5s/epoch - 2ms/step
Most important features (by name): Index(['St1_Torque_Knee_Z', 'St1_Angle_Pelvis_Z', 'St1_Torque_Knee_Y',
       'St1_Angle_Knee_X', 'St1_GRF_Y', 'St1_Torque_Ankle_X',
       'St1_Torque_Pelvis_Z', 'St1_Angle_Pelvis_Y', 'St1_Angle_Pelvis_X',
       'St1_Angle_Hip_Z', 'St1_Angle_Hip_Y', 'St1_Angle_Knee_Z',
       'St1_Angle_Hip_X', 'St1_Angle_Ankle_Y', 'St1_Angle_Ankle_X',
       'St1_GRF_Z', 'Time', 'St1_Torque_Pelvis_X', 'St1_Torque_Hip_X',
       'St1_Torque_Hip_Z', 'St1_Angle_Knee_Y', 'St1_GRF_X', 'St1_Torque_Hip_Y',
       'St1_Torque_Pelvis_Y', 'St1_Angle_Ankle_Z', 'St1_Torque_Knee_X