In [2]:
import os
import pandas as pd

# Function to load a single comma-separated data file into a numeric DataFrame
def load_data(file_path):
    """Read one comma-separated recording into a numeric DataFrame.

    The first four lines of the file are skipped as metadata. Of the
    remaining lines, only those that split into exactly seven
    comma-separated fields are kept. Every kept field is converted with
    ``pd.to_numeric(errors='coerce')``, so text that is not a number
    becomes NaN.

    Parameters
    ----------
    file_path : str or path-like
        Location of the file to read.

    Returns
    -------
    pandas.DataFrame
        One row per kept line, with the columns 'Time (s)', 'X (m/s2)',
        'Y (m/s2)', 'Z (m/s2)', 'R (m/s2)', 'Theta (deg)', 'Phi (deg)'.
    """
    column_names = [
        'Time (s)',
        'X (m/s2)',
        'Y (m/s2)',
        'Z (m/s2)',
        'R (m/s2)',
        'Theta (deg)',
        'Phi (deg)',
    ]
    expected_fields = len(column_names)

    rows = []
    with open(file_path, 'r') as handle:
        for line_no, raw_line in enumerate(handle):
            if line_no < 4:  # metadata lines
                continue
            fields = raw_line.strip().split(',')
            # Lines with the wrong number of fields are silently dropped.
            if len(fields) == expected_fields:
                rows.append(fields)

    frame = pd.DataFrame(rows, columns=column_names)
    return frame.apply(pd.to_numeric, errors='coerce')

In [4]:
def load_and_parse_data(base_dir):
    """Load every joint file of every subject found under ``base_dir``.

    ``base_dir`` is expected to contain one sub-directory per subject, and
    each subject directory one data file per joint. The joint name is the
    file name without its extension.

    Entries whose name starts with ``.`` (for example ``.ipynb_checkpoints``
    or ``.DS_Store``), non-directories at subject level and non-files at
    joint level are skipped. Subjects and joints are visited in sorted
    order so repeated runs produce the same dictionary order.

    Parameters
    ----------
    base_dir : str or path-like
        Root directory of the dataset.

    Returns
    -------
    dict
        ``{subject: {joint: DataFrame}}``. A file that fails to load is
        reported on stdout and left out of its subject's dictionary.

    Raises
    ------
    OSError
        If ``base_dir`` itself cannot be listed.
    """
    parsed_data = {}

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for subject in sorted(os.listdir(base_dir)):
        subject_dir = os.path.join(base_dir, subject)
        # A stray file here would make the inner os.listdir raise
        # NotADirectoryError and abort the whole load.
        if subject.startswith('.') or not os.path.isdir(subject_dir):
            continue
        parsed_data[subject] = {}

        for joint_file in sorted(os.listdir(subject_dir)):
            file_path = os.path.join(subject_dir, joint_file)
            if joint_file.startswith('.') or not os.path.isfile(file_path):
                continue
            joint = os.path.splitext(joint_file)[0]
            print(f"Reading file: {file_path}")

            try:
                parsed_data[subject][joint] = load_data(file_path)
            except Exception as e:
                # Best effort: one unreadable file must not stop the others.
                print(f"Error reading file {file_path}: {e}")

    return parsed_data

In [5]:
# Root folder of the dataset: one sub-directory per subject, one file per joint.
base_dir = 'Final_Dataset'  # Update this to your base path
# Nested dict {subject: {joint: DataFrame}} of the raw recordings.
parsed_data = load_and_parse_data(base_dir)

Reading file: Final_Dataset/Abhay/LK.csv
Reading file: Final_Dataset/Abhay/LH.csv
Reading file: Final_Dataset/Abhay/RH.csv
Reading file: Final_Dataset/Abhay/RK.csv
Reading file: Final_Dataset/Abhay/RA.csv
Reading file: Final_Dataset/Abhay/LA.csv
Reading file: Final_Dataset/Kavya/LK.csv
Reading file: Final_Dataset/Kavya/LH.csv
Reading file: Final_Dataset/Kavya/RH.csv
Reading file: Final_Dataset/Kavya/RK.csv
Reading file: Final_Dataset/Kavya/RA.csv
Reading file: Final_Dataset/Kavya/LA.csv
Reading file: Final_Dataset/Gaavya/LK.csv
Reading file: Final_Dataset/Gaavya/LH.csv
Reading file: Final_Dataset/Gaavya/RH.csv
Reading file: Final_Dataset/Gaavya/RK.csv
Reading file: Final_Dataset/Gaavya/RA.csv
Reading file: Final_Dataset/Gaavya/LA.csv
Reading file: Final_Dataset/Vishnu/LK.csv
Reading file: Final_Dataset/Vishnu/LH.csv
Reading file: Final_Dataset/Vishnu/RH.csv
Reading file: Final_Dataset/Vishnu/RK.csv
Reading file: Final_Dataset/Vishnu/RA.csv
Reading file: Final_Dataset/Vishnu/LA.csv
Read

In [6]:
from scipy.ndimage import median_filter, gaussian_filter

# Apply Median and Gaussian filters
def median_then_gaussian_filter(data, kernel_size=5, sigma=1):
    """Smooth ``data`` with a median filter followed by a Gaussian filter.

    Parameters
    ----------
    data : array_like
        Values to smooth.
    kernel_size : int, optional
        Passed to ``scipy.ndimage.median_filter`` as ``size``.
    sigma : float, optional
        Passed to ``scipy.ndimage.gaussian_filter`` as ``sigma``.

    Returns
    -------
    numpy.ndarray
        The Gaussian-filtered result of the median-filtered input.
    """
    despiked = median_filter(data, size=kernel_size)
    return gaussian_filter(despiked, sigma=sigma)

def preprocess_data(data):
    """Filter every signal column of every joint recording.

    For each ``data[subject][joint]`` frame, every column other than
    ``'Time (s)'`` is passed through ``median_then_gaussian_filter`` and
    stored as ``'<column>_filtered'``. The original ``'Time (s)'`` column
    is appended unchanged as the last column.

    The time column is excluded by name rather than by position, so the
    result does not depend on the column order of the input. The filtered
    frame reuses the input frame's index so the time column lines up with
    the filtered values. The input dictionary and its frames are left
    untouched; a new nested dictionary is returned.

    Parameters
    ----------
    data : dict
        ``{subject: {joint: DataFrame}}``; each frame must have a
        ``'Time (s)'`` column.

    Returns
    -------
    dict
        New ``{subject: {joint: DataFrame}}`` holding the filtered frames.

    Raises
    ------
    KeyError
        If a frame has no ``'Time (s)'`` column.
    """
    processed = {}
    for subject, joints in data.items():
        processed[subject] = {}
        for joint, df in joints.items():
            signal_columns = [column for column in df.columns if column != 'Time (s)']
            filtered_data = {
                f'{column}_filtered': median_then_gaussian_filter(df[column].to_numpy())
                for column in signal_columns
            }
            # Reuse df's index: the assignment below aligns on index labels.
            df_filtered = pd.DataFrame(filtered_data, index=df.index)
            df_filtered['Time (s)'] = df['Time (s)']
            processed[subject][joint] = df_filtered
    return processed

# Preprocess the data
# NOTE: parsed_data is rebound to the filtered frames, so running this cell a
# second time without reloading would feed already-filtered data back in.
parsed_data = preprocess_data(parsed_data)


In [7]:
# print(parsed_data)

In [16]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# Function to create sequences for LSTM
def create_sequences(data, time_steps=50):
    """Cut ``data`` into overlapping windows and their next-step targets.

    Window ``i`` is ``data[i:i + time_steps]`` and its target is
    ``data[i + time_steps]``; windows advance one row at a time.

    Parameters
    ----------
    data : array_like
        Rows are consecutive samples; any trailing dimensions are kept.
    time_steps : int, optional
        Number of rows per window. Must not be negative.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(n_windows, time_steps, *data.shape[1:])``.
    y : numpy.ndarray
        Shape ``(n_windows, *data.shape[1:])``.
        ``n_windows`` is ``max(len(data) - time_steps, 0)``. When it is zero
        the arrays are empty but still carry the trailing dimensions, so
        callers can rely on ``X.shape[-1]``.

    Raises
    ------
    ValueError
        If ``time_steps`` is negative.
    """
    if time_steps < 0:
        raise ValueError(f"time_steps must be non-negative, got {time_steps}")

    data = np.asarray(data)
    n_windows = len(data) - time_steps
    if n_windows <= 0:
        # Too short for even one window: return correctly shaped empties
        # instead of shape-(0,) arrays.
        trailing = data.shape[1:]
        return (
            np.empty((0, time_steps) + trailing, dtype=data.dtype),
            np.empty((0,) + trailing, dtype=data.dtype),
        )

    X = np.stack([data[start:start + time_steps] for start in range(n_windows)])
    # Targets are the rows immediately following each window.
    y = data[time_steps:time_steps + n_windows].copy()
    return X, y

# Prepare the data for each joint
time_steps = 50  # number of rows per window handed to create_sequences
X, y = {}, {}

for subject, joint_frames in parsed_data.items():
    X[subject], y[subject] = {}, {}
    for joint, df in joint_frames.items():
        # Every column except the time stamp is used as a feature.
        columns_filtered = [col for col in df.columns if col != 'Time (s)']
        data_arr = df[columns_filtered].values
        windows, targets = create_sequences(data_arr, time_steps)
        X[subject][joint] = windows
        y[subject][joint] = targets

# Split into train, validation, and test sets and scale the data
train_size = 0.7
val_size = 0.15
test_size = 0.15  # not read below: the test split is whatever remains after train and val

X_train, X_val, X_test, y_train, y_val, y_test = {}, {}, {}, {}, {}, {}
scalers = {}
imputers = {}


def _clean_and_scale(values, imputer, scaler, fit=False):
    """Zero-fill NaNs, then impute and standardise ``values`` per feature.

    ``values`` is flattened to ``(-1, n_features)`` using its last axis,
    passed through ``imputer`` and ``scaler``, and reshaped back.

    Parameters
    ----------
    values : numpy.ndarray
        Windows ``(n, time_steps, n_features)`` or targets ``(n, n_features)``.
    imputer, scaler
        Transformers exposing ``fit_transform`` and ``transform``.
    fit : bool, optional
        When True both transformers are fitted on ``values`` first.

    Returns
    -------
    numpy.ndarray
        Transformed array with the same shape as ``values``.
    """
    if len(values) == 0 and not fit:
        # An empty split has nothing to transform; pass it through untouched.
        return values
    n_features = values.shape[-1]
    # NaNs are replaced by 0.0 before the imputer runs, so the imputer
    # receives no missing values; it is kept so `imputers` stays populated.
    flat = np.nan_to_num(values, nan=0.0).reshape(-1, n_features)
    if fit:
        flat = scaler.fit_transform(imputer.fit_transform(flat))
    else:
        flat = scaler.transform(imputer.transform(flat))
    return flat.reshape(values.shape)


for subject in X:
    X_train[subject], X_val[subject], X_test[subject] = {}, {}, {}
    y_train[subject], y_val[subject], y_test[subject] = {}, {}, {}
    scalers[subject] = {}
    imputers[subject] = {}

    for joint in X[subject]:
        n_total = len(X[subject][joint])
        n_train = int(n_total * train_size)
        n_val = int(n_total * val_size)
        if n_train == 0:
            # Nothing to fit the scaler on; leave this joint out of every split.
            print(f"Skipping {subject} - {joint}: too few sequences ({n_total}) for a training split")
            continue
        val_end = n_train + n_val

        imputers[subject][joint] = SimpleImputer(strategy='mean')
        scalers[subject][joint] = StandardScaler()
        imputer, scaler = imputers[subject][joint], scalers[subject][joint]

        # Chronological split: the statistics come from the training windows only.
        X_train[subject][joint] = _clean_and_scale(X[subject][joint][:n_train], imputer, scaler, fit=True)
        X_val[subject][joint] = _clean_and_scale(X[subject][joint][n_train:val_end], imputer, scaler)
        X_test[subject][joint] = _clean_and_scale(X[subject][joint][val_end:], imputer, scaler)

        # Targets are next-step rows of the same features, so they get the same
        # NaN handling and the same fitted scaler as the inputs. Leaving them raw
        # would train on NaN targets and mix scaled inputs with unscaled outputs.
        y_train[subject][joint] = _clean_and_scale(y[subject][joint][:n_train], imputer, scaler)
        y_val[subject][joint] = _clean_and_scale(y[subject][joint][n_train:val_end], imputer, scaler)
        y_test[subject][joint] = _clean_and_scale(y[subject][joint][val_end:], imputer, scaler)

print("Data preprocessing complete.")


Data preprocessing complete.


In [44]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Function to create LSTM model
def create_lstm_model(input_shape):
    """Build and compile a two-layer LSTM regressor.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample; its last entry is also used as the
        number of output units.

    Returns
    -------
    Sequential
        Two stacked 50-unit LSTM layers with ReLU activation (the first
        returns full sequences) followed by a Dense output layer, compiled
        with the Adam optimizer and mean-squared-error loss.
    """
    n_outputs = input_shape[-1]
    network = Sequential([
        LSTM(50, activation='relu', return_sequences=True, input_shape=input_shape),
        LSTM(50, activation='relu'),
        Dense(n_outputs),
    ])
    network.compile(optimizer='adam', loss='mse')
    return network

# Train the model for each joint
models = {}

for subject, joint_windows in X_train.items():
    models[subject] = {}
    for joint, train_windows in joint_windows.items():
        print(f'Training model for {subject} - {joint}')

        # Input shape is (window length, number of features) of one sample.
        model = create_lstm_model(tuple(train_windows.shape[1:3]))
        model.fit(
            train_windows,
            y_train[subject][joint],
            epochs=50,
            batch_size=32,
            validation_data=(X_val[subject][joint], y_val[subject][joint]),
            verbose=1,
        )

        models[subject][joint] = model
        # One file per subject/joint pair, written to the working directory.
        model.save(f'{subject}_{joint}_lstm_model.h5')


: 

In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

# Evaluate the model for each joint
for subject, joint_models in models.items():
    for joint, model in joint_models.items():
        print(f'Evaluating model for {subject} - {joint}')
        actual = y_test[subject][joint]
        predictions = model.predict(X_test[subject][joint])

        mse = mean_squared_error(actual, predictions)
        mae = mean_absolute_error(actual, predictions)
        print(f'{subject} - {joint} - Mean Squared Error: {mse}')
        print(f'{subject} - {joint} - Mean Absolute Error: {mae}')

        # Plot some test results for visual inspection:
        # first 100 test samples of feature column 0.
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.plot(actual[:100, 0], label='True')
        ax.plot(predictions[:100, 0], label='Predicted')
        ax.set_title(f'True vs Predicted values for {subject} - {joint} - first feature')
        ax.legend()
        plt.show()
