# **Libraries**

In [1]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import seaborn as sns
import os 
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim


from scipy.signal import savgol_filter
import scipy.signal as signal
import pywt
from scipy.interpolate import interp1d
from scipy import ndimage

from sklearn.ensemble import IsolationForest
import copy

import plotly.graph_objects as go

import warnings
warnings.filterwarnings("ignore")

# **Utils**

# Knee angle function

In [2]:
def calculate_left_knee_angle(df):
    '''The function takes a dataframe as input and iterates over each row in the dataframe. For each row, it extracts the accelerometer data for the right thigh and right shin, i.e., accelerometer_right_thigh_x, accelerometer_right_thigh_y, accelerometer_right_shin_x, and accelerometer_right_shin_y.

    To calculate the knee angle, the function uses the atan2 function from the math module. The atan2 function takes two arguments, y and x, and returns the angle in radians between the positive x-axis and the point (x, y) in the Cartesian plane.

    In this case, the atan2 function is used to calculate the angle between the vector formed by the thigh and shin accelerometer data. Specifically, it calculates the angle between the vector formed by the difference between the y-coordinates of the thigh and shin accelerometer data (thigh_y - shin_y) and the difference between the x-coordinates of the thigh and shin accelerometer data (thigh_x - shin_x).

    The math.degrees function is then used to convert the angle from radians to degrees. The resulting knee angle for each row is added to a new column 'knee_angle', which is then returned by the function.
    '''
    # calculate the knee angle for each row in the data frame
    for index, row in df.iterrows():
        thigh_x = row['accelerometer_left_thigh_x']
        thigh_y = row['accelerometer_left_thigh_y']
        shin_x = row['accelerometer_left_shin_x']
        shin_y = row['accelerometer_left_shin_y']
        
        knee_angle = math.degrees(math.atan2(thigh_y - shin_y, thigh_x - shin_x))
        
        # set the knee angle value using .loc to avoid SettingWithCopyWarning
        df.loc[index, 'knee_angle'] = knee_angle
        
    return df


def calculate_right_knee_angle(df):
    '''The function takes a dataframe as input and iterates over each row in the dataframe. For each row, it extracts the accelerometer data for the right thigh and right shin, i.e., accelerometer_right_thigh_x, accelerometer_right_thigh_y, accelerometer_right_shin_x, and accelerometer_right_shin_y.

    To calculate the knee angle, the function uses the atan2 function from the math module. The atan2 function takes two arguments, y and x, and returns the angle in radians between the positive x-axis and the point (x, y) in the Cartesian plane.

    In this case, the atan2 function is used to calculate the angle between the vector formed by the thigh and shin accelerometer data. Specifically, it calculates the angle between the vector formed by the difference between the y-coordinates of the thigh and shin accelerometer data (thigh_y - shin_y) and the difference between the x-coordinates of the thigh and shin accelerometer data (thigh_x - shin_x).

    The math.degrees function is then used to convert the angle from radians to degrees. The resulting knee angle for each row is added to a new column 'knee_angle', which is then returned by the function.
    '''
    # calculate the knee angle for each row in the data frame
    for index, row in df.iterrows():
        thigh_x = row['accelerometer_right_thigh_x']
        thigh_y = row['accelerometer_right_thigh_y']
        shin_x = row['accelerometer_right_shin_x']
        shin_y = row['accelerometer_right_shin_y']
        
        knee_angle = math.degrees(math.atan2(thigh_y - shin_y, thigh_x - shin_x))
        
        # set the knee angle value using .loc to avoid SettingWithCopyWarning
        df.loc[index, 'knee_angle'] = knee_angle
        
    return df

# Filter function

In [3]:
def wavelet_denoise(dfs):
    # create a copy of the input dfs
    denoised_dfs = dfs.copy()

    # define the wavelet and the thresholding function
    wavelet = pywt.Wavelet('sym4')
    threshold = 0.2

    # loop over all dataframes in dfs
    for df in tqdm(denoised_dfs):
        # loop over all accelerometer and gyroscope columns
        for col in df.columns:
            if 'accelerometer' in col or 'gyroscope' in col:
                # apply the wavelet transform to the column
                coeffs = pywt.wavedec(df[col], wavelet)

                # apply soft thresholding to the wavelet coefficients
                thresholded_coeffs = []
                for coeff in coeffs:
                    thresholded_coeffs.append(pywt.threshold(coeff, threshold, 'soft'))

                # reconstruct the denoised signal from the thresholded wavelet coefficients
                denoised_signal = pywt.waverec(thresholded_coeffs, wavelet)

                # interpolate the denoised signal to match the length of the original column
                t = np.linspace(0, 1, len(denoised_signal))
                f = interp1d(t, denoised_signal)
                denoised_signal = f(np.linspace(0, 1, len(df[col])))

                # replace the original sensor values with the denoised values
                df[col] = denoised_signal

    # return the denoised dataframes
    return denoised_dfs

def preprocess_data(dfs, fs, window_length=11, polyorder=4, cutoff=5, order=10, ma_window=5):
    """
    Applies Savitzky-Golay filter, Butterworth filter, and moving average filter on the accelerometer and gyroscope
    columns of each dataframe in the input list and returns a new list of preprocessed dataframes without modifying the
    original dataframes.

    :param dfs: list of dataframes to be filtered
    :param fs: sampling frequency of the sensor data
    :param window_length: length of the window used for Savitzky-Golay filter (default: 11)
    :param polyorder: order of the polynomial used for Savitzky-Golay filter (default: 4)
    :param cutoff: cutoff frequency for the Butterworth filter (default: 5)
    :param order: order of the Butterworth filter (default: 10)
    :param ma_window: window size used for moving average filter (default: 5)
    :return: list of preprocessed dataframes
    """
    
    filtered_dfs = []
    
    for df in tqdm(dfs):
        # Make a copy of the original dataframe
        filtered_df = df.copy()
        
        # Apply Savitzky-Golay filter
        filtered_df = savgol_filter(filtered_df, window_length=window_length, polyorder=polyorder)
        
        # Apply Butterworth filter
        filtered_df = butterworth_filter(filtered_df, fs, cutoff, order=order)
        
        # Apply moving average filter
        filtered_df = moving_average_filter(filtered_df, window_size=ma_window)
        
        filtered_dfs.append(filtered_df)
    
    return filtered_dfs

# **I- Data preparation:**

# Gait data

In [4]:
path = "/kaggle/input/hugadb-human-gait-database"
files = [f for f in os.listdir(path) if f.endswith(".csv")]
dfs = []

for file in tqdm(files):
    data = pd.read_csv(os.path.join(path, file))
    data = data.drop(columns= ['Unnamed: 0','EMG_right','EMG_left','accelerometer_right_foot_x','accelerometer_right_foot_y',
                              'accelerometer_right_foot_z','gyroscope_right_foot_x','gyroscope_right_foot_y','gyroscope_right_foot_z',
                              'accelerometer_right_shin_z','gyroscope_right_shin_z','accelerometer_right_thigh_z','gyroscope_left_shin_z',
                              'accelerometer_left_thigh_z','gyroscope_left_shin_z','gyroscope_right_thigh_z','gyroscope_left_thigh_z',
                               'accelerometer_left_shin_z','gyroscope_left_foot_z','accelerometer_left_foot_y','accelerometer_left_foot_x',
                              'accelerometer_left_foot_z', 'gyroscope_left_foot_x','gyroscope_left_foot_y'], axis=1)
    # Calculate the time between successive data points (assuming 50 Hz sampling rate)
    time_diff = 0.01
    time = [i * time_diff for i in range(len(data))]
    data['time'] = time
    data['filename'] = file
    data['person'] = data['filename'].str.split('_').str[-2:].str.join('_').str.split('.').str[0].astype(str).str[0:2]
    data['person'] = data['person'].astype(str).str.lstrip('0').astype(int)
    dfs.append(data)
    
person_dict = {
    1: {'weight': 75, 'height': 177, 'age': 24, 'sex': 'M'},
    2: {'weight': 80, 'height': 183, 'age': 22, 'sex': 'M'},
    3: {'weight': 65, 'height': 183, 'age': 23, 'sex': 'M'},
    4: {'weight': 93, 'height': 189, 'age': 24, 'sex': 'M'},
    5: {'weight': 63, 'height': 183, 'age': 35, 'sex': 'M'},
    6: {'weight': 54, 'height': 168, 'age': 25, 'sex': 'F'},
    7: {'weight': 52, 'height': 161, 'age': 22, 'sex': 'F'},
    8: {'weight': 80, 'height': 176, 'age': 23, 'sex': 'M'},
    9: {'weight': 65, 'height': 175, 'age': 24, 'sex': 'F'},
    10: {'weight': 118, 'height': 183, 'age': 27, 'sex': 'M'},
    11: {'weight': 85, 'height': 203, 'age': 24, 'sex': 'M'},
    12: {'weight': 85, 'height': 192, 'age': 23, 'sex': 'M'},
    13: {'weight': 64, 'height': 174, 'age': 18, 'sex': 'M'},
    14: {'weight': 68, 'height': 175, 'age': 19, 'sex': 'M'},
    15: {'weight': 72, 'height': 178, 'age': 23, 'sex': 'M'},
    16: {'weight': 48, 'height': 164, 'age': 26, 'sex': 'F'},
    17: {'weight': 85, 'height': 179, 'age': 25, 'sex': 'M'},
    18: {'weight': 70, 'height': 180, 'age': 19, 'sex': 'M'}
}

for df in tqdm(dfs):
    person_info = [person_dict[int(person)] for person in df['person']]
    df[['weight', 'height', 'age', 'sex']] = pd.DataFrame(person_info)

100%|██████████| 364/364 [00:18<00:00, 19.49it/s]
100%|██████████| 364/364 [00:02<00:00, 148.20it/s]


In [5]:
dfs[0].head()

Unnamed: 0,accelerometer_right_shin_x,accelerometer_right_shin_y,gyroscope_right_shin_x,gyroscope_right_shin_y,accelerometer_right_thigh_x,accelerometer_right_thigh_y,gyroscope_right_thigh_x,gyroscope_right_thigh_y,accelerometer_left_shin_x,accelerometer_left_shin_y,...,gyroscope_left_thigh_x,gyroscope_left_thigh_y,activity,time,filename,person,weight,height,age,sex
0,-15900,1512,-54,-165,-15692,-1408,11,-257,-15792,4320,...,23,-25,standing,0.0,HuGaDB_v2_various_03_20.csv,3,65,183,23,M
1,-15832,1532,29,-180,-15664,-1440,20,-323,-15728,4240,...,37,-23,standing,0.01,HuGaDB_v2_various_03_20.csv,3,65,183,23,M
2,-15936,1500,20,-146,-15632,-1476,-17,-329,-15752,4168,...,37,-31,standing,0.02,HuGaDB_v2_various_03_20.csv,3,65,183,23,M
3,-15932,1464,24,-149,-15840,-1552,25,-300,-15952,4128,...,35,-42,standing,0.03,HuGaDB_v2_various_03_20.csv,3,65,183,23,M
4,-15960,1576,14,-160,-15816,-1580,87,-204,-15808,4152,...,49,-38,standing,0.04,HuGaDB_v2_various_03_20.csv,3,65,183,23,M


In [6]:
dfs[0].columns

Index(['accelerometer_right_shin_x', 'accelerometer_right_shin_y',
       'gyroscope_right_shin_x', 'gyroscope_right_shin_y',
       'accelerometer_right_thigh_x', 'accelerometer_right_thigh_y',
       'gyroscope_right_thigh_x', 'gyroscope_right_thigh_y',
       'accelerometer_left_shin_x', 'accelerometer_left_shin_y',
       'gyroscope_left_shin_x', 'gyroscope_left_shin_y',
       'accelerometer_left_thigh_x', 'accelerometer_left_thigh_y',
       'gyroscope_left_thigh_x', 'gyroscope_left_thigh_y', 'activity', 'time',
       'filename', 'person', 'weight', 'height', 'age', 'sex'],
      dtype='object')

# **II- Data cleaning**

# Check for NaN values

In [7]:
has_nan = [df.isnull().values.any() for df in dfs]
print(any(has_nan))

False


# Removing outliers using isolation forrest model

In [8]:
'''# Define the Isolation Forest model with contamination parameter
model = IsolationForest(n_estimators=1000, max_samples='auto', contamination='auto', random_state=42)

# Create a copy of the dfs list
dfs_filtered = copy.deepcopy(dfs)

# Loop through each dataframe in the list and apply the Isolation Forest model
for i, df in enumerate(tqdm(dfs)):
    # Select only the numerical columns
    num_cols = df.select_dtypes(include=['float64', 'int64']).columns.tolist()
    data = df[num_cols].values
    
    # Fit the model and predict outliers
    model.fit(data)
    outliers = model.predict(data)
    
    # Remove the outliers from the dataframe copy
    dfs_filtered[i] = df[outliers != -1]'''

"# Define the Isolation Forest model with contamination parameter\nmodel = IsolationForest(n_estimators=1000, max_samples='auto', contamination='auto', random_state=42)\n\n# Create a copy of the dfs list\ndfs_filtered = copy.deepcopy(dfs)\n\n# Loop through each dataframe in the list and apply the Isolation Forest model\nfor i, df in enumerate(tqdm(dfs)):\n    # Select only the numerical columns\n    num_cols = df.select_dtypes(include=['float64', 'int64']).columns.tolist()\n    data = df[num_cols].values\n    \n    # Fit the model and predict outliers\n    model.fit(data)\n    outliers = model.predict(data)\n    \n    # Remove the outliers from the dataframe copy\n    dfs_filtered[i] = df[outliers != -1]"

# Denoising autoencoder

In [9]:
# Instantiate the autoencoder
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Select the columns that correspond to the sensor data
sensor_cols = ['accelerometer_right_shin_x', 'accelerometer_right_shin_y', 'gyroscope_right_shin_x', 'gyroscope_right_shin_y',
               'accelerometer_right_thigh_x', 'accelerometer_right_thigh_y', 'gyroscope_right_thigh_x', 'gyroscope_right_thigh_y',
               'accelerometer_left_shin_x', 'accelerometer_left_shin_y', 'gyroscope_left_shin_x', 'gyroscope_left_shin_y',
               'accelerometer_left_thigh_x', 'accelerometer_left_thigh_y', 'gyroscope_left_thigh_x', 'gyroscope_left_thigh_y']

sensor_data = pd.concat([df[sensor_cols] for df in dfs], axis=0, ignore_index=True)

# Normalize the data
sensor_data = (sensor_data - sensor_data.mean()) / sensor_data.std()

# Convert the dataframe to a PyTorch tensor
sensor_data = torch.tensor(sensor_data.values).float().to(device)

In [10]:
class SensorAutoencoder(nn.Module):
    def __init__(self, input_size, hidden_size, device):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, hidden_size),
        )
        self.decoder = nn.Sequential(
            nn.Linear(hidden_size, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, input_size),
        )
        self.device = device
    
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    
    def l1_regularization(self, l1_lambda):
        l1_reg = torch.tensor(0., device=self.device)
        for name, param in self.named_parameters():
            if 'weight' in name:
                l1_reg += torch.norm(param, 1)
        return l1_lambda * l1_reg

# Define the input and hidden dimensions of the autoencoder
input_dim = len(sensor_cols)  # This should be the number of sensor columns in your dataframe
hidden_dim = 512

autoencoder = SensorAutoencoder(input_dim, hidden_dim, device=device).to(device)

# Define the loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.0001)

# Define a learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

# Define early stopping parameters
patience = 10
best_loss = float('inf')
counter = 0

# Define the number of epochs to train for
num_epochs = 100

# Define the L1 regularization lambda
l1_lambda = 0.001


# Define a dataloader to iterate over the data in batches
batch_size = 64
data_loader = torch.utils.data.DataLoader(sensor_data, batch_size=batch_size, shuffle=True)

In [11]:
#Train the autoencoder
for epoch in range(num_epochs):
    for data in data_loader:
        optimizer.zero_grad()
        data = data.to(device)
        recon_data = autoencoder(data)
        l1_reg = autoencoder.l1_regularization(l1_lambda)
        loss = criterion(recon_data, data) + l1_reg
        loss.backward()
        optimizer.step()

    # Evaluate the loss on the entire dataset
    with torch.no_grad():
        recon_data = autoencoder(sensor_data)
        val_loss = criterion(recon_data, sensor_data)
    
    # Update the learning rate scheduler and print the loss after every epoch
    scheduler.step(val_loss)
    print('Epoch [{}/{}], Loss: {:.4f}, Val Loss: {:.4f}'.format(epoch+1, num_epochs, loss.item(), val_loss.item()))

    # Early stopping
    if val_loss < best_loss:
        best_loss = val_loss
        counter = 0
    else:
        counter += 1
    if counter >= patience:
        print(f'Early stopping at epoch {epoch}')
        break

Epoch [1/100], Loss: 0.3407, Val Loss: 0.0138
Epoch [2/100], Loss: 0.2368, Val Loss: 0.0083
Epoch [3/100], Loss: 0.1902, Val Loss: 0.0087
Epoch [4/100], Loss: 0.1860, Val Loss: 0.0065
Epoch [5/100], Loss: 0.1776, Val Loss: 0.0058
Epoch [6/100], Loss: 0.1877, Val Loss: 0.0052
Epoch [7/100], Loss: 0.1718, Val Loss: 0.0044
Epoch [8/100], Loss: 0.1475, Val Loss: 0.0049
Epoch [9/100], Loss: 0.1463, Val Loss: 0.0049
Epoch [10/100], Loss: 0.1822, Val Loss: 0.0056
Epoch [11/100], Loss: 0.1692, Val Loss: 0.0058
Epoch [12/100], Loss: 0.1370, Val Loss: 0.0030
Epoch [13/100], Loss: 0.1453, Val Loss: 0.0031
Epoch [14/100], Loss: 0.1317, Val Loss: 0.0029
Epoch [15/100], Loss: 0.1278, Val Loss: 0.0032
Epoch [16/100], Loss: 0.1193, Val Loss: 0.0032
Epoch [17/100], Loss: 0.1307, Val Loss: 0.0037
Epoch [18/100], Loss: 0.1305, Val Loss: 0.0031
Epoch [19/100], Loss: 0.1412, Val Loss: 0.0028
Epoch [20/100], Loss: 0.1164, Val Loss: 0.0028
Epoch [21/100], Loss: 0.1667, Val Loss: 0.0027
Epoch [22/100], Loss: 

In [12]:
torch.save(autoencoder.state_dict(), 'sensor_autoencoder_2.pt')

In [13]:
denoised_dfs = []
# Loop over the original dataframes
for i, df in tqdm(enumerate(dfs), total=len(dfs)):
    # Convert the dataframe to a PyTorch tensor and pass it through the autoencoder
    df_tensor = torch.tensor(df[sensor_cols].values).float().to(device)
    denoised_tensor = autoencoder(df_tensor)

    # Convert the denoised tensor back to a Pandas dataframe and combine it with the original dataframe
    denoised_df = pd.DataFrame(denoised_tensor.detach().cpu().numpy(), columns=sensor_cols)
    denoised_df.index = df.index
    denoised_df = pd.concat([df.drop(sensor_cols, axis=1), denoised_df], axis=1)
    
    # Append the denoised dataframe to the list
    denoised_dfs.append(denoised_df)

100%|██████████| 364/364 [00:01<00:00, 251.77it/s]


In [14]:
denoised_dfs[10]

Unnamed: 0,activity,time,filename,person,weight,height,age,sex,accelerometer_right_shin_x,accelerometer_right_shin_y,...,gyroscope_right_thigh_x,gyroscope_right_thigh_y,accelerometer_left_shin_x,accelerometer_left_shin_y,gyroscope_left_shin_x,gyroscope_left_shin_y,accelerometer_left_thigh_x,accelerometer_left_thigh_y,gyroscope_left_thigh_x,gyroscope_left_thigh_y
0,standing,0.00,HuGaDB_v2_various_07_19.csv,7,52,161,22,F,-0.107673,-0.884439,...,0.070794,0.267006,-0.006241,-3.379800,-0.028462,0.160814,0.014758,-2.483240,-0.021056,-0.391887
1,standing,0.01,HuGaDB_v2_various_07_19.csv,7,52,161,22,F,-0.066968,-0.719065,...,0.156643,-0.014225,0.604659,-3.214842,-0.034063,-0.300038,-0.809060,-1.709449,-0.151850,0.204215
2,standing,0.02,HuGaDB_v2_various_07_19.csv,7,52,161,22,F,-0.066968,-1.700522,...,0.127307,-0.253029,0.039111,-3.517576,0.005876,-0.615921,-1.331798,-0.844585,-0.221674,0.764353
3,standing,0.03,HuGaDB_v2_various_07_19.csv,7,52,161,22,F,-0.110640,-1.576637,...,0.021050,-0.230475,0.284215,-2.376308,0.027212,-0.352657,-0.780340,-0.996903,-0.124306,0.247893
4,standing,0.04,HuGaDB_v2_various_07_19.csv,7,52,161,22,F,-0.368655,-1.225337,...,-0.031694,-0.129100,0.517537,-2.648389,-0.005333,-0.155165,-0.357909,-2.143814,-0.026735,-0.012083
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5743,standing,57.43,HuGaDB_v2_various_07_19.csv,7,52,161,22,F,-0.066968,-0.525076,...,-0.047467,0.197346,0.044424,0.073489,0.086621,-0.025618,0.022478,0.462998,0.479823,0.042735
5744,standing,57.44,HuGaDB_v2_various_07_19.csv,7,52,161,22,F,-0.106732,-0.122698,...,-0.061463,0.147343,0.112127,1.778283,0.043601,0.099898,0.396085,1.190408,0.410198,-0.312436
5745,standing,57.45,HuGaDB_v2_various_07_19.csv,7,52,161,22,F,-0.192714,0.537579,...,-0.024124,0.083651,0.366407,1.049161,0.042132,-0.229488,0.053876,1.482234,0.078446,-0.340706
5746,standing,57.46,HuGaDB_v2_various_07_19.csv,7,52,161,22,F,-0.105723,0.123889,...,0.035503,0.149961,0.012473,0.502044,0.067471,-0.338358,-0.216250,0.980749,-0.271076,-0.296139


# Separate the dataframes into left and right legs dataframes

In [15]:
left_dfs = []
right_dfs = []
denoised_left_dfs = []
denoised_right_dfs = []

right_cols = ['time','person','weight', 'height', 'age', 'sex','accelerometer_right_shin_x', 'accelerometer_right_shin_y', 'gyroscope_right_shin_x', 'gyroscope_right_shin_y', 'accelerometer_right_thigh_x', 'accelerometer_right_thigh_y', 'gyroscope_right_thigh_x', 'gyroscope_right_thigh_y','activity','filename']
left_cols = ['time','person','weight', 'height', 'age', 'sex','accelerometer_left_shin_x', 'accelerometer_left_shin_y', 'gyroscope_left_shin_x', 'gyroscope_left_shin_y', 'accelerometer_left_thigh_x', 'accelerometer_left_thigh_y', 'gyroscope_left_thigh_x', 'gyroscope_left_thigh_y', 'activity','filename']


for df in tqdm(dfs):
    right_df = df[right_cols]
    left_df = df[left_cols]
    
    right_dfs.append(right_df)
    left_dfs.append(left_df)

for df in tqdm(denoised_dfs):
    denoised_right_df = df[right_cols]
    denoised_left_df = df[left_cols]
    
    denoised_left_dfs.append(denoised_left_df)
    denoised_right_dfs.append(denoised_right_df)

100%|██████████| 364/364 [00:00<00:00, 544.93it/s]
100%|██████████| 364/364 [00:00<00:00, 497.93it/s]


# Calculate knee angle

In [16]:
# add the knee_angle column to each data frame in left_dfs
for i, df in tqdm(enumerate(left_dfs), total=len(left_dfs)):
    df_with_knee_angle = calculate_left_knee_angle(df)
    left_dfs[i] = df_with_knee_angle

100%|██████████| 364/364 [05:58<00:00,  1.01it/s]


In [17]:
# add the knee_angle column to each data frame in right_dfs
for i, df in tqdm(enumerate(right_dfs), total=len(right_dfs)):
    df_with_knee_angle = calculate_right_knee_angle(df)
    right_dfs[i] = df_with_knee_angle

100%|██████████| 364/364 [06:03<00:00,  1.00it/s]


In [18]:
# add the knee_angle column to each data frame in left_dfs
for i, df in tqdm(enumerate(denoised_left_dfs), total=len(denoised_left_dfs)):
    df_with_knee_angle = calculate_left_knee_angle(df)
    denoised_left_dfs[i] = df_with_knee_angle

100%|██████████| 364/364 [06:04<00:00,  1.00s/it]


In [19]:
# add the knee_angle column to each data frame in left_dfs
for i, df in tqdm(enumerate(denoised_right_dfs), total=len(denoised_right_dfs)):
    df_with_knee_angle = calculate_right_knee_angle(df)
    denoised_right_dfs[i] = df_with_knee_angle

100%|██████████| 364/364 [06:02<00:00,  1.00it/s]


# Comparative visualization

In [20]:

# Define the index of the dataframe to compare #7, #8, #6 # 15, 
idx = 15

# Get the dataframes from both lists using the index
right_df = right_dfs[idx]
denoised_right_df = denoised_right_dfs[idx]

# Check if the activity column has walking in it
if 'walking' in right_df['activity'].unique():

    # Filter the dataframes to only include walking activity
    right_walking = right_df[right_df['activity'] == 'walking'].head(200)
    denoised_right_walking = denoised_right_df[denoised_right_df['activity'] == 'walking'].head(200)

    # Plot the knee angle in time for walking activity
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=right_walking['time'], y=right_walking['knee_angle'], name='right Knee Angle'))
    fig.add_trace(go.Scatter(x=denoised_right_walking['time'], y=denoised_right_walking['knee_angle'], name='denoised right Knee Angle'))
    fig.update_layout(xaxis_title='Time (s)', yaxis_title='Knee Angle (deg)',
                      title=f'Comparison of Knee Angle in Time for Walking Activity (Index: {idx})', legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
    fig.show()

else:
    print("Check another dataframe, 'walking' activity not found")