In [9]:
import pandas as pd
import numpy as np
import numpy.typing as npt
import os
import matplotlib.pyplot as plt
from scipy.spatial.transform import Rotation

In [10]:
# Show the notebook's working directory. os.getcwd() is used instead of the `!pwd` shell
# escape so the cell also runs on platforms without a `pwd` command and outside IPython.
print(os.getcwd())

/Users/mm/Desktop/Myolab


In [11]:
# Root folder that is searched recursively for the arm-raise CSV files.
# Set the ARMRAISE_DATA_DIR environment variable to point at the data on another machine;
# when it is unset the original local path is used, so existing behaviour is unchanged.
directory_path = os.environ.get(
    "ARMRAISE_DATA_DIR",
    "/Users/mm/Desktop/Myolab/imu_fitness_basic/armraise_v1.0/data",
)

In [4]:
# Collect one DataFrame per CSV file found anywhere below directory_path.
armraise_df = []
for subdir, dirs, files in os.walk(directory_path):
    # os.walk yields entries in arbitrary, filesystem-dependent order. Sorting `dirs` in
    # place (which also fixes the order of descent) and sorting `files` makes the row order
    # of the combined data reproducible across runs and machines.
    dirs.sort()
    for file in sorted(files):
        if not file.endswith('.csv'):
            continue
        file_path = os.path.join(subdir, file)
        df = pd.read_csv(file_path)
        # Tag every row with the exercise label, as the first column.
        df.insert(0, 'exercise', "armraise")
        # Some files carry an 'Unnamed: 0' column (presumably a saved row index); drop it
        # when present. errors='ignore' makes this a no-op for files without it.
        df = df.drop(columns='Unnamed: 0', errors='ignore')
        armraise_df.append(df)

# os.walk silently yields nothing for a missing directory; fail here with a clear message
# instead of letting the later pd.concat raise "No objects to concatenate".
if not armraise_df:
    raise FileNotFoundError(f"No .csv files found under {directory_path!r}")

In [5]:
# armraise_df is a list of per-file DataFrames when this cell first runs and is rebound to
# the single combined DataFrame here. The isinstance guard makes re-running the cell a no-op
# instead of a TypeError from pd.concat being handed a DataFrame.
if not isinstance(armraise_df, pd.DataFrame):
    # ignore_index=True discards the per-file row numbers and builds one 0..n-1 index.
    armraise_df = pd.concat(armraise_df, ignore_index=True)

In [6]:
# Preview the first five rows of the combined frame.
armraise_df.head(n=5)

Unnamed: 0,exercise,rep_count_from_intermediate,rep_count_from_start,ref_xy_rotation,time,rotation_matrix_m11,rotation_matrix_m12,rotation_matrix_m13,rotation_matrix_m21,rotation_matrix_m22,rotation_matrix_m23,rotation_matrix_m31,rotation_matrix_m32,rotation_matrix_m33
0,armraise,0.464286,0.0,5.247949,0.0,-0.424278,0.135893,0.895277,-0.097775,-0.98977,0.1039,0.900238,-0.043453,0.433225
1,armraise,0.481861,0.017575,5.247949,0.05,-0.433772,0.134866,0.890872,-0.100751,-0.989794,0.100785,0.895372,-0.046038,0.442933
2,armraise,0.499241,0.034955,5.247949,0.1,-0.453615,0.138319,0.880398,-0.109453,-0.98905,0.098995,0.884451,-0.051456,0.463788
3,armraise,0.516428,0.052142,5.247949,0.15,-0.474558,0.14296,0.868538,-0.117201,-0.9882,0.098619,0.872387,-0.054993,0.485713
4,armraise,0.533423,0.069137,5.247949,0.2,-0.498132,0.1497,0.854081,-0.125717,-0.987045,0.099683,0.857939,-0.057717,0.510499


In [7]:
# (number of rows, number of columns) of the combined frame.
armraise_df.shape

(95636, 14)

In [8]:
# Column labels of the combined frame.
armraise_df.columns

Index(['exercise', 'rep_count_from_intermediate', 'rep_count_from_start',
       'ref_xy_rotation', 'time', 'rotation_matrix_m11', 'rotation_matrix_m12',
       'rotation_matrix_m13', 'rotation_matrix_m21', 'rotation_matrix_m22',
       'rotation_matrix_m23', 'rotation_matrix_m31', 'rotation_matrix_m32',
       'rotation_matrix_m33'],
      dtype='object')

Check for Null Values

In [8]:
# Number of missing values in each column (isna is the method that isnull aliases).
armraise_df.isna().sum()

exercise                       0
rep_count_from_intermediate    0
rep_count_from_start           0
ref_xy_rotation                0
time                           0
rotation_matrix_m11            0
rotation_matrix_m12            0
rotation_matrix_m13            0
rotation_matrix_m21            0
rotation_matrix_m22            0
rotation_matrix_m23            0
rotation_matrix_m31            0
rotation_matrix_m32            0
rotation_matrix_m33            0
dtype: int64

Check for Outliers at each exercise level:
 
In this context, an outlier is an abnormal posture or movement with respect to that particular exercise. We need to identify such rows to ensure proper classification.

1. Based on a cursory reading, I understand that rotation matrices are orthogonal and have a determinant of exactly 1 (in practice, very close to 1 because of floating-point rounding), so I plan to check for deviations from 1 as a sign of data corruption.

In [9]:
#matrix operations
def calculate_determinant(row):
    """Return the determinant of the 3x3 matrix stored row-major in ``row``.

    Parameters
    ----------
    row : array-like of 9 numbers
        A pandas Series (as passed by ``DataFrame.apply(..., axis=1)``), a NumPy array,
        or a plain list/tuple holding m11, m12, m13, m21, ..., m33 in that order.

    Returns
    -------
    float
        The determinant of the reshaped 3x3 matrix.

    Raises
    ------
    ValueError
        If ``row`` does not contain exactly nine values.
    """
    # np.asarray accepts a Series as well as plain sequences, unlike ``row.values``.
    matrix = np.asarray(row, dtype=float).reshape(3, 3)
    return np.linalg.det(matrix)

def frobenius_norm(row, reference=None):
    """Return the Frobenius norm of a 3x3 matrix, or of its difference from ``reference``.

    Parameters
    ----------
    row : array-like of 9 numbers
        A pandas Series (as passed by ``DataFrame.apply(..., axis=1)``), a NumPy array,
        or a plain list/tuple holding the matrix entries in row-major order.
    reference : array-like of 9 numbers or 3x3 array, optional
        Matrix to subtract before taking the norm, e.g. an average matrix. When omitted
        the norm of the matrix itself is returned, exactly as before.

    Returns
    -------
    float
        ``||M||_F`` when ``reference`` is None, otherwise ``||M - reference||_F``.

    Raises
    ------
    ValueError
        If ``row`` or ``reference`` does not contain exactly nine values.

    Notes
    -----
    For any orthogonal 3x3 matrix ``||M||_F`` equals sqrt(3), so the norm without a
    reference cannot distinguish one rotation from another; pass ``reference`` to
    measure how far a matrix is from a typical one.
    """
    matrix = np.asarray(row, dtype=float).reshape(3, 3)
    if reference is not None:
        matrix = matrix - np.asarray(reference, dtype=float).reshape(3, 3)
    return np.linalg.norm(matrix, 'fro')

In [10]:
# Column names of the nine rotation-matrix entries in row-major order:
# rotation_matrix_m11, rotation_matrix_m12, ..., rotation_matrix_m33.
rotation_matrix_labels = [
    f"rotation_matrix_m{matrix_row}{matrix_col}"
    for matrix_row in range(1, 4)
    for matrix_col in range(1, 4)
]

In [11]:
# Tolerances for the rotation-matrix sanity checks.
DETERMINANT_TOLERANCE = 0.05    # allowed |det(R) - 1|
ORTHOGONALITY_TOLERANCE = 0.05  # allowed Frobenius norm of (R @ R.T - I)

# Stack every row's nine entries into an (n_rows, 3, 3) array so NumPy processes all
# matrices in one batched call instead of a Python-level loop over the rows.
rotation_matrices = (
    armraise_df[rotation_matrix_labels]
    .to_numpy(dtype=float)
    .reshape(len(armraise_df), 3, 3)
)

determinants = np.linalg.det(rotation_matrices)
armraise_df['determinant'] = determinants

# det(R) close to 1 does not by itself show that R is orthogonal (a sheared matrix can also
# have determinant 1), so R @ R.T is additionally compared against the identity.
gram = rotation_matrices @ rotation_matrices.transpose(0, 2, 1)
orthogonality_error = np.linalg.norm(gram - np.eye(3), axis=(1, 2))

# A row is flagged when it fails either check.
is_outlier = (np.abs(determinants - 1) > DETERMINANT_TOLERANCE) | (
    orthogonality_error > ORTHOGONALITY_TOLERANCE
)
outliers = armraise_df[is_outlier]

print("Detected outliers: ", outliers.shape)
print(outliers)

Detected outliers:  (0, 15)
Empty DataFrame
Columns: [exercise, rep_count_from_intermediate, rep_count_from_start, ref_xy_rotation, time, rotation_matrix_m11, rotation_matrix_m12, rotation_matrix_m13, rotation_matrix_m21, rotation_matrix_m22, rotation_matrix_m23, rotation_matrix_m31, rotation_matrix_m32, rotation_matrix_m33, determinant]
Index: []


No determinant deviates from 1 by more than 0.05, which is consistent with valid rotation matrices, so there is no sign of data corruption. Next,

2. To ensure consistency per exercise, we can check for outliers using the Frobenius norm, comparing each rotation matrix with an average rotation matrix to quantify the deviation. 
 Large norm values => incorrect form or erratic movements
 
 Ideally we need small and consistent norm values to ensure consistency and alignment in the movements for the exercise.

In [12]:
# The Frobenius norm of a 3x3 matrix equals the Euclidean norm of its nine entries, so one
# vectorised call over the (n_rows, 9) block replaces the slow per-row apply.
# Note: for an orthogonal matrix this value is always sqrt(3) ~= 1.732.
armraise_df['frobenius_norm'] = np.linalg.norm(
    armraise_df[rotation_matrix_labels].to_numpy(dtype=float), axis=1
)

In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) of the per-row norms.
frobenius_summary = armraise_df['frobenius_norm'].describe()
print(frobenius_summary)

count    9.563600e+04
mean     1.732051e+00
std      1.504582e-07
min      1.732049e+00
25%      1.732051e+00
50%      1.732051e+00
75%      1.732051e+00
max      1.732052e+00
Name: frobenius_norm, dtype: float64


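The standard deviation of the norms is minute: every value is essentially √3 ≈ 1.732051. This is expected, because the Frobenius norm of any orthogonal 3×3 matrix is exactly √3 (‖R‖² = trace(RᵀR) = trace(I) = 3). The result therefore re-confirms that the matrices are orthogonal, but on its own it says nothing about how uniform the movements are; measuring that requires the norm of the difference between each matrix and the average matrix, as described above. This is good, but we must check for overfitting later. Let's check for more inconsistencies once we make more sense of the features. But for now, the data seems to be free from inconsistencies, missing values and outliers.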

Exploratory Data Analysis: Perform an initial analysis to understand the data distribution, identify patterns, and uncover insights that will inform feature engineering.

In [16]:
# Remove the two diagnostic columns that were added for the sanity checks.
helper_columns = ["frobenius_norm", "determinant"]
armraise_df = armraise_df.drop(columns=helper_columns)

In [18]:
# Write the combined frame to armraise_data.csv in the working directory, without the index.
armraise_df.to_csv(path_or_buf='armraise_data.csv', index=False)