In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from scipy.stats import skew, kurtosis
from scipy.fft import fft
from sklearn.svm import SVC

In [2]:
# Load the prepared sensor recordings into memory.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run as-is on a machine where this file exists.
csv_path = '/Users/mm/Desktop/Myolab/prepared_data.csv'
data_df = pd.read_csv(csv_path)

In [3]:
# Show the column labels of the loaded frame (displayed as the cell output).
data_df.columns

Index(['exercise', 'rep_count_from_intermediate', 'rep_count_from_start',
       'ref_xy_rotation', 'time', 'euler_X', 'euler_Y', 'euler_Z'],
      dtype='object')

In [4]:
# Show (row count, column count) of the loaded frame.
data_df.shape

(1188661, 8)

In [5]:
def process_chunk(chunk, window=5, angles=('euler_X', 'euler_Y', 'euler_Z')):
    """Return a copy of ``chunk`` with derived features for each Euler angle.

    For every column named in ``angles`` the following columns are appended,
    in this order:

    * ``<angle>_diff``      - first difference; the first row is set to 0.
    * ``<angle>_roll_mean`` - rolling mean over ``window`` rows.
    * ``<angle>_fft``       - magnitude of the FFT taken over the whole chunk.
    * ``<angle>_skew``      - rolling skewness over ``window`` rows.
    * ``<angle>_kurt``      - rolling kurtosis over ``window`` rows.

    The leading rows of each rolling statistic (where the window is not yet
    full) are back-filled from the first available value.

    Parameters
    ----------
    chunk : pandas.DataFrame
        Frame containing the columns listed in ``angles``. It is not modified.
    window : int, default 5
        Number of rows in each rolling window.
    angles : sequence of str, default ('euler_X', 'euler_Y', 'euler_Z')
        Names of the columns to derive features from.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns plus the derived ones.
    """
    # Work on a copy: the caller may pass a slice of a larger frame, and adding
    # columns to such a slice triggers SettingWithCopyWarning.
    out = chunk.copy()

    for angle in angles:
        signal = out[angle]
        rolling = signal.rolling(window=window)

        out[f'{angle}_diff'] = signal.diff().fillna(0)
        # .bfill() replaces the deprecated fillna(method='bfill').
        out[f'{angle}_roll_mean'] = rolling.mean().bfill()
        # NOTE(review): the FFT output is indexed by frequency bin, not by
        # time, yet it is stored row-by-row next to the samples, and its values
        # depend on the chunk length. Confirm this is the intended feature.
        out[f'{angle}_fft'] = np.abs(fft(signal.to_numpy()))
        out[f'{angle}_skew'] = rolling.skew().bfill()
        out[f'{angle}_kurt'] = rolling.kurt().bfill()

    return out

chunk_size = 100000  # Adjust chunk size based on your system's memory

# Split the frame into contiguous row ranges with explicit positional slices.
# np.array_split on a DataFrame goes through DataFrame.swapaxes, which is
# deprecated in recent pandas. The chunk lengths below are the ones array_split
# would produce: the first `n_larger` chunks get one extra row.
n_rows = len(data_df)
n_chunks = n_rows // chunk_size + 1
base_rows, n_larger = divmod(n_rows, n_chunks)
chunk_lengths = [base_rows + 1] * n_larger + [base_rows] * (n_chunks - n_larger)
bounds = np.cumsum([0] + chunk_lengths)

# Each slice is copied so process_chunk receives an independent frame.
# NOTE(review): diff / rolling / FFT features are computed per chunk, so values
# near chunk edges (and every FFT value) depend on chunk_size.
chunks = [
    process_chunk(data_df.iloc[start:stop].copy())
    for start, stop in zip(bounds[:-1], bounds[1:])
]

# Concatenate all processed chunks back into a single DataFrame
data_df_processed = pd.concat(chunks, ignore_index=True)

In [6]:
# Show the column labels after feature engineering (original + derived columns).
data_df_processed.columns

Index(['exercise', 'rep_count_from_intermediate', 'rep_count_from_start',
       'ref_xy_rotation', 'time', 'euler_X', 'euler_Y', 'euler_Z',
       'euler_X_diff', 'euler_X_roll_mean', 'euler_X_fft', 'euler_X_skew',
       'euler_X_kurt', 'euler_Y_diff', 'euler_Y_roll_mean', 'euler_Y_fft',
       'euler_Y_skew', 'euler_Y_kurt', 'euler_Z_diff', 'euler_Z_roll_mean',
       'euler_Z_fft', 'euler_Z_skew', 'euler_Z_kurt'],
      dtype='object')

In [7]:
# Columns taken directly from the loaded data.
base_columns = [
    'rep_count_from_intermediate', 'rep_count_from_start',
    'ref_xy_rotation', 'time', 'euler_X', 'euler_Y', 'euler_Z',
]

# Derived columns, in the order they appear in data_df_processed:
# all five statistics for X, then Y, then Z.
derived_columns = [
    f'{angle}_{suffix}'
    for angle in ('euler_X', 'euler_Y', 'euler_Z')
    for suffix in ('diff', 'roll_mean', 'fft', 'skew', 'kurt')
]

feature_columns = base_columns + derived_columns

# Model inputs are every feature column; the label is the exercise name.
X = data_df_processed[feature_columns]
y = data_df_processed['exercise']

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
# NOTE(review): rows are split at random, so neighbouring samples of the same
# recording can land on both sides of the split - confirm this is acceptable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [8]:
# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits.
# NOTE(review): the random-forest cells in this notebook fit and predict on
# X_train / X_test, not on these scaled arrays.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Random-forest hyperparameters, gathered in one mapping.
rf_params = {
    'n_estimators': 100,       # number of trees (consider increasing to 200 or 300)
    'max_features': 'sqrt',    # features considered when splitting a node
    'max_depth': 10,           # cap on tree depth
    'min_samples_split': 10,   # samples required to split an internal node
    'min_samples_leaf': 4,     # samples required at a leaf node
    'bootstrap': True,         # draw bootstrap samples for each tree
    'random_state': 42,        # fixed seed for reproducibility
}

classifier = RandomForestClassifier(**rf_params)

# Fit on the unscaled training features.
classifier.fit(X_train, y_train)

In [10]:
# Score the fitted forest on the rows it was trained on, as a baseline to
# compare against the held-out score.
y_train_pred = classifier.predict(X_train)

train_accuracy = accuracy_score(y_true=y_train, y_pred=y_train_pred)
train_report = classification_report(y_true=y_train, y_pred=y_train_pred)

print("Training Accuracy:", train_accuracy)
print(train_report)

Training Accuracy: 0.6773846179731798
                precision    recall  f1-score   support

      armraise       0.82      0.73      0.77     67071
     bicepcurl       0.76      0.96      0.85     71118
       birddog       0.68      0.98      0.80    260136
        burpee       0.61      0.49      0.54     91409
        crunch       0.90      0.31      0.46     42568
    hammercurl       0.48      0.68      0.56     87930
 overheadpress       0.78      0.35      0.48     60226
        pushup       0.73      0.44      0.55     52011
tricepkickback       0.93      0.16      0.28     35486
           vup       0.75      0.40      0.52     64107

      accuracy                           0.68    832062
     macro avg       0.75      0.55      0.58    832062
  weighted avg       0.71      0.68      0.65    832062


In [11]:
# Score the fitted forest on the held-out rows.
y_pred = classifier.predict(X_test)

test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
test_report = classification_report(y_true=y_test, y_pred=y_pred)

print("Accuracy:", test_accuracy)
print(test_report)

Accuracy: 0.672803344933665
                precision    recall  f1-score   support

      armraise       0.82      0.73      0.77     28565
     bicepcurl       0.75      0.95      0.84     30146
       birddog       0.68      0.97      0.80    111662
        burpee       0.59      0.48      0.53     38972
        crunch       0.89      0.29      0.44     18183
    hammercurl       0.48      0.67      0.56     38132
 overheadpress       0.78      0.34      0.48     25820
        pushup       0.73      0.43      0.54     22295
tricepkickback       0.92      0.16      0.27     15271
           vup       0.74      0.40      0.52     27553

      accuracy                           0.67    356599
     macro avg       0.74      0.54      0.58    356599
  weighted avg       0.70      0.67      0.64    356599


In [12]:
# Candidate random-forest hyperparameters for a grid search.
# The full grid has 3 * 2 * 4 * 3 * 3 * 2 = 432 combinations.
param_grid = dict(
    # Number of trees; more trees may improve performance.
    n_estimators=[100, 200, 300],
    # How many features to consider when looking for the best split.
    max_features=['sqrt', 'log2'],
    # Maximum number of levels per tree (None leaves depth unlimited).
    max_depth=[10, 20, 30, None],
    # Minimum number of samples in a node before it may be split.
    min_samples_split=[2, 10, 20],
    # Minimum number of samples allowed in a leaf node.
    min_samples_leaf=[1, 4, 10],
    # Sample rows with replacement (True) or not (False).
    bootstrap=[True, False],
)