In [2]:
import pandas as pd
import numpy as np
from tslearn.utils import to_time_series,to_time_series_dataset,to_sklearn_dataset
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.model_selection import train_test_split
from tslearn.neighbors import KNeighborsTimeSeriesClassifier,KNeighborsTimeSeries
from tslearn.svm import TimeSeriesSVC
from tslearn.preprocessing import TimeSeriesScalerMinMax
from sklearn.model_selection import GridSearchCV,StratifiedKFold,KFold
from sklearn.pipeline import Pipeline
import os
from scipy.spatial.transform import Rotation
import tslearn.metrics as tsm
import matplotlib.pyplot as plt

# Initialize routines needed to calculate Quaterneons

In [3]:
def quaternion_inverse(q):
  """Calculate the inverse of a quaternion.

  Args:
    q: A quaternion represented as a 4-element numpy array.

  Returns:
    The inverse of the quaternion, represented as a 4-element numpy array.
  """

  q_conj = np.conj(q)
  q_norm = np.linalg.norm(q)
  return q_conj / (q_norm * q_norm)

In [4]:
def quaternion_multiply(q1, q2):
  """Multiply two quaternions.

  Args:
    q1: A quaternion represented as a 4-element numpy array.
    q2: A quaternion represented as a 4-element numpy array.

  Returns:
    The product of the two quaternions, represented as a 4-
    element numpy array.
    
    """
  w1, x1, y1, z1 = q1
  w2, x2, y2, z2 = q2
  w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
  x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
  y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
  z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
  return w, x, y, z


In [5]:
def vec_length(v: np.array):
    return np.sqrt(sum(i**2 for i in v))

def normalize(v):
    norm = np.linalg.norm(v)
    if norm == 0: 
       return v
    return v / norm

def orientation(rs: np.array, ls: np.array):
    axis_z = normalize((rs - ls))
    if vec_length(axis_z) == 0:
        axis_z = np.array((0, -1, 0))
        
    axis_x = np.cross(np.array((0, 0, 1)), axis_z)
    if vec_length(axis_x) == 0:
        axis_x = np.array((1, 0, 0))
    
    axis_y = np.cross(axis_z, axis_x)
    rot_matrix = np.matrix([axis_x, axis_y, axis_z]).transpose()
    #quat=Rotation.from_matrix(rot_matrix).as_quat()
    return Rotation.from_matrix(rot_matrix).as_quat()

In [6]:
def to_quat(df: pd.DataFrame):
    return df.apply(lambda x: orientation(np.array([x[0],x[1],x[2]]),np.array([x[3],x[4],x[5]])),axis=1).to_list()

In [7]:
def get_distance(df:list):
    dfr=[]
    for i in range(0,len(df)-1,2):
        xi = quaternion_inverse(df[i])
        xy = quaternion_multiply(xi,df[i+1])
        dfr.append(2*np.arccos(xy[0])*180/np.pi)
        #dfr.append(np.arccos(xy[0]))
    return dfr

# Loop thru the workout csv files and build one Pandas dataframe

In [8]:
workouts= pd.DataFrame()
y=[]
for root, dirs, files in os.walk('../Data/MediaPipe/Train'):
        for name in files:
                if(name.endswith('.csv') == False):
                    continue
                filepath = root + os.sep + name
                worktype = filepath.split(os.sep )[-2]
                filename = filepath.split(os.sep)[-1].replace('.csv','')
                if(os.path.getsize(filepath) > 0):
                    df = pd.read_csv(filepath,index_col=0)
                    df1 = pd.DataFrame()
                    df1['shoulder'] = get_distance(to_quat(pd.concat([df['11'],df['11.1'],df['11.2'],df['23'],df['23.1'],df['23.2']],axis=1)))
                    df1['hip'] = get_distance(to_quat(pd.concat([df['23'],df['23.1'],df['23.2'],df['24'],df['24.1'],df['24.2']],axis=1)))
                    df1['lhand1'] = get_distance(to_quat(pd.concat([df['11'],df['11.1'],df['11.2'],df['13'],df['13.1'],df['13.2']],axis=1)))
                    df1['rhand1'] = get_distance(to_quat(pd.concat([df['12'],df['12.1'],df['12.2'],df['14'],df['14.1'],df['14.2']],axis=1)))
                    df1['lhand2'] = get_distance(to_quat(pd.concat([df['13'],df['13.1'],df['13.2'],df['15'],df['15.1'],df['15.2']],axis=1)))
                    df1['rhand2'] = get_distance(to_quat(pd.concat([df['14'],df['14.1'],df['14.2'],df['16'],df['16.1'],df['16.2']],axis=1)))
                    df1['lleg1'] = get_distance(to_quat(pd.concat([df['23'],df['23.1'],df['23.2'],df['25'],df['25.1'],df['25.2']],axis=1)))
                    df1['rleg1'] = get_distance(to_quat(pd.concat([df['24'],df['24.1'],df['24.2'],df['26'],df['26.1'],df['26.2']],axis=1)))
                    df1['lleg2'] = get_distance(to_quat(pd.concat([df['25'],df['25.1'],df['25.2'],df['27'],df['27.1'],df['27.2']],axis=1)))
                    df1['rleg2'] = get_distance(to_quat(pd.concat([df['26'],df['26.1'],df['26.2'],df['28'],df['28.1'],df['28.2']],axis=1)))
                    df1['filename'] = filename
                    workouts = pd.concat([workouts,df1],axis=0)
                    y.append(worktype)


  return df.apply(lambda x: orientation(np.array([x[0],x[1],x[2]]),np.array([x[3],x[4],x[5]])),axis=1).to_list()
  return df.apply(lambda x: orientation(np.array([x[0],x[1],x[2]]),np.array([x[3],x[4],x[5]])),axis=1).to_list()
  return df.apply(lambda x: orientation(np.array([x[0],x[1],x[2]]),np.array([x[3],x[4],x[5]])),axis=1).to_list()
  return df.apply(lambda x: orientation(np.array([x[0],x[1],x[2]]),np.array([x[3],x[4],x[5]])),axis=1).to_list()
  return df.apply(lambda x: orientation(np.array([x[0],x[1],x[2]]),np.array([x[3],x[4],x[5]])),axis=1).to_list()
  return df.apply(lambda x: orientation(np.array([x[0],x[1],x[2]]),np.array([x[3],x[4],x[5]])),axis=1).to_list()
  return df.apply(lambda x: orientation(np.array([x[0],x[1],x[2]]),np.array([x[3],x[4],x[5]])),axis=1).to_list()
  return df.apply(lambda x: orientation(np.array([x[0],x[1],x[2]]),np.array([x[3],x[4],x[5]])),axis=1).to_list()
  return df.apply(lambda x: orientation(np.array([x[0],x[1],x[2]]),np.array([x[3],x[4],x[5]])),a

# Scale the dataset

In [15]:
for x in workouts.select_dtypes(include=np.number).columns:
    workouts[x] = StandardScaler().fit_transform(workouts[x].values.reshape(-1,1))


In [16]:
X=workouts.set_index('filename').groupby('filename').apply(pd.DataFrame.to_numpy).to_numpy()
encoder = LabelEncoder()
y = encoder.fit_transform(y)
# conver to time series dataset needed for tslearn
formatted_time_series = to_time_series_dataset(X)
X_train, X_test, y_train, y_test = train_test_split(formatted_time_series, y, test_size=0.3, random_state=42)

# KNN Timeseries Classifier

In [17]:
params = {'n_neighbors':2,'weights':'distance','metric':'dtw'}
knn = KNeighborsTimeSeriesClassifier(**params)
knn.fit(X_train, y_train)

In [18]:
accuracy = knn.score(X_train, y_train),knn.score(X_test, y_test)

In [19]:
accuracy

(1.0, 0.7575757575757576)

# Use Stratified Kfold to cross validate the resuls

In [20]:
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
lst_accu_stratified = {'train_score':[],'test_score':[]}
  
for train_index, test_index in skf.split(X, y):
    x_train_fold, x_test_fold = formatted_time_series[train_index], formatted_time_series[test_index]
    y_train_fold, y_test_fold = y[train_index], y[test_index]
    knn.fit(x_train_fold, y_train_fold)
    lst_accu_stratified['train_score'].append(knn.score(x_train_fold, y_train_fold))
    lst_accu_stratified['test_score'].append(knn.score(x_test_fold, y_test_fold))



In [21]:
lst_accu_stratified

{'train_score': [1.0, 1.0, 1.0, 1.0, 1.0],
 'test_score': [0.8181818181818182,
  0.7727272727272727,
  0.8636363636363636,
  0.7727272727272727,
  0.7619047619047619]}

# Hyperparameter tuning using GridSearch

In [24]:
pipeline = GridSearchCV(
    Pipeline([
            ('knn', KNeighborsTimeSeriesClassifier())
    ]),
    {'knn__n_neighbors': [2,5], 'knn__weights': ['uniform', 'distance'],'knn__metric':['dtw', 'softdtw']},
)

In [25]:
pipeline.fit(X_train, y_train)

In [26]:
pipeline.cv_results_

{'mean_fit_time': array([0.07023497, 0.0694119 , 0.06190071, 0.06413183, 0.06313367,
        0.06395278, 0.06342444, 0.06216593]),
 'std_fit_time': array([0.00671765, 0.01048977, 0.00259405, 0.00395152, 0.00196393,
        0.00195888, 0.00178697, 0.00168892]),
 'mean_score_time': array([0.87895198, 0.86364808, 0.81385565, 0.8086194 , 1.18292027,
        1.1414484 , 1.14220824, 1.15995488]),
 'std_score_time': array([0.06798021, 0.0684863 , 0.07152478, 0.08578673, 0.13698069,
        0.07529232, 0.07505742, 0.07530937]),
 'param_knn__metric': masked_array(data=['dtw', 'dtw', 'dtw', 'dtw', 'softdtw', 'softdtw',
                    'softdtw', 'softdtw'],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_knn__n_neighbors': masked_array(data=[2, 2, 5, 5, 2, 2, 5, 5],
              mask=[False, False, False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_knn

In [27]:
pipeline.best_score_,pipeline.best_params_

(0.7383333333333333,
 {'knn__metric': 'softdtw', 'knn__n_neighbors': 2, 'knn__weights': 'uniform'})

# SVM Timeseries Classifier

In [30]:
svc = TimeSeriesSVC(kernel="gak", gamma=.1)
svc.fit(X_train, y_train)

In [31]:
svc.score(X_train, y_train),svc.score(X_test, y_test)

(1.0, 0.30303030303030304)

In [34]:
pipeline = GridSearchCV(
    Pipeline([
            ('svc', TimeSeriesSVC())
    ]),
    {'svc__kernel': ['gak',], 'svc__gamma': [0.1,0.5,1.0]},
)

In [35]:
pipeline.fit(X_train, y_train)
pipeline.score(X_train, y_train),pipeline.score(X_test, y_test)

(1.0, 0.30303030303030304)

In [36]:
pipeline.best_score_,pipeline.best_params_

(0.4208333333333333, {'svc__gamma': 0.1, 'svc__kernel': 'gak'})