In [36]:
import json
import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd
import os
import seaborn as sns

In [37]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC

In [38]:
import pickle

# Notebook for the normalization and distance calculation between the landmarks

In [39]:
#Dictionary indicating the index for each bodypart
BODY_POINTS = {
    "Nose":  0,
    "LEyeIn": 1,
    "LEye": 2,
    "LEyeOut": 3,
    "REyeIn": 4,
    "REye": 5,
    "REyeOut": 6,
    "LEar": 7,
    "REar": 8,
    "LMouth": 9,
    "RMouth": 10,
    "LShoulder": 11,
    "RShoulder": 12,
    "LElbow": 13,
    "RElbow": 14,
    "LWrist": 15,
    "RWrist": 16,
    "LPinky": 17,
    "RPinky": 18,
    "LIndex": 19,
    "RIndex": 20,
    "LThumb": 21,
    "RThumb": 22,
    "LHip": 23,
    "RHip": 24,
    "LKnee": 25,
    "RKnee": 26,
    "LAnkle": 27,
    "RAnkle": 28,
    "LHeel": 29,
    "RHeel": 30,
    "LFoot": 31,
    "RFoot": 32
    }

In [40]:
def load_json(folder, filename):
    '''Loads a json file using the folder and filename as an imput'''
    
    filepath = os.path.join(folder, filename)
    dict_list = []
    
    with open(filepath) as json_file:
        data = json.load(json_file)
        
    for key, value in data.items():
        dict_list.append(value)
        
    return dict_list

In [41]:
def dict_to_array(dictionary):
    '''Transforms a json (or dictionary) into a numpy array'''
    
    array = []
    
    for key, value in dictionary.items():
        array.append(value)
        
    return np.array(array)

In [42]:
def calculate_body_angle(coordinates, body_part1, body_part2):
    '''Calculates the angle of the body using two body parts as input, body_part1 will be used as the center of the new coordinate system'''
    
    coordinates = coordinates[:, :2].copy()
    
    a = coordinates[BODY_POINTS[body_part1]]
    b = coordinates[BODY_POINTS[body_part2]]
    c = np.array([b[0],a[1]])
    
    v1 = np.subtract(b,a)
    v2 = np.subtract(c,a)
    
    dot = v1.dot(v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)
    
    angle_rad = np.arccos(dot/(norm_v1*norm_v2))
    
    return math.degrees(angle_rad)

In [43]:
def get_pose_center(coordinates):
    '''Calculate the centre of the pose, defined as the midpoint between point the neck and the mid hip'''
    
    mid_hip = (coordinates[BODY_POINTS['LHip']] + coordinates[BODY_POINTS['RHip']])/2
    neck = (coordinates[BODY_POINTS['LShoulder']] + coordinates[BODY_POINTS['RShoulder']])/2
    center = (mid_hip + neck)/2
    
    return center

In [44]:
def get_distance(coordinates, body_part1, body_part2, dimensions = '2D'):
    '''Calculates the distance between two body parts'''
    
    if dimensions == '2D':
        coordinates = coordinates[:, :2].copy()
    
    bp1 = coordinates[BODY_POINTS[body_part1]]
    bp2 = coordinates[BODY_POINTS[body_part2]]
    
    distance = ((bp1-bp2)*(bp1-bp2)).sum() ** 0.5
    
    return distance

In [45]:
def get_pose_size(coordinates, torso_size_multiplier=2):
    """Calculates pose size.
    It is the maximum of two values:
      * Torso size multiplied by torso_size_multiplier
      * Maximum distance from pose center to any pose landmark
    """
    
    # This approach uses only 2D landmarks to compute pose size.
    coordinates = coordinates[:, :2].copy()

    # Hips center.
    left_hip = coordinates[BODY_POINTS['LHip']]
    right_hip = coordinates[BODY_POINTS['RHip']]
    hips = (left_hip + right_hip) * 0.5

    # Shoulders center.
    left_shoulder = coordinates[BODY_POINTS['LShoulder']]
    right_shoulder = coordinates[BODY_POINTS['RShoulder']]
    shoulders = (left_shoulder + right_shoulder) * 0.5

    # Torso size as the minimum body size.
    torso_size = np.linalg.norm(shoulders - hips)

    # Max dist to pose center.
    pose_center = get_pose_center(coordinates)
    max_dist = np.max(np.linalg.norm(coordinates - pose_center, axis=1))

    return max(torso_size * torso_size_multiplier, max_dist)

In [46]:
def normalize_pose_landmarks(coordinates, torso_size_multiplier=2):
    """Normalizes landmarks translation and scale."""

    norm_coordinates = coordinates.copy()
    
    # Normalize translation.
    pose_center = get_pose_center(norm_coordinates)
    norm_coordinates -= pose_center

    # Normalize scale.
    pose_size = get_pose_size(norm_coordinates, torso_size_multiplier)
    norm_coordinates /= pose_size
    # Multiplication by 100 is not required, but makes it eaasier to debug.
    norm_coordinates *= 100

    return norm_coordinates

In [47]:
def get_angle(coordinates, body_part1, body_part2, body_part3, dimensions = '2D'):
    '''Calculates the angle between three body parts'''
   
    if dimensions == '2D':
        coordinates = coordinates[:, :2].copy()
    
    a = coordinates[BODY_POINTS[body_part1]]
    b = coordinates[BODY_POINTS[body_part2]]
    c = coordinates[BODY_POINTS[body_part3]]
    
    v1 = np.subtract(a,b)
    v2 = np.subtract(c,b)
    
    dot = v1.dot(v2)
    
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)
    
    angle_rad = np.arccos(dot/(norm_v1*norm_v2))
    
    return math.degrees(angle_rad)

In [48]:
def calculate_pairwise_distances(coordinates, torso_size_multiplier=2):
    '''Calculates a set of distances for a coordinate system'''
    
    pairs = [('LShoulder', 'LWrist'), ('RShoulder', 'RWrist'), ('RHip', 'RAnkle'), ('LHip', 'LAnkle'), ('RWrist', 'LWrist'),
             ('LAnkle', 'RAnkle'), ('RHip', 'RWrist'), ('LHip', 'LWrist'), ('LWrist', 'LAnkle'), ('RWrist', 'RAnkle'), 
             ('RKnee', 'LKnee'), ('RHip', 'LKnee'), ('LHip', 'RKnee')]
    
    distances_list = []
    
    for pair in pairs:
        norm_coordinates = normalize_pose_landmarks(coordinates)
        distance = round(get_distance(norm_coordinates, pair[0], pair[1]),2)
        distances_list.append(distance)
        
    return distances_list

In [49]:
def calculate_set_of_angles(coordinates):
    '''Calculates a set of angles for a coordinates system'''
    
    joints = [('LShoulder', 'LElbow', 'LWrist'), ('RShoulder', 'RElbow', 'RWrist'), ('LShoulder', 'LHip', 'LKnee'),
              ('RShoulder', 'RHip', 'RKnee'), ('LHip', 'LKnee', 'LAnkle'), ('RHip', 'RKnee', 'RAnkle')]
    
    angles_list = []
    
    for joint in joints:        
        angle = round(get_angle(coordinates, joint[0], joint[1], joint[2]),2)
        angles_list.append(angle)
        
    return angles_list

In [50]:
def load_data(folder):
    '''Loads the data to train the model into a 2D list of lists shape'''
    
    exercises = {'PushUps':0, 'JumpingJack': 1, 'BodyWeightSquats':2, 'Lunges':3}
    selection =['PushUps', 'JumpingJack', 'BodyWeightSquats', 'Lunges']
    file_list = os.listdir(folder)
    data = []

    for file in file_list:
        
        if file.endswith('.json'):
            splited = file.split('_')
            exercise_name = splited[1]
            
            if exercise_name in selection :
                loaded_json = load_json(folder, file)
                frames_distances = []
                frames_angles =[]
                body_angles=[]
                ''
                for index in range(0,len(loaded_json)):
                    frame = loaded_json[index]
                    coordinates = dict_to_array(frame)
                    distances_list = calculate_pairwise_distances(coordinates)
                    angles_list = calculate_set_of_angles(coordinates)
                    body_angle = calculate_body_angle(coordinates, 'LAnkle', 'Nose')
                    body_angles.append(body_angle)
                    frames_distances.append(distances_list)
                    frames_angles.append(angles_list)
                    
                distances_array = np.array(frames_distances)
                joints_angles_array = np.array(frames_angles)
                min_angles = joints_angles_array.min(axis=0)
                max_angles = joints_angles_array.max(axis=0)
                
                angles_array = np.array(body_angles)
                min_distances = distances_array.min(axis=0)
                max_distances = distances_array.max(axis=0)
                mean_body_angle = angles_array.mean()
                
                row = []
                
                for position in range(0,len(min_distances)):
                    row.append(min_distances[position])
                    row.append(max_distances[position])
               
                for position in range (0,len(min_angles)) :
                    row.append(min_angles[position])
                    row.append(max_angles[position])
                    
                row.append(mean_body_angle)
                row.append(exercises[exercise_name])
                data.append(row)
                
    return data


In [51]:
def load_test_data(test_folder):
    '''Loads the data into a 2D list of lists shape'''
   
    file_list = os.listdir(test_folder)
    exercises = {'PushUps':0, 'JumpingJack': 1, 'BodyWeightSquats':2, 'Lunges': 3}
    selection =['PushUps', 'JumpingJack', 'BodyWeightSquats', 'Lunges']
    test_data = []

    for file in file_list:
        
        if file.endswith('.json'):
            splited = file.split('.')
            exercise_name = splited[0].split('_')[0]
            
            if exercise_name in selection :
                loaded_json = load_json(test_folder, file)
                frames_distances = []
                body_angles=[]
                frames_angles =[]
                
                for index in range(0,len(loaded_json)):
                    frame = loaded_json[index]
                    coordinates = dict_to_array(frame)
                    angles_list = calculate_set_of_angles(coordinates)
                    distances_list = calculate_pairwise_distances(coordinates)
                    body_angle = calculate_body_angle(coordinates, 'LAnkle', 'Nose')
                    body_angles.append(body_angle)
                    frames_distances.append(distances_list)
                    frames_angles.append(angles_list)
                
                joints_angles_array = np.array(frames_angles)
                min_angles = joints_angles_array.min(axis=0)
                max_angles = joints_angles_array.max(axis=0)

                distances_array = np.array(frames_distances)
                angles_array = np.array(body_angles)

                min_distances = distances_array.min(axis=0)
                max_distances = distances_array.max(axis=0)
                mean_body_angle = angles_array.mean()
                
                row = []
                
                for position in range(0,len(min_distances)):
                    row.append(min_distances[position])
                    row.append(max_distances[position])
                    
                for position in range (0,len(min_angles)) :
                    row.append(min_angles[position])
                    row.append(max_angles[position])
                    
                row.append(mean_body_angle)
                row.append(int(exercises[exercise_name]))
                row.append(file)
                test_data.append(row)
    return test_data


In [52]:
def data_to_df(folder, test=False): 
    '''Transforms the data into a pandas dataframe object'''
    
    cols = ['min_d0', 'max_d0', 'min_d1', 'max_d1', 'min_d2', 'max_d2', 'min_d3', 'max_d3', 'min_d4', 'max_d4',
            'min_d5', 'max_d5', 'min_d6', 'max_d6', 'min_d7', 'max_d7', 'min_d8', 'max_d8', 'min_d9', 'max_d9',
            'min_d10', 'max_d10', 'min_d11', 'max_d11', 'min_d12', 'max_d12', 'min_a0', 'max_a0', 'min_a1', 'max_a1',
            'min_a2', 'max_a2', 'min_a3', 'max_a3', 'min_a4', 'max_a4', 'min_a5', 'max_a5','mean_body_angle', 'class']
    
    if test == True:
        data = load_test_data(folder)
        cols.append('filename')
        dataframe = pd.DataFrame(data, columns =cols)
        return dataframe
        
    else : 
        data = load_data(folder)
       
        dataframe = pd.DataFrame(data, columns =cols)
    
        return dataframe

# Data Loading

## Define filepaths

In [53]:
#jsons filepaths
folder = '../raw_data/jsons'
tests_folder = '../raw_data/new_test_jsons'
calculate_deltas = False

## Load training and testing data

In [54]:
data = data_to_df(folder)

In [55]:
data.head()

Unnamed: 0,min_d0,max_d0,min_d1,max_d1,min_d2,max_d2,min_d3,max_d3,min_d4,max_d4,...,min_a2,max_a2,min_a3,max_a3,min_a4,max_a4,min_a5,max_a5,mean_body_angle,class
0,0.63,36.11,2.3,13.99,49.94,69.31,43.28,74.3,1.89,34.37,...,82.86,179.34,72.18,178.6,81.25,179.22,78.31,174.07,73.748449,3
1,8.03,35.84,19.36,48.82,68.3,70.54,64.88,67.06,13.01,25.2,...,151.32,166.67,153.94,166.94,159.24,169.19,164.88,171.13,5.256235,0
2,5.04,47.7,2.87,45.7,58.24,68.62,45.72,70.92,2.8,95.71,...,114.04,179.51,122.61,179.98,111.76,179.89,150.58,179.87,80.704001,1
3,21.49,47.07,23.71,48.19,53.99,68.97,52.7,69.14,16.11,82.46,...,162.77,178.26,165.71,176.11,164.69,179.57,165.33,179.77,85.606904,1
4,8.93,47.03,9.47,45.24,61.87,70.5,61.09,69.39,22.85,100.34,...,147.03,177.25,148.5,177.89,171.41,179.94,170.8,179.99,81.286244,1


In [56]:
if calculate_deltas:
    for i in range(0, 13) :
        col_min = f'min_d{i}'
        col_max= f'max_d{i}'
        data[f'delta_d{i}'] = data[col_max] - data[col_min]

In [57]:
tests = data_to_df(tests_folder, test=True)

In [58]:
tests.head()

Unnamed: 0,min_d0,max_d0,min_d1,max_d1,min_d2,max_d2,min_d3,max_d3,min_d4,max_d4,...,max_a2,min_a3,max_a3,min_a4,max_a4,min_a5,max_a5,mean_body_angle,class,filename
0,19.14,39.14,19.57,48.97,48.68,68.08,41.44,61.74,0.36,15.86,...,179.92,91.46,160.8,64.13,166.53,74.68,179.94,66.556565,3,Lunges_7.json
1,3.42,52.91,5.24,57.59,62.02,69.82,34.19,67.49,3.68,78.15,...,179.86,154.11,179.91,124.24,179.99,129.26,176.91,85.935388,1,JumpingJack_8.json
2,23.03,30.56,22.88,29.86,31.47,64.05,39.33,70.14,0.51,5.08,...,179.65,68.31,179.99,57.84,179.97,58.56,178.7,78.475268,2,BodyWeightSquats_15.json
3,33.63,49.21,33.76,57.5,63.59,71.33,57.05,63.85,19.64,39.04,...,179.98,88.44,179.79,85.49,179.9,108.42,179.94,85.138332,2,BodyWeightSquats_4.json
4,38.76,56.74,46.56,60.43,38.17,69.32,32.98,61.63,0.5,12.8,...,179.92,40.28,179.48,55.16,179.7,58.47,179.65,74.544289,2,BodyWeightSquats_19.json


In [59]:
if calculate_deltas:
    for i in range(0, 13) :
        col_min = f'min_d{i}'
        col_max= f'max_d{i}'
        tests[f'delta_d{i}'] = tests[col_max] - tests[col_min]

# Data Selection & Scaling

In [60]:
#Define X and y
X = data.drop(columns=['class']).copy()
y = data['class'].copy()

In [61]:
#Scale X 
scaler = MinMaxScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [62]:
#Split X into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2)

# SVC Grid Search

In [63]:
#Instantiating the model

model_svc = SVC()

param_grid_svc = {'C': [0.1, 1, 10, 100, 1000], 
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf', 'poly', 'sigmoid']} 
  
search_svc = GridSearchCV(model_svc, param_grid_svc, scoring = 'accuracy', n_jobs=-1)
  
# Fitting the model for grid search
search_svc.fit(X_train, y_train)

# Best scores
best_score_training = search_svc.best_score_
y_pred_0 = search_svc.predict(X_test)
best_score_testing = accuracy_score(y_test, y_pred_0)

print(f'Best training accuracy: {round(best_score_training, 2)*100}%')
print(f'Best testing accuracy: {round(best_score_testing, 2)*100}%')

Best training accuracy: 99.0%
Best testing accuracy: 98.0%


In [64]:
search_svc.best_params_

{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}

In [65]:
#Defining and scaling X_real from our testing data
X_real = tests.drop(columns=['class', 'filename']).copy()
X_real_scaled = scaler.transform(X_real)
y_real = tests['class'].copy()

In [66]:
#Predicting and evaluating model on our testing data
y_pred = search_svc.predict(X_real_scaled)
score = accuracy_score(y_real, y_pred)
results = pd.DataFrame({'Filename': tests['filename'], 'Real': y_real, 'Prediction': y_pred})
results['Outcome'] = results['Real'] == results['Prediction']

print(f'Accuracy on our testing set: {round(score, 2)*100}%')
 
results[results['Outcome'] == True]

Accuracy on our testing set: 91.0%


Unnamed: 0,Filename,Real,Prediction,Outcome
1,JumpingJack_8.json,1,1,True
2,BodyWeightSquats_15.json,2,2,True
3,BodyWeightSquats_4.json,2,2,True
4,BodyWeightSquats_19.json,2,2,True
5,JumpingJack_4.json,1,1,True
6,PushUps_13.json,0,0,True
7,BodyWeightSquats_8.json,2,2,True
9,PushUps_12.json,0,0,True
10,JumpingJack_5.json,1,1,True
12,BodyWeightSquats_5.json,2,2,True


# Best Model

In [67]:
#Instantiating the model

best_model = SVC(C=0.1, gamma=1, kernel='poly')
  
best_model.fit(X_scaled, y)

#Defining and scaling X_real from our testing data
X_real = tests.drop(columns=['class', 'filename']).copy()
X_real_scaled = scaler.transform(X_real)
y_real = tests['class'].copy()

#Predicting and evaluating model on our testing data
y_pred = search_svc.predict(X_real_scaled)
score = accuracy_score(y_real, y_pred)
results = pd.DataFrame({'Filename': tests['filename'], 'Real': y_real, 'Prediction': y_pred})
results['Outcome'] = results['Real'] == results['Prediction']

print(f'Accuracy on our testing set: {round(score, 2)*100}%')
 
results[results['Outcome'] == False]


Accuracy on our testing set: 91.0%


Unnamed: 0,Filename,Real,Prediction,Outcome
0,Lunges_7.json,3,2,False
8,BodyWeightSquats_9.json,2,3,False
11,BodyWeightSquats_18.json,2,3,False
21,Lunges_1.json,3,2,False
48,Lunges_5.json,3,2,False


In [68]:
#Create Pipeline
pipeline = Pipeline([
    ('scaling', MinMaxScaler()),
    ('svc', SVC(C=0.1, gamma=1, kernel='poly'))])

#Train Pipeline
pipeline_trained = pipeline.fit(X,y)

#Score model
pipeline_trained.score(X_real, y_real)

0.9122807017543859

In [69]:
#Export pipeline as pickle file
with open("svc_coachai.pkl", "wb") as file:
    pickle.dump(pipeline_trained, file)