In [1]:
#Import files
import os
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import random
from sklearn.model_selection import cross_val_score,cross_val_predict
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import export_graphviz

In [2]:
#Names of the bones as they are labelled in the data.
#The data connects bones to joints, so a bone is located at its upper joint: the shin, for
#example, is placed at the top of the shin where the knee is. The constants below are
#therefore named after the joint each bone locates.

#torso and head
PELVIS = "Ryan:Hip"
AB = "Ryan:Ab"
CHEST = "Ryan:Chest"
NECK = "Ryan:Neck"
HEAD = "Ryan:Head"

#arms
LEFT_SHOULDER = "Ryan:LUArm"
RIGHT_SHOULDER = "Ryan:RUArm"
LEFT_ELBOW = "Ryan:LFArm"
RIGHT_ELBOW = "Ryan:RFArm"
LEFT_WRIST = "Ryan:LHand"
RIGHT_WRIST = "Ryan:RHand"

#legs
LEFT_HIP = "Ryan:LThigh"
RIGHT_HIP = "Ryan:RThigh"
LEFT_KNEE = "Ryan:LShin"
RIGHT_KNEE = "Ryan:RShin"
LEFT_ANKLE = "Ryan:LFoot"
RIGHT_ANKLE = "Ryan:RFoot"
LEFT_TOE = "Ryan:LToe"
RIGHT_TOE = "Ryan:RToe"

In [3]:
#functional tools for thesis


def angle(a,b,c):
    """
    Return the angle, in degrees, formed at the vertex b by the points a and c.

    Both rays are taken pointing away from b (b -> a and b -> c). With that convention three
    collinear points with b in the middle (standing straight up) give 180 degrees; chaining
    the directions as a -> b and b -> c would report 0 for the same pose.
    """
    #every input is promoted to float64 up front; float32 input misbehaved in the notebook
    vertex = np.asarray(b,dtype=np.float64)
    unit_rays = []
    for endpoint in (a,c):
        ray = np.asarray(endpoint,dtype=np.float64) - vertex
        unit_rays.append(ray / np.linalg.norm(ray))
    #clip keeps the dot product inside arccos' domain if rounding pushes it past +/-1
    cosine = np.clip(np.dot(unit_rays[0],unit_rays[1]),-1,1)
    return np.degrees(np.arccos(cosine))

def alignment(a,b):
    """
    Score how well two joints/bones are aligned; 0 means perfectly aligned.

    The idea is that only one axis (x, y or z) should differ between two aligned joints.
    For example, if the shoulders are aligned and the subject faces the camera, the y and z
    values should match while the x values differ; if the subject faces sideways, x and y
    match and z differs. The axis with the largest delta is therefore assumed to be the one
    that is supposed to differ and is excluded, and the deltas of the remaining axes are
    summed. One of the remaining axes should almost always be zero, and perfect alignment
    has all of them at zero.

    Parameters
    ----------
    a, b : array-like
        Coordinates of the two joints (numpy arrays, lists or tuples of equal length).

    Returns
    -------
    The sum of the absolute per-axis differences, leaving out the largest one.
    """
    #np.asarray lets plain lists/tuples be passed as well as arrays; existing arrays keep
    #their dtype, so results for array input are unchanged
    difference = np.abs(np.asarray(a) - np.asarray(b))
    #the subtraction produced a fresh array, so zeroing an entry never modifies a or b
    difference[np.argmax(difference)] = 0
    return np.sum(difference)

def get_bone_location(frame,label_name):
    """Return the X, Y, Z position of the bone named label_name in frame as a float32 array."""
    position = frame['Bone'][label_name]["Position"]
    coordinates = [position[axis] for axis in ("X","Y","Z")]
    return np.asarray(coordinates,dtype=np.float32)

def compute_jerk(series,dt):
    """
    Compute the jerk (third time derivative) of a timeseries by repeated finite differences.

    Parameters
    ----------
    series : array-like
        The sampled values, one per frame.
    dt : float or array-like
        The time step between samples: either one scalar for uniform sampling, or one value
        per interval (length len(series) - 1, as produced by np.diff of the timestamps).

    Returns
    -------
    numpy array of length max(len(series) - 3, 0).

    Raises
    ------
    ValueError
        If dt is an array whose length is not len(series) - 1.

    Each difference is divided by the time step, so the result is in units of
    series / time**3. For uniform sampling this is the third difference divided by dt**3;
    for uneven sampling it is an approximation. A zero time step yields inf/nan entries.
    """
    series = np.asarray(series,dtype=np.float64)
    steps = np.asarray(dt,dtype=np.float64)
    if steps.ndim == 0:
        #scalar step: expand it to one entry per interval
        steps = np.full(max(series.size - 1,0),float(steps))
    else:
        steps = steps.ravel()
        if steps.size != series.size - 1:
            raise ValueError("dt must be a scalar or have len(series) - 1 entries")
    velocity = np.diff(series) / steps
    #each further difference is one element shorter, so the leading steps are dropped to match
    acceleration = np.diff(velocity) / steps[1:]
    jerk = np.diff(acceleration) / steps[2:]
    return jerk

def create_data_matrix(filename):
    """
    Parse an optitrack csv export into its timing information, data cells and column index.

    The first line is metadata (unused here). Lines 2-6 (0-based) are treated as the five
    column-header rows and every line from line 7 onward as one frame of data. In every row
    the first two cells are the frame number and the timestamp.

    Parameters
    ----------
    filename : str
        Path of the csv file to read.

    Returns
    -------
    None if the file is empty (a message is printed), otherwise the tuple
    (frame_nums, dt, t, data, index):
        frame_nums : first column of every data row, as strings
        dt         : differences between consecutive timestamps (float64)
        t          : elapsed time of every frame, starting at 0
        data       : 2-D array of the remaining cells of every data row, as strings
        index      : pandas MultiIndex (Marker, Label, Position, Columns) naming the
                     columns of data
    """
    #read everything up front so the file handle is released before parsing starts
    with open(filename,"r") as f:
        lines = f.readlines()
    if len(lines) == 0:
        print("File Empty")
        return None
    columns = [col.strip().split(",") for col in lines[2:7]]
    #drop the two leading cells (frame / time columns) of each header row;
    #header row 2 is not needed to build the index
    markers = columns[0][2:]
    labels = columns[1][2:]
    pos_bool = columns[3][2:]
    series = columns[4][2:]
    tuples = list(zip(markers,labels,pos_bool,series))
    index = pd.MultiIndex.from_tuples(tuples,names=["Marker","Label","Position","Columns"])
    #skip blank lines (e.g. a trailing newline at the end of the file); a blank line would
    #otherwise add a one-cell row and make the matrix ragged
    rows = [line.strip().split(",") for line in lines[7:] if line.strip()]
    matrix = np.asarray(rows)
    frame_nums = matrix.T[0]
    dt = np.diff(np.asarray(matrix.T[1],dtype=np.float64))
    t = np.append([0],np.cumsum(dt))
    data = matrix[:,2:]
    return frame_nums,dt,t,data,index


In [4]:
#collect the names of the csv files sitting directly inside the data directory
directory = "CSV"
test = list(os.walk(directory))
#test[0] is os.walk's entry for the top-level directory; its third field lists the file names
csv_files = [fname for fname in test[0][2] if fname.endswith(".csv")]

In [5]:
#create one pandas dataframe of labels per personal trainer
def _load_trainer_labels(path,label_column):
    """
    Read one trainer's label file into a DataFrame indexed by filename.

    The file is read as a header-less two-column csv (filename, label); the label column is
    named label_column. Rows whose filename contains "Lunge" are dropped.
    """
    labels = pd.read_csv(path,names=["filename",label_column])
    labels = labels[~labels.filename.str.contains("Lunge")]
    return labels.set_index("filename")

dylan = _load_trainer_labels("Labels/dylan_labels.csv","d_label")
trevor = _load_trainer_labels("Labels/trevor_labels.csv","t_label")

In [6]:
#keep only the files on which both personal trainers agree (both 1 or both -1)
df = trevor.join(dylan)
both_positive = (df['t_label'] == 1) & (df['d_label'] == 1)
both_negative = (df['t_label'] == -1) & (df['d_label'] == -1)
df = df.loc[both_positive | both_negative]
#pair the csv path of every remaining file with its (agreed) label
good_files = []
for x in df.index:
    csv_path = "CSV/" + x.split(".")[0] + ".csv"
    good_files.append((csv_path,df.loc[x]['t_label']))

In [None]:
#initialize feature and label vector
X = []
Y = []
for f,label in good_files:
    #generate data matrix
    parsed = create_data_matrix(f)
    if parsed is None:
        #create_data_matrix returns None for an empty file; skip that recording instead of
        #letting the tuple unpacking below abort the whole run with a TypeError
        print("Skipping empty file: " + f)
        continue
    frame_nums,dt,t,data,index = parsed
    #initialize timeseries arrays (one entry per frame)
    Lknee_angles = []
    Rknee_angles = []
    hip_y_axis = []
    shoulder_align = []
    hip_align = []
    #create timeseries data for the features
    for row in data:
        frame = pd.Series(row,index=index)
        #knee angle = angle at the knee between the ankle and the hip
        Rankle = get_bone_location(frame,RIGHT_ANKLE)
        Rknee = get_bone_location(frame,RIGHT_KNEE)
        Rhip = get_bone_location(frame,RIGHT_HIP)
        Rknee_angle = angle(Rankle,Rknee,Rhip)
        Lankle = get_bone_location(frame,LEFT_ANKLE)
        Lknee = get_bone_location(frame,LEFT_KNEE)
        Lhip = get_bone_location(frame,LEFT_HIP)
        Lknee_angle = angle(Lankle,Lknee,Lhip)
        Lknee_angles.append(Lknee_angle)
        Rknee_angles.append(Rknee_angle)
        pelvis = get_bone_location(frame,PELVIS)
        Lshoulder = get_bone_location(frame,LEFT_SHOULDER)
        Rshoulder = get_bone_location(frame,RIGHT_SHOULDER)
        shoulder_align.append(alignment(Lshoulder,Rshoulder))
        hip_align.append(alignment(Lhip,Rhip))
        #index 1 of the position is the Y coordinate
        hip_y_axis.append(pelvis[1])
    #calculate the features extracted for the squat
    #the frame with the smallest pelvis Y value is taken as the bottom ("climax") of the squat
    lowest_hip_point = hip_y_axis.index(min(hip_y_axis))
    Lknee_climax = Lknee_angles[lowest_hip_point]
    Lknee_max_jolt = max(compute_jerk(Lknee_angles,dt))
    Rknee_climax = Rknee_angles[lowest_hip_point]
    Rknee_max_jolt = max(compute_jerk(Rknee_angles,dt))
    #alignment scores are summed over all frames and divided by the elapsed time of the recording
    hip_alignment_metric = np.sum(hip_align)/float(t[-1])
    shoulder_alignment_metric = np.sum(shoulder_align)/float(t[-1])
    #mean absolute difference between the left and right knee angles
    knee_metric = np.mean([abs(left - right) for left,right in zip(Lknee_angles,Rknee_angles)])
    features = [Lknee_climax,Rknee_climax,Lknee_max_jolt,Rknee_max_jolt,knee_metric,hip_alignment_metric,shoulder_alignment_metric]
    X.append(features)
    Y.append(label)

In [None]:
#initialize ML functions
svm = SVC(gamma='auto')
forest = RandomForestClassifier()
tree = DecisionTreeClassifier()
LR = LogisticRegression()
#initialize arrays to hold cross validation results
i = 0
tree_res = []
svm_res = []
forest_res = []
LR_res = []
while i < 100:
    tree_res.append(cross_val_score(tree,X,Y,cv=10))
    svm_res.append(cross_val_score(svm,X,Y,cv=10))
    forest_res.append(cross_val_score(forest,X,Y,cv=10))
    LR_res.append(cross_val_score(LR,X,Y,cv=10))
    i += 1
#display average accuracy from 1o fold cross validation
print "CART average accuracy:\t\t\t",np.mean(tree_res)
print "SVM average accuracy:\t\t\t",np.mean(svm_res)
print "Random Forest average accuracy:\t\t\t",np.mean(forest_res)
print "Logistic Regression average accuracy:\t\t\t",np.mean(LR_res)



In [None]:
#get the confidence scores of svm and logistic regression

def _tally_confidence(results,labels):
    """
    Total up how confident a model was when it was right and when it was wrong.

    results is a list of predict_proba outputs (one per repetition), each holding one
    [p0, p1] pair per sample; labels holds the true label (-1 or 1) of every sample.
    The predicted label is -1 when p0 > p1 and 1 otherwise, and the confidence of a
    prediction is the probability of the predicted label.
    NOTE(review): this assumes probability column 0 belongs to label -1 and column 1 to
    label 1 - confirm against the fitted estimator's classes_ ordering.

    Returns (right_certainty, right_count, wrong_certainty, wrong_count).
    """
    right_certainty = 0
    right_count = 0
    wrong_certainty = 0
    wrong_count = 0
    for res in results:
        for actual,pred in zip(labels,res):
            val = -1 if pred[0] > pred[1] else 1
            confidence = pred[1] if val == 1 else pred[0]
            if actual == val:
                right_count += 1
                right_certainty += confidence
            else:
                wrong_count += 1
                wrong_certainty += confidence
    return right_certainty,right_count,wrong_certainty,wrong_count

def _average(total,count):
    """Return total/count as a float, or nan when count is 0 (no predictions of that kind)."""
    if count == 0:
        return float("nan")
    return total/float(count)

svm = SVC(gamma='auto',probability=True)
LR = LogisticRegression()

#collect the cross validated class probabilities of 100 repetitions per model
svm_res = []
LR_res = []
for _ in range(100):
    svm_res.append(cross_val_predict(svm,X,Y,cv=10,method='predict_proba'))
    LR_res.append(cross_val_predict(LR,X,Y,cv=10,method='predict_proba'))

right_certainty,right_count,wrong_certainty,wrong_count = _tally_confidence(LR_res,Y)
print("Logistic Regression Avg Confidence When Wrong: %s" % _average(wrong_certainty,wrong_count))
print("Logistic Regression Avg Confidence When Right: %s" % _average(right_certainty,right_count))

right_certainty,right_count,wrong_certainty,wrong_count = _tally_confidence(svm_res,Y)
print("SVM Avg Confidence When Wrong: %s" % _average(wrong_certainty,wrong_count))
print("SVM Avg Confidence When Right: %s" % _average(right_certainty,right_count))

In [261]:
#fit the decision tree on all of the data and render it to an image for display

#display names for the columns of X, in the order the features were appended
feature_names = [
    "Left Knee at Climax",
    "Right Knee at Climax",
    "Left Knee Max Jerk",
    "Right Knee Max Jerk",
    "Knee Symmetry Score",
    "Hip Alignment Score",
    "Shoulder Alignment Score",
]
name = "tree.dot"
png = "tree.png"

tree.fit(X,Y)
export_graphviz(tree,out_file=name,feature_names=feature_names)
#convert the .dot file to a png with graphviz's dot tool; the value returned by os.system
#is the last expression of the cell and is therefore shown as its output
command =  "dot -Tpng " + name + " -o " + png
os.system(command)

0