In [83]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline  
from sklearn import preprocessing 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from __future__ import division
from sklearn.decomposition import TruncatedSVD
from sklearn.utils import shuffle

In [84]:
df = pd.read_json('fit_modified.json', orient = 'records', dtype={"A":str, "B":list})
df = df.iloc[np.random.permutation(len(df))]
data = df[['exercisename','synergists','target','execution']]
group = df['functional_muscle_group'].apply(pd.Series)
label = group.rename(columns = lambda x : 'label' + str(x))
label.columns = ['label']

In [61]:
data.head()

Unnamed: 0,exercisename,synergists,target,execution
90,Lever Lying Hip Abduction,"[Tensor FasciaeLatae, Gluteus Medius, Gluteus ...",[],Move legs away from one another by abduction h...
32,Lying Straight Leg Raise,"[Incline Straight Leg Raise, InclineLeg-Hip Ra...",[Lying Leg Raise],"Keeping knees straight, raise legs by flexing ..."
156,Cable Seated Pullover,"[PectoralisMajor, Sternal, Triceps, LongHead, ...",[Latissimus Dorsi],"With elbows fixed with slight bend, pull cable..."
147,Barbell Straight Leg Deadlift,"[Hamstrings, Gluteus Maximus, Adductor Magnus]",[Erector Spinae],"With knees straight, lower bar toward top of f..."
254,Weighted Side Bend (on stability ball),"[Quadratuslumborum, Psoas major, Iliocastalis ...",[Obliques],Raise side of torso up by laterally flexing wa...


In [85]:
#unlist synergists and target columns
data['synergists'] = data['synergists'].fillna("").apply(lambda x: " ".join(x))
data['target'] = data['target'].fillna("").apply(lambda x: " ".join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [86]:
train = data.apply(lambda x: ' '.join(x.values.tolist()), axis=1)
train.head()

11     Dumbbell Kickback  Triceps Brachii Extend arm ...
47     Lever Shoulder External Rotation (plate loaded...
127    Cable Stiff Leg Deadlift Gluteus Maximus Adduc...
40     Weighted Hanging Straight Leg Raise Tensor Fas...
13     Sled Hack Calf Press Soleus Gastrocnemius Push...
dtype: object

In [87]:
#initialize TFIDF vectorizer
vectorizer = TfidfVectorizer(ngram_range=(1,2),stop_words="english")
train_tfidf = vectorizer.fit_transform(train)
from sklearn.decomposition import TruncatedSVD
from sklearn.utils import shuffle
# reduce features using SVD
SVD = TruncatedSVD(n_components=100, n_iter=10, random_state=0)
train = SVD.fit_transform(train_tfidf)

In [88]:
label = pd.Series(label['label'],dtype='category')
label.cat.categories = [0,1,2,3,4,5,6]

In [89]:
#label binizer
lb = preprocessing.LabelBinarizer()
lb.fit([0,1,2,3,4,5,6])
label = lb.transform(label)

In [90]:
# separate data into training and test
X_train, X_test, y_train, y_test = train_test_split(train, label, test_size=0.2, random_state=0)
print X_train.shape
print X_test.shape

(246, 100)
(62, 100)


In [99]:
# tensorflow setup
numFeatures = X_train.shape[1]
numLabels = 7
numEpochs = 100000
learningRate = tf.train.exponential_decay(learning_rate=0.01,
                                          global_step= 1,
                                          decay_steps=X_train.shape[0],
                                          decay_rate= 0.9,
                                          staircase=True)


X = tf.placeholder(tf.float32, [None, numFeatures])
Y = tf.placeholder(tf.float32, [None, numLabels])

weights = tf.Variable(tf.random_normal([numFeatures,numLabels],
                                       mean=0,
                                       stddev=(np.sqrt(6/numFeatures+
                                                         numLabels+1)),
                                       name="weights"))

bias = tf.Variable(tf.random_normal([1,numLabels],
                                    mean=0,
                                    stddev=(np.sqrt(6/numFeatures+numLabels+1)),
                                    name="bias"))

In [100]:
# tensorflow operation
init = tf.global_variables_initializer()

apply_weights_OP = tf.matmul(X, weights, name="apply_weights")
add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias") 
activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation")

cost_OP = tf.nn.l2_loss(activation_OP-Y, name="squared_error_cost")
training_OP = tf.train.GradientDescentOptimizer(learningRate).minimize(cost_OP)

In [101]:
# tensorflow session
sess = tf.Session()
sess.run(init)

correct_predictions_OP = tf.equal(tf.argmax(activation_OP,1),tf.argmax(Y,1))
accuracy_OP = tf.reduce_mean(tf.cast(correct_predictions_OP, "float"))


cost = 0
diff = 1
epoch_values=[]
accuracy_values=[]
cost_values=[]

# Training epochs
for i in range(numEpochs):
    if i > 1 and diff < .0001:
        print("change in cost %g; convergence."%diff)
        break
    else:
        # Run training step
        step = sess.run(training_OP, feed_dict={X: X_train, Y: y_train})
        # Report occasional stats
        if i % 10 == 0:
            # Add epoch to epoch_values
            epoch_values.append(i)
            # Generate accuracy stats on test data
            train_accuracy, newCost = sess.run(
                [accuracy_OP, cost_OP], 
                feed_dict={X: X_train, Y: y_train}
            )
            # Add accuracy to live graphing variable
            accuracy_values.append(train_accuracy)
            # Add cost to live graphing variable
            cost_values.append(newCost)
            
            # Re-assign values for variables
            diff = abs(newCost - cost)
            cost = newCost

            #generate print statements
            print("step %d, training accuracy %g"%(i, train_accuracy))
            print("step %d, cost %g"%(i, newCost))
            print("step %d, change in cost %g"%(i, diff))

step 0, training accuracy 0.138211
step 0, cost 357.849
step 0, change in cost 357.849
step 10, training accuracy 0.121951
step 10, cost 304.876
step 10, change in cost 52.9733
step 20, training accuracy 0.113821
step 20, cost 270.312
step 20, change in cost 34.5636
step 30, training accuracy 0.117886
step 30, cost 244.357
step 30, change in cost 25.9547
step 40, training accuracy 0.121951
step 40, cost 217.976
step 40, change in cost 26.3816
step 50, training accuracy 0.150407
step 50, cost 191.479
step 50, change in cost 26.4967
step 60, training accuracy 0.170732
step 60, cost 169.897
step 60, change in cost 21.5822
step 70, training accuracy 0.195122
step 70, cost 152.858
step 70, change in cost 17.0392
step 80, training accuracy 0.191057
step 80, cost 140.18
step 80, change in cost 12.6777
step 90, training accuracy 0.207317
step 90, cost 131.485
step 90, change in cost 8.69479
step 100, training accuracy 0.207317
step 100, cost 125.576
step 100, change in cost 5.90929
step 110, t

In [102]:
# How well do we perform on held-out test data?
print("final accuracy on test set: %s" %str(sess.run(accuracy_OP, 
                                                     feed_dict={X: X_test, 
                                                                Y: y_test})))

final accuracy on test set: 0.870968
