In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline  
from sklearn import preprocessing 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from __future__ import division
from sklearn.decomposition import TruncatedSVD
from sklearn.utils import shuffle

In [31]:
df = pd.read_json('fit_modified_all.json', orient = 'records', dtype={"A":str, "B":list})
df = df.iloc[np.random.permutation(len(df))]
data = df[['exercisename','synergists','target','execution']]
group = df['functional_muscle_group'].apply(pd.Series)
label = group.rename(columns = lambda x : 'label' + str(x))
label.columns = ['label']

In [32]:
data.head()

Unnamed: 0,exercisename,synergists,target,execution
188,Twisting Crunch (on stability ball),[Obliques],[Weighted Twisting BallCrunch],Flex waist to raise upper torso. Return to ori...
85,Suspended Pull Through,"[Rectus Abdominis, Obliques, Iliopsoas, Tensor...",[Erector Spinae],Pull hips back while flexing spine in C shape....
79,Bent Knee Side Bridge Hip Abduction,"[Tensor FasciaeLatae, Gluteus Medius, Gluteus ...",[Straight Leg],"Keeping shank of lower leg on mat or floor, ra..."
326,Self-assisted Chest Dip,[Chest Dip],[Machine Assisted],"Lower body by bending arms, allowing elbows to..."
161,Smith Stiff Leg Deadlift,"[Erector Spinae, Adductor Magnus, Hamstrings]",[Gluteus Maximus],"With knees slightly bent, lower bar toward top..."


In [33]:
#unlist synergists and target columns
data['synergists'] = data['synergists'].fillna("").apply(lambda x: " ".join(x))
data['target'] = data['target'].fillna("").apply(lambda x: " ".join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [34]:
train = data.apply(lambda x: ' '.join(x.values.tolist()), axis=1)
train.head()

188    Twisting Crunch (on stability ball) Obliques W...
85     Suspended Pull Through Rectus Abdominis Obliqu...
79     Bent Knee Side Bridge Hip Abduction Tensor Fas...
326    Self-assisted Chest Dip Chest Dip Machine Assi...
161    Smith Stiff Leg Deadlift Erector Spinae Adduct...
dtype: object

In [35]:
#initialize TFIDF vectorizer
vectorizer = TfidfVectorizer(ngram_range=(1,2),stop_words="english")
train_tfidf = vectorizer.fit_transform(train)
from sklearn.decomposition import TruncatedSVD
from sklearn.utils import shuffle
# reduce features using SVD
SVD = TruncatedSVD(n_components=50, n_iter=10, random_state=0)
train = SVD.fit_transform(train_tfidf)

In [36]:
label = pd.Series(label['label'],dtype='category')
label.cat.categories = [0,1,2,3,4,5,6,7,8]

In [37]:
#label binizer
lb = preprocessing.LabelBinarizer()
lb.fit([0,1,2,3,4,5,6,7,8])
label = lb.transform(label)

In [38]:
# separate data into training and test
X_train, X_test, y_train, y_test = train_test_split(train, label, test_size=0.2, random_state=0)
print X_train.shape
print X_test.shape

(509, 50)
(128, 50)


In [39]:
# tensorflow setup
numFeatures = X_train.shape[1]
numLabels = 9
numEpochs = 10000
learningRate = tf.train.exponential_decay(learning_rate=0.01,
                                          global_step= 1,
                                          decay_steps=X_train.shape[0],
                                          decay_rate= 0.9,
                                          staircase=True)


X = tf.placeholder(tf.float32, [None, numFeatures])
Y = tf.placeholder(tf.float32, [None, numLabels])

weights = tf.Variable(tf.random_normal([numFeatures,numLabels],
                                       mean=0,
                                       stddev=(np.sqrt(6/numFeatures+
                                                         numLabels+1)),
                                       name="weights"))

bias = tf.Variable(tf.random_normal([1,numLabels],
                                    mean=0,
                                    stddev=(np.sqrt(6/numFeatures+numLabels+1)),
                                    name="bias"))

In [40]:
# tensorflow operation
init = tf.global_variables_initializer()

apply_weights_OP = tf.matmul(X, weights, name="apply_weights")
add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias") 
activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation")

cost_OP = tf.nn.l2_loss(activation_OP-Y, name="squared_error_cost")
training_OP = tf.train.GradientDescentOptimizer(learningRate).minimize(cost_OP)

In [41]:
# tensorflow session
sess = tf.Session()
sess.run(init)

correct_predictions_OP = tf.equal(tf.argmax(activation_OP,1),tf.argmax(Y,1))
accuracy_OP = tf.reduce_mean(tf.cast(correct_predictions_OP, "float"))


cost = 0
diff = 1
epoch_values=[]
accuracy_values=[]
cost_values=[]

# Training epochs
for i in range(numEpochs):
    if i > 1 and diff < .0001:
        print("change in cost %g; convergence."%diff)
        break
    else:
        # Run training step
        step = sess.run(training_OP, feed_dict={X: X_train, Y: y_train})
        # Report occasional stats
        if i % 10 == 0:
            # Add epoch to epoch_values
            epoch_values.append(i)
            # Generate accuracy stats on test data
            train_accuracy, newCost = sess.run(
                [accuracy_OP, cost_OP], 
                feed_dict={X: X_train, Y: y_train}
            )
            # Add accuracy to live graphing variable
            accuracy_values.append(train_accuracy)
            # Add cost to live graphing variable
            cost_values.append(newCost)
            
            # Re-assign values for variables
            diff = abs(newCost - cost)
            cost = newCost

            #generate print statements
            print("step %d, training accuracy %g"%(i, train_accuracy))
            print("step %d, cost %g"%(i, newCost))
            print("step %d, change in cost %g"%(i, diff))

step 0, training accuracy 0.115914
step 0, cost 1171.42
step 0, change in cost 1171.42
step 10, training accuracy 0.104126
step 10, cost 817.909
step 10, change in cost 353.509
step 20, training accuracy 0.10609
step 20, cost 643.26
step 20, change in cost 174.649
step 30, training accuracy 0.108055
step 30, cost 576.958
step 30, change in cost 66.3019
step 40, training accuracy 0.127701
step 40, cost 483.348
step 40, change in cost 93.6096
step 50, training accuracy 0.133595
step 50, cost 435.738
step 50, change in cost 47.61
step 60, training accuracy 0.111984
step 60, cost 348.104
step 60, change in cost 87.6343
step 70, training accuracy 0.151277
step 70, cost 275.944
step 70, change in cost 72.1599
step 80, training accuracy 0.165029
step 80, cost 261.021
step 80, change in cost 14.9235
step 90, training accuracy 0.165029
step 90, cost 254.955
step 90, change in cost 6.06566
step 100, training accuracy 0.166994
step 100, cost 251.049
step 100, change in cost 3.90593
step 110, trai

In [42]:
# How well do we perform on held-out test data?
print("final accuracy on test set: %s" %str(sess.run(accuracy_OP, 
                                                     feed_dict={X: X_test, 
                                                                Y: y_test})))

final accuracy on test set: 0.929688
