In [16]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline  
from sklearn import preprocessing 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn import metrics
from __future__ import division
from sklearn.decomposition import TruncatedSVD
from sklearn.utils import shuffle

In [31]:
df = pd.read_json('fit_modified.json', orient = 'records', dtype={"A":str, "B":list})
data = df[['exercisename','synergists','target','execution']]
group = df['functional_muscle_group'].apply(pd.Series)
label = group.rename(columns = lambda x : 'label' + str(x))
label.columns = ['label']

In [3]:
data.head()

Unnamed: 0,exercisename,synergists,target,execution
0,Safety Barbell Standing Leg Calf Raise,[Soleus],[Gastrocnemius],Raise heels by extending ankles as high as pos...
1,Cable Triceps Extension (with rope),[],[Triceps Brachii],Raise ends of rope overhead by extending forea...
2,Safety Bar Seated Calf Raise,[Gastrocnemius],[Soleus],Lower heels by bending ankles until calves are...
3,Sled 45° Reverse Calf Raise (plate loaded),[],[Tibialis Anterior],Pull forefoot of both feet up and back toward ...
4,Sled 45° Reverse Calf Raise,[],[Tibialis Anterior],Pull forefoot of both feet up and back toward ...


In [32]:
#unlist synergists and target columns
data['synergists'] = data['synergists'].fillna("").apply(lambda x: " ".join(x))
data['target'] = data['target'].fillna("").apply(lambda x: " ".join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [33]:
train = data.apply(lambda x: ' '.join(x.values.tolist()), axis=1)
train.head()

0    Safety Barbell Standing Leg Calf Raise Soleus ...
1    Cable Triceps Extension (with rope)  Triceps B...
2    Safety Bar Seated Calf Raise Gastrocnemius Sol...
3    Sled 45° Reverse Calf Raise (plate loaded)  Ti...
4    Sled 45° Reverse Calf Raise  Tibialis Anterior...
dtype: object

In [34]:
#initialize TFIDF vectorizer
vectorizer = TfidfVectorizer(ngram_range=(1,2),stop_words="english")
train_tfidf = vectorizer.fit_transform(train)
from sklearn.decomposition import TruncatedSVD
from sklearn.utils import shuffle
# reduce features using SVD
SVD = TruncatedSVD(n_components=100, n_iter=10, random_state=0)
train = SVD.fit_transform(train_tfidf)

In [35]:
label = pd.Series(label['label'],dtype='category')
label.cat.categories = [0,1,2,3,4,5,6]

In [36]:
#label binizer
lb = preprocessing.LabelBinarizer()
lb.fit([0,1,2,3,4,5,6])
label = lb.transform(label)

In [37]:
# separate data into training and test
X_train, X_test, y_train, y_test = train_test_split(train, label, test_size=0.2, random_state=0)
print X_train.shape
print X_test.shape

(246, 100)
(62, 100)


In [42]:
# tensorflow setup
numFeatures = X_train.shape[1]
numLabels = 7
numEpochs = 10000
learningRate = tf.train.exponential_decay(learning_rate=0.005,
                                          global_step= 1,
                                          decay_steps=X_train.shape[0],
                                          decay_rate= 0.95,
                                          staircase=True)


X = tf.placeholder(tf.float32, [None, numFeatures])
Y = tf.placeholder(tf.float32, [None, numLabels])

weights = tf.Variable(tf.random_normal([numFeatures,numLabels],
                                       mean=0,
                                       stddev=(np.sqrt(6/numFeatures+
                                                         numLabels+1)),
                                       name="weights"))

bias = tf.Variable(tf.random_normal([1,numLabels],
                                    mean=0,
                                    stddev=(np.sqrt(6/numFeatures+numLabels+1)),
                                    name="bias"))

In [43]:
# tensorflow operation
init = tf.global_variables_initializer()

apply_weights_OP = tf.matmul(X, weights, name="apply_weights")
add_bias_OP = tf.add(apply_weights_OP, bias, name="add_bias") 
activation_OP = tf.nn.sigmoid(add_bias_OP, name="activation")

cost_OP = tf.nn.l2_loss(activation_OP-Y, name="squared_error_cost")
training_OP = tf.train.GradientDescentOptimizer(learningRate).minimize(cost_OP)

In [44]:
# tensorflow session
sess = tf.Session()
sess.run(init)

correct_predictions_OP = tf.equal(tf.argmax(activation_OP,1),tf.argmax(Y,1))
accuracy_OP = tf.reduce_mean(tf.cast(correct_predictions_OP, "float"))


cost = 0
diff = 1
epoch_values=[]
accuracy_values=[]
cost_values=[]

# Training epochs
for i in range(numEpochs):
    if i > 1 and diff < .0001:
        print("change in cost %g; convergence."%diff)
        break
    else:
        # Run training step
        step = sess.run(training_OP, feed_dict={X: X_train, Y: y_train})
        # Report occasional stats
        if i % 10 == 0:
            # Add epoch to epoch_values
            epoch_values.append(i)
            # Generate accuracy stats on test data
            train_accuracy, newCost = sess.run(
                [accuracy_OP, cost_OP], 
                feed_dict={X: X_train, Y: y_train}
            )
            # Add accuracy to live graphing variable
            accuracy_values.append(train_accuracy)
            # Add cost to live graphing variable
            cost_values.append(newCost)
            
            # Re-assign values for variables
            diff = abs(newCost - cost)
            cost = newCost

            #generate print statements
            print("step %d, training accuracy %g"%(i, train_accuracy))
            print("step %d, cost %g"%(i, newCost))
            print("step %d, change in cost %g"%(i, diff))

            

# How well do we perform on held-out test data?
print("final accuracy on test set: %s" %str(sess.run(accuracy_OP, 
                                                     feed_dict={X: X_test, 
                                                                Y: y_test})))

step 0, training accuracy 0.117886
step 0, cost 290.937
step 0, change in cost 290.937
step 10, training accuracy 0.150407
step 10, cost 255.505
step 10, change in cost 35.4328
step 20, training accuracy 0.170732
step 20, cost 221.01
step 20, change in cost 34.4946
step 30, training accuracy 0.203252
step 30, cost 193.754
step 30, change in cost 27.2563
step 40, training accuracy 0.223577
step 40, cost 174.986
step 40, change in cost 18.7673
step 50, training accuracy 0.227642
step 50, cost 162.083
step 50, change in cost 12.9033
step 60, training accuracy 0.239837
step 60, cost 152.82
step 60, change in cost 9.26288
step 70, training accuracy 0.239837
step 70, cost 145.89
step 70, change in cost 6.93027
step 80, training accuracy 0.239837
step 80, cost 140.542
step 80, change in cost 5.34773
step 90, training accuracy 0.235772
step 90, cost 136.328
step 90, change in cost 4.21429
step 100, training accuracy 0.239837
step 100, cost 132.96
step 100, change in cost 3.36783
step 110, trai