In [2]:
# Load Dataset
import pandas as pd
import numpy as np

# Hide warnings
import warnings
warnings.filterwarnings('ignore')

In [3]:
apex_df = pd.read_csv('data/apex.csv')

In [4]:
#decision trees good at overfitting small data points
# ONE SHOT LEARNING- learn a differentiator rather than a predictor.  
# maximize the difference between good score and bad score
# transfer learning
# combination of both
# metrics, true positive, true negative, sensitivy, specificity, recall

### Feature Engineering

In [5]:
'''
Important features to look at for 'good' squats:
1) angle at hip
2) difference in x-coordinates of hip and ankle
3) difference in y-coordinates of hip and knee
'''

features = apex_df.copy()
features.head()
features['xcoord_lhip_ank'] = features['leftHip_x'] - features['leftAnkle_x']
features['ycoord_lhip_knee'] = features['leftHip_y'] - features['leftKnee_y']
features['left_hip_angle'] = np.arctan(features['ycoord_lhip_knee']/features['xcoord_lhip_ank'])
features['left_hip_angle']

features['xcoord_rhip_ank'] = features['rightHip_x'] - features['rightAnkle_x']
features['ycoord_rhip_knee'] = features['rightHip_y'] - features['rightKnee_y']
features['right_hip_angle'] = np.arctan(features['ycoord_rhip_knee']/features['xcoord_rhip_ank'])
features['right_hip_angle']

features.head()

Unnamed: 0,entry,nose_x,nose_y,leftShoulder_x,leftShoulder_y,rightShoulder_x,rightShoulder_y,leftHip_x,leftHip_y,rightHip_x,...,leftAnkle_y,rightAnkle_x,rightAnkle_y,good,xcoord_lhip_ank,ycoord_lhip_knee,left_hip_angle,xcoord_rhip_ank,ycoord_rhip_knee,right_hip_angle
0,0,0.117421,0.308928,0.143789,0.286555,0.14281,0.281268,0.198155,0.230542,0.195815,...,0.25427,0.257482,0.266315,1,-0.045358,-0.044023,0.770463,-0.061666,-0.05055,0.686657
1,1,0.136397,0.262684,0.159529,0.264952,0.163765,0.240159,0.220993,0.251286,0.228438,...,0.249946,0.258989,0.243413,1,-0.035139,-0.035057,0.784224,-0.030552,-0.048677,1.010298
2,2,0.04586,0.247471,0.079373,0.257763,0.080869,0.206781,0.186991,0.260724,0.187887,...,0.278864,0.353202,0.192719,1,-0.15846,-0.004642,0.029288,-0.165315,0.004289,-0.025942
3,3,0.253397,0.053853,0.275135,0.066398,0.279123,0.024032,0.34176,0.05858,0.32775,...,0.055766,0.385602,0.026955,1,-0.037507,-0.002227,0.05931,-0.057852,-0.001562,0.026992
4,4,0.098074,0.246265,0.121498,0.269973,0.122347,0.262603,0.183748,0.295707,0.182659,...,0.301003,0.236739,0.297524,1,-0.053714,0.028318,-0.485172,-0.05408,0.02776,-0.474242


### Decision Tree Classifier

In [6]:
'''
A decision tree is built on an entire dataset, using all the features/variables of interest.
Decision Tree Classifiers tend to overfit the small data points.
'''

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
y = features['good']
X = features.drop(['good'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
clf = DecisionTreeClassifier()
clf = clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
score = clf.score(X_test, y_test)
print('Test Accuracy: ', score)

Test Accuracy:  1.0


### Random Forest Classifer

In [7]:
'''
Random Forest Classifiers choose randomly selects observations/rows
and specific features/variables to build multiple decision trees from and then averages the results.
'''
from sklearn.ensemble import RandomForestClassifier
y = features['good']
X = features.drop(['good'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
clf_rf = RandomForestClassifier()
clf_rf = clf_rf.fit(X_train,y_train)
y_pred_rf = clf_rf.predict(X_test)
score_rf = clf_rf.score(X_test, y_test)
print('Test Accuracy: ', score_rf)

Test Accuracy:  1.0


### Logistic Regression

In [8]:
'''
Logistic Regression conducts an analysis of the data when it is binary. 
In this case, it predicts the probability of getting a 'good' squat (1).
'''
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
y = features['good']
X = features.drop(['good'], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
clf_log = LogisticRegression()
clf_log = clf_log.fit(X_train,y_train)
y_pred_log = clf_log.predict(X_test)
score_log = clf_log.score(X_test, y_test)
print('Test Accuracy: ', score_log)

Test Accuracy:  0.75


In [46]:
[X_test.iloc[0, :]]

[entry               8.000000
 nose_x              0.092754
 nose_y              0.229193
 leftShoulder_x      0.123727
 leftShoulder_y      0.264783
 rightShoulder_x     0.123714
 rightShoulder_y     0.257277
 leftHip_x           0.211364
 leftHip_y           0.267717
 rightHip_x          0.217357
 rightHip_y          0.274746
 leftKnee_x          0.228780
 leftKnee_y          0.232047
 rightKnee_x         0.231432
 rightKnee_y         0.256007
 leftAnkle_x         0.286113
 leftAnkle_y         0.266896
 rightAnkle_x        0.283729
 rightAnkle_y        0.272017
 xcoord_lhip_ank    -0.074749
 ycoord_lhip_knee    0.035670
 left_hip_angle     -0.445245
 xcoord_rhip_ank    -0.066372
 ycoord_rhip_knee    0.018739
 right_hip_angle    -0.275168
 Name: 8, dtype: float64]

In [45]:
clf_log.predict(X_test.iloc[:2, :])[0]

1

### Sequential Model

In [10]:
'''
Installation of pip, tensorflow, keras:
run this in terminal to install pip:  curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
run this in terminal to install tensorflow: pip install --upgrade tensorflow
run this in terminal to install keras: pip install keras
'''
import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential
from keras.layers import Dropout
from tensorflow.keras.layers import Dense

Using TensorFlow backend.


In [11]:
y = features['good']
x = features.drop(['good'], axis=1)
#pd.get_dummies(y).values
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42,shuffle=True)

# Core data structure in Keras is a model
# The model is an object in which we organize layers

model_k = Sequential() # instantiate empty Sequential model


# model contruction (architecture build computational graph)


model_k.add( Dense(units=64, input_dim = 25, activation='relu'))

model_k.add(Dense(32, activation=tf.nn.relu))

#model_k.add(Dense(16, activation=tf.nn.relu))

model_k.add(Dense(4, activation='sigmoid'))

model_k.add(Dense(units=1, activation='softmax') )


# For a binary classification problem 
#def mean_pred(y_true, y_pred):
 #   return keras.backend.mean(y_pred) 

model_k.compile(optimizer= 'rmsprop',
              loss='binary_crossentropy',
             metrics=['accuracy'])

history = model_k.fit(X_train, y_train, epochs =10, batch_size= 10)

# Evaluate the model Accuracy on test set

loss, accuracy = model_k.evaluate(X_test, y_test, batch_size=10)
#print('Test loss:', test_scores[0])
print('Test accuracy:', accuracy)


Train on 20 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.8333333


In [32]:
22/26

0.8461538461538461

In [19]:
[[features.iloc[1, :-1]]]

[[entry               1.000000
  nose_x              0.136397
  nose_y              0.262684
  leftShoulder_x      0.159529
  leftShoulder_y      0.264952
  rightShoulder_x     0.163765
  rightShoulder_y     0.240159
  leftHip_x           0.220993
  leftHip_y           0.251286
  rightHip_x          0.228438
  rightHip_y          0.236104
  leftKnee_x          0.216944
  leftKnee_y          0.286342
  rightKnee_x         0.218581
  rightKnee_y         0.284781
  leftAnkle_x         0.256133
  leftAnkle_y         0.249946
  rightAnkle_x        0.258989
  rightAnkle_y        0.243413
  good                1.000000
  xcoord_lhip_ank    -0.035139
  ycoord_lhip_knee   -0.035057
  left_hip_angle      0.784224
  xcoord_rhip_ank    -0.030552
  ycoord_rhip_knee   -0.048677
  Name: 1, dtype: float64]]

In [22]:
len([[features.iloc[, :-1]]])

1

In [30]:
model_k.predict([[features.iloc[25, :-1]]])[0][0]

1.0

In [27]:
features

Unnamed: 0,entry,nose_x,nose_y,leftShoulder_x,leftShoulder_y,rightShoulder_x,rightShoulder_y,leftHip_x,leftHip_y,rightHip_x,...,leftAnkle_y,rightAnkle_x,rightAnkle_y,good,xcoord_lhip_ank,ycoord_lhip_knee,left_hip_angle,xcoord_rhip_ank,ycoord_rhip_knee,right_hip_angle
0,0,0.117421,0.308928,0.143789,0.286555,0.14281,0.281268,0.198155,0.230542,0.195815,...,0.25427,0.257482,0.266315,1,-0.045358,-0.044023,0.770463,-0.061666,-0.05055,0.686657
1,1,0.136397,0.262684,0.159529,0.264952,0.163765,0.240159,0.220993,0.251286,0.228438,...,0.249946,0.258989,0.243413,1,-0.035139,-0.035057,0.784224,-0.030552,-0.048677,1.010298
2,2,0.04586,0.247471,0.079373,0.257763,0.080869,0.206781,0.186991,0.260724,0.187887,...,0.278864,0.353202,0.192719,1,-0.15846,-0.004642,0.029288,-0.165315,0.004289,-0.025942
3,3,0.253397,0.053853,0.275135,0.066398,0.279123,0.024032,0.34176,0.05858,0.32775,...,0.055766,0.385602,0.026955,1,-0.037507,-0.002227,0.05931,-0.057852,-0.001562,0.026992
4,4,0.098074,0.246265,0.121498,0.269973,0.122347,0.262603,0.183748,0.295707,0.182659,...,0.301003,0.236739,0.297524,1,-0.053714,0.028318,-0.485172,-0.05408,0.02776,-0.474242
5,5,0.105116,0.315273,0.137064,0.283923,0.136448,0.278682,0.194774,0.239821,0.196238,...,0.247978,0.256965,0.245755,1,-0.061458,-0.042169,0.601358,-0.060727,-0.043488,0.621462
6,6,0.195947,0.239623,0.201764,0.242434,0.201224,0.237515,0.219711,0.241363,0.223604,...,0.264797,0.259426,0.235906,1,-0.04455,-0.001866,0.041851,-0.035823,0.000248,-0.006923
7,7,0.131564,0.224439,0.146576,0.251678,0.145847,0.252313,0.191266,0.278307,0.193529,...,0.292226,0.260291,0.291308,1,-0.071534,0.015378,-0.211747,-0.066762,0.024242,-0.348307
8,8,0.092754,0.229193,0.123727,0.264783,0.123714,0.257277,0.211364,0.267717,0.217357,...,0.266896,0.283729,0.272017,1,-0.074749,0.03567,-0.445245,-0.066372,0.018739,-0.275168
9,9,0.045357,0.216075,0.087802,0.2521,0.086947,0.240818,0.205575,0.235612,0.204224,...,0.248067,0.333113,0.252504,1,-0.130147,0.006728,-0.051651,-0.128888,0.00176,-0.013653


### K Nearest Neighbors

In [12]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

In [14]:
'''
k nearest neighbors model works by taking a data point and looking at the ‘k’ closest labeled data points. 
The data point is then assigned the label of the majority of the ‘k’ closest points. In this case, we want
to take the features and label it as good or bad (1 or 0).
K-folds cross validation estimates the skill of the k nearest neighbors model. In this case, 
the data-set is split into k (5) groups and one group is used as the test set and the rest
are used as the training set.
'''

feat = features.copy()
#feat = feat.drop(['good'], axis=1)
feat_new = feat[['xcoord_lhip_ank','ycoord_lhip_knee','left_hip_angle','xcoord_rhip_ank','ycoord_rhip_knee','right_hip_angle', 'good']]
y = feat_new['good']
x = feat_new.drop(['good'], axis=1)
#Cluster the data

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1, stratify=y)

# Create KNN classifier
knn = KNeighborsClassifier(n_neighbors = 3)
# Fit the classifier to the data
knn.fit(X_train,y_train)
knn.predict(X_test)[0:5]
knn_score = knn.score(X_test, y_test)

#create a new KNN model
knn_cv = KNeighborsClassifier(n_neighbors=3)
#train model with cv of 5 
cv_scores = cross_val_score(knn_cv, x, y, cv=5)
#print each cv score (accuracy) and average them
print('Accuracy:', knn_score)
print('Cross Validation Mean:{}'.format(np.mean(cv_scores)))


Accuracy: 0.8333333333333334
Cross Validation Mean:0.78


In [15]:
#feat.shape
#kmeans = KMeans(n_clusters=2).fit(feat_new)
#centroids = kmeans.cluster_centers_
#print(centroids)


#kmeans = KMeans(n_clusters=2, random_state=42).fit(feat_std)
#labels = kmeans.labels_

#Glue back to originaal data
#feat['clusters'] = labels

#Add the column into our list
#feat(['clusters'])

#Lets analyze the clusters
#feat['good'].groupby(['clusters']).mean()