In [1]:
import pandas as pd
import json
from pprint import pprint
import numpy as np
from os import listdir
from os.path import isfile, join

In [34]:
def get_data(filename:str):
    # 
    with open(filename) as f:
        json_file = json.load(f)
    
    data = json_file['people'][0]['pose_keypoints_2d']
    assert len(data) == 75
    return data

def pipeline(path, label):
    
    onlyfiles = [f for f in listdir(path) if isfile(join(path, f))]
    df = pd.DataFrame(columns=list(range(75)))
    for i, file in enumerate(onlyfiles):
        df.loc[i] = get_data(path+file)
    df['label'] = label
    return df


def select_frame(df, single=True):
    # Take df where each row is a frame in the video and return
    # either 1 still frame or multiple still frames
    deltas = []
    for i in range(len(df)-1,0,-1):
        #print(i)
        current_ = df.iloc[i]
        next_ = df.iloc[i-1]
        deltas.append(np.mean(abs(next_ - current_)))
        
    if single:
        # Single Frame
        print(min(deltas))
        index = deltas.index(min(deltas))
        print(index)
        return df.iloc[index]
    
    else:
        # Multiple Frames
        SD = np.std(deltas)
        #print(SD)
        #print(deltas)
        ds = deltas < SD*0.5
        df = df[:-1]
        #print(ds)
        return df[ds]
        

good_data = pipeline('warrior/', label=1)
bad_data = pipeline('warrior_incorrect/', label=0)
#get_data('warrior/warrior_000000000039_keypoints.json', classification = 'good')
    

In [35]:
select_frame(good_data, single=False)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,66,67,68,69,70,71,72,73,74,label
8,666.639,212.074,0.962055,696.061,255.133,0.89826,649.029,253.176,0.7994,574.501,...,517.704,607.883,0.605212,521.634,603.866,0.599607,578.454,601.934,0.749427,1
11,664.659,204.242,0.91858,693.98,257.215,0.885814,646.967,257.129,0.815086,572.592,...,515.763,611.725,0.59581,521.622,603.913,0.595275,576.509,601.94,0.750996,1
13,666.636,212.088,0.962309,696.064,255.128,0.896407,649.024,251.298,0.798257,572.646,...,517.691,609.766,0.605346,521.631,603.876,0.598103,578.443,601.935,0.747661,1
27,666.636,212.087,0.96128,696.029,255.155,0.899072,649.015,253.2,0.794211,572.642,...,515.762,609.787,0.597235,521.611,603.889,0.585286,578.453,601.962,0.748625,1
32,664.655,204.232,0.917431,693.962,257.224,0.879124,646.956,257.128,0.812736,572.604,...,515.751,611.714,0.592809,521.612,603.904,0.594426,576.503,601.938,0.747395,1
43,664.639,202.309,0.910863,692.074,257.194,0.86571,645.079,257.048,0.805191,572.634,...,513.76,609.79,0.599202,517.727,603.886,0.582484,574.581,601.916,0.750659,1
64,666.57,200.305,0.924399,695.894,257.126,0.894898,649.009,255.18,0.830653,574.505,...,513.814,609.798,0.587698,519.695,603.898,0.572097,576.513,601.926,0.747085,1
70,664.64,210.062,0.953982,695.908,257.075,0.899184,648.959,255.216,0.829889,572.637,...,517.684,609.793,0.596101,521.636,603.894,0.591148,578.481,601.888,0.735068,1
77,662.721,202.267,0.881897,693.975,257.184,0.884116,646.97,257.1,0.823146,572.647,...,515.773,611.732,0.592266,521.628,603.921,0.585533,578.433,601.919,0.731756,1
82,664.73,202.285,0.917078,693.997,257.202,0.884436,647.043,257.032,0.815918,572.61,...,513.822,609.814,0.600298,519.686,603.904,0.583588,576.464,601.909,0.740236,1


In [25]:
full_data = pd.concat([good_data,bad_data])
full_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,66,67,68,69,70,71,72,73,74,label
0,664.68,204.282,0.918086,692.073,259.115,0.875232,646.98,257.194,0.83214,572.597,...,515.76,609.804,0.599547,521.606,603.885,0.594741,576.51,601.873,0.755656,1
1,664.665,204.246,0.918837,692.09,259.068,0.875573,646.958,257.168,0.822406,572.624,...,515.737,611.718,0.592743,521.6,603.895,0.596312,576.498,601.908,0.75717,1
2,664.684,206.193,0.931179,693.983,257.119,0.87567,647.023,255.218,0.807673,572.645,...,517.692,611.715,0.589091,521.646,603.913,0.586224,578.445,601.916,0.748488,1
3,666.623,212.076,0.964515,696.038,255.15,0.89779,649.037,253.21,0.80095,574.538,...,515.785,609.764,0.605959,521.614,603.875,0.596971,578.452,601.928,0.742447,1
4,666.646,212.079,0.957779,696.036,255.137,0.896309,649.01,251.283,0.797011,574.496,...,515.775,609.786,0.600176,521.619,603.891,0.589176,578.461,601.957,0.746312,1


In [29]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

x = full_data.drop(columns=['label'])
y = full_data['label']
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)
preds = model.predict(X_test)
accuracy_score(y_test, preds)




1.0