In [1]:
import pandas as pd
import json
from pprint import pprint
import numpy as np
from os import listdir
from os.path import isfile, join

In [36]:
def get_data(filename:str):
    with open(filename) as f:
        json_file = json.load(f)
    
    data = json_file['people'][0]['pose_keypoints_2d']
    assert len(data) == 75
    return data

def pipeline(path, label):
    
    onlyfiles = [f for f in listdir(path) if isfile(join(path, f))]
    df = pd.DataFrame(columns=list(range(75)))
    for i, file in enumerate(onlyfiles):
        df.loc[i] = get_data(path+file)
    df['label'] = label
    return df


def select_frame(df, single=True):
    # Take df where each row is a frame in the video and return
    # either 1 still frame or multiple still frames
    deltas = []
    for i in range(len(df)-1,0,-1):
        # iterate in reverse order as best poses often last
        current_ = df.iloc[i]
        next_ = df.iloc[i-1]
        deltas.append(np.mean(abs(next_ - current_)))
        
    if single:
        # Single Frame
        index = deltas.index(min(deltas))
        print(index)
        return df.iloc[index]
    else:
        # Multiple Frames
        SD = np.std(deltas)
        ds = deltas < SD*0.5
        return df[:-1][ds]
        

good_data = pipeline('warrior/', label=1)
bad_data = pipeline('warrior_incorrect/', label=0)
#get_data('warrior/warrior_000000000039_keypoints.json', classification = 'good')
    

In [38]:
select_frame(good_data, single=True)

110


0        664.602000
1        200.388000
2          0.905580
3        692.056000
4        257.219000
5          0.871387
6        645.063000
7        257.146000
8          0.817554
9        572.615000
10       237.570000
11         0.807360
12       488.310000
13       214.018000
14         0.835766
15       733.237000
16       257.207000
17         0.821766
18       817.453000
19       261.029000
20         0.845544
21       884.138000
22       266.975000
23         0.829562
24       701.842000
25       413.798000
26         0.749091
27       666.676000
28       413.863000
29         0.742982
            ...    
46       194.501000
47         0.728655
48       674.474000
49       194.491000
50         0.910820
51         0.000000
52         0.000000
53         0.000000
54       703.846000
55       204.224000
56         0.786993
57       915.357000
58       599.947000
59         0.739990
60       917.295000
61       588.254000
62         0.824287
63       886.047000
64       588.235000


In [25]:
full_data = pd.concat([good_data,bad_data])
full_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,66,67,68,69,70,71,72,73,74,label
0,664.68,204.282,0.918086,692.073,259.115,0.875232,646.98,257.194,0.83214,572.597,...,515.76,609.804,0.599547,521.606,603.885,0.594741,576.51,601.873,0.755656,1
1,664.665,204.246,0.918837,692.09,259.068,0.875573,646.958,257.168,0.822406,572.624,...,515.737,611.718,0.592743,521.6,603.895,0.596312,576.498,601.908,0.75717,1
2,664.684,206.193,0.931179,693.983,257.119,0.87567,647.023,255.218,0.807673,572.645,...,517.692,611.715,0.589091,521.646,603.913,0.586224,578.445,601.916,0.748488,1
3,666.623,212.076,0.964515,696.038,255.15,0.89779,649.037,253.21,0.80095,574.538,...,515.785,609.764,0.605959,521.614,603.875,0.596971,578.452,601.928,0.742447,1
4,666.646,212.079,0.957779,696.036,255.137,0.896309,649.01,251.283,0.797011,574.496,...,515.775,609.786,0.600176,521.619,603.891,0.589176,578.461,601.957,0.746312,1


In [29]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

x = full_data.drop(columns=['label'])
y = full_data['label']
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)
preds = model.predict(X_test)
accuracy_score(y_test, preds)




1.0