In [173]:
from sklearn.datasets import make_classification
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
import numpy as np
import boto3
import json
import pandas as pd
from io import StringIO


def select_frame(df, single=True):
    # Take df where each row is a frame in the video and return
    # either 1 still frame or multiple still frames
    #print(df.iloc[843])
    deltas = []
    for i in range(len(df)-100,100,-1):
        # iterate in reverse order as best poses often last
        current_ = df.iloc[i]
        next_ = df.iloc[i-1]
        deltas.append(np.mean(abs(next_ - current_)))
    SD = np.std(deltas)
        
    if single:
        # Single Frame
        for i in range(len(df)-1,0,-1):
            # iterate in reverse order as best poses often last
            current_ = df.iloc[i]
            next_ = df.iloc[i-1]
            d = np.mean(abs(next_ - current_)) 
            if d < 0.01*SD:
                print(f'frame index: {i}')
                frame = df.iloc[i]
                #print(len(frame))
                return list(frame)
    else:
        # Multiple Frames
        ds = deltas < SD*0.5
        return df[:-1][ds]
    
    
def getCSV(filepath):
    # pull csv file and add labels for modeling
    client = boto3.client('s3',aws_access_key_id='AKIAJYPGAZE3RUOKVKVA',aws_secret_access_key='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')

    #client = boto3.client.get_Object('alignedstorage', filepath)
    bucket_name = 'alignedstorage'

    object_key = filepath
    csv_obj = client.get_object(Bucket=bucket_name, Key=object_key)
    body = csv_obj['Body']
    csv_string = body.read().decode('utf-8')

    df = pd.read_csv(StringIO(csv_string), index_col='Unnamed: 0')
    df['arm'] = df['knee1'] = df['knee2'] = df['hips'] = df['torso'] = 0
    if 'arm' in filepath:
        df['arm'] = 1
    elif 'knee1' in filepath:
        df['knee1'] = 1
    elif 'knee2' in filepath:
        df['knee2'] = 1
    elif 'hips' in filepath:
        df['hips'] = 1
    elif 'torso' in filepath:
        df['torso'] = 1
        
    frame = select_frame(df, single=True)
    return frame


def iterate_bucket_items(bucket):
    """
    Generator that iterates over all objects in a given s3 bucket
    
    See http://boto3.readthedocs.io/en/latest/reference/services/s3.html#S3.Client.list_objects_v2 
    for return data format
    :param bucket: name of s3 bucket
    :return: df of metadata for an object
    """

    #client = boto3.client('s3',aws_access_key_id='AKIAJYPGAZE3RUOKVKVA',aws_secret_access_key='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')
    s3 = boto3.resource('s3',aws_access_key_id='AKIAJYPGAZE3RUOKVKVA',aws_secret_access_key='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')
    bucket = s3.Bucket('alignedstorage')
    prefix_objs = bucket.objects.filter(Prefix="training_data")
    print(prefix_objs)
    full_df = []
    for obj in prefix_objs:
        key = obj.key
        #print(key)
        if 'csv' in key:
            single = getCSV(key)
            if single is None:
                continue
            #print(key)
            full_df.append(single)
    
    final = pd.DataFrame(np.asarray(full_df))
    return final


def multi_acc(real, preds):
    # accuracy for multi classification model
    correct = 0
    incorrect = 0
    for i,j in zip(real.values.flatten(), preds.values.flatten()):
        if i==j:
            correct += 1
        else:
            incorrect +=1
    return correct / (correct+incorrect)


def multimodel(full_df):
    from sklearn.model_selection import train_test_split
    # Takes DF with each row as best pose from a video
    # Fits multi-label model and returns evaluation metrics
    from sklearn.metrics import accuracy_score, f1_score
    #print(full_df.head())
    
    #X = full_df.drop(columns = ['arm','knee1','knee2','torso','hips'])
    #Y = full_df[['arm','knee1','knee2','torso','hips']]
    X = full_df.drop(columns = [75,76,77,78,79])
    Y = full_df[[75,76,77,78,79]]
    #print(X.head())
    #print(Y.head())
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20)
    
    # Train Model
    forest = RandomForestClassifier(n_estimators=100, random_state=1)
    multi_target_forest = MultiOutputClassifier(forest, n_jobs=-1)
    multi_target_forest.fit(X_train, Y_train)
    
    # Evaluate Model
    preds = pd.DataFrame(multi_target_forest.predict(X_test))
    print(Y_test)
    print(preds)
    #accuracy = multi_acc(Y_test, preds)
    accuracy = accuracy_score(Y_test.values.flatten(), preds.values.flatten())
    f1 = f1_score(Y_test.values.flatten(), preds.values.flatten())
    return accuracy, f1


In [165]:
#getCSV('training_data/warrior2_torso_1.csv')


In [157]:
final = iterate_bucket_items('alignedstorage')


s3.Bucket.objectsCollection(s3.Bucket(name='alignedstorage'), s3.ObjectSummary)
frame index: 399
training_data/warrior2_arms_1.csv
frame index: 424
training_data/warrior2_arms_2.csv
frame index: 793
training_data/warrior2_arms_3.csv
frame index: 310
training_data/warrior2_arms_4.csv
frame index: 269
training_data/warrior2_correct_3.csv
frame index: 1313
training_data/warrior2_correct_5.csv
frame index: 323
training_data/warrior2_hips_2.csv
frame index: 880
training_data/warrior2_hips_3.csv
frame index: 457
training_data/warrior2_hips_4.csv
frame index: 723
training_data/warrior2_knee1_1.csv
frame index: 275
training_data/warrior2_knee1_2.csv
frame index: 386
training_data/warrior2_knee1_3.csv
frame index: 666
training_data/warrior2_knee1_4.csv
frame index: 262
training_data/warrior2_knee1_6.csv
frame index: 467
training_data/warrior2_knee2_2.csv
frame index: 440
training_data/warrior2_knee2_4.csv
frame index: 493
training_data/warrior2_torso_1.csv
frame index: 344
training_data/warrior

In [159]:
final 

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,70,71,72,73,74,75,76,77,78,79
0,686.182,151.357,0.873142,635.367,214.088,0.853269,574.554,214.042,0.759944,478.53,...,637.249,0.76544,386.429,611.764,0.573198,1.0,0.0,0.0,0.0,0.0
1,735.173,143.505,0.834527,682.357,190.566,0.857127,623.48,178.69,0.749006,529.513,...,637.205,0.690177,431.575,619.576,0.546692,1.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,760.728,174.85,0.892342,729.229,216.016,0.879971,666.624,223.84,0.771783,574.547,...,648.949,0.772273,488.32,631.293,0.669767,1.0,0.0,0.0,0.0,0.0
4,670.545,135.73,0.876232,637.211,208.202,0.872425,588.199,206.179,0.761004,492.151,...,603.837,0.770348,431.519,590.258,0.686644,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,745.024,100.303,0.90417,688.139,153.333,0.80749,635.228,147.331,0.799763,539.321,...,641.138,0.726055,505.941,629.4,0.664576,0.0,0.0,0.0,1.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [174]:
multimodel(final)

     75   76   77   78   79
10  0.0  1.0  0.0  0.0  0.0
8   0.0  0.0  0.0  1.0  0.0
2   1.0  0.0  0.0  0.0  0.0
12  0.0  1.0  0.0  0.0  0.0
     0    1    2    3    4
0  0.0  1.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  1.0  0.0
3  0.0  1.0  0.0  0.0  0.0


(0.85, 0.5714285714285715)