In [45]:
from sklearn.datasets import make_classification
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
import numpy as np
import boto3
import json
import pandas as pd
from io import StringIO


def select_frame(df, single=True):
    # Take df where each row is a frame in the video and return
    # either 1 still frame or multiple still frames
    #print(df.iloc[843])
    deltas = []
    for i in range(len(df)-100,100,-1):
        # iterate in reverse order as best poses often last
        current_ = df.iloc[i]
        next_ = df.iloc[i-1]
        deltas.append(np.mean(abs(next_ - current_)))
    SD = np.std(deltas)
        
    if single:
        # Single Frame
        for i in range(len(df)-1,0,-1):
            # iterate in reverse order as best poses often last
            current_ = df.iloc[i]
            next_ = df.iloc[i-1]
            d = np.mean(abs(next_ - current_)) 
            if d < 0.05*SD:
                print(f'frame index: {i}')
                frame = df.iloc[i-20:i]
                #print(len(frame))
                return frame
    else:
        # Multiple Frames
        ds = deltas < SD*0.5
        return df[:-1][ds]
    
    
def getCSV(filepath):
    # Get csv file from s3 and apply labels
    client = boto3.client('s3',aws_access_key_id='AKIAJYPGAZE3RUOKVKVA',aws_secret_access_key='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')

    #client = boto3.client.get_Object('alignedstorage', filepath)
    bucket_name = 'alignedstorage'

    object_key = filepath
    csv_obj = client.get_object(Bucket=bucket_name, Key=object_key)
    body = csv_obj['Body']
    csv_string = body.read().decode('utf-8')

    df = pd.read_csv(StringIO(csv_string), index_col='Unnamed: 0')
    df['arm'] = df['knee1'] = df['knee2'] = df['hips'] = df['torso'] = 0
    if 'arm' in filepath:
        df['arm'] = 1
    elif 'knee1' in filepath:
        df['knee1'] = 1
    elif 'knee2' in filepath:
        df['knee2'] = 1
    elif 'hips' in filepath:
        df['hips'] = 1
    elif 'torso' in filepath:
        df['torso'] = 1
        
    frame = select_frame(df, single=True)
    
    return frame


def iterate_bucket_items(bucket):
    """
    Move through all CSV files, take still frames, and apply correct labels
    """

    #client = boto3.client('s3',aws_access_key_id='AKIAJYPGAZE3RUOKVKVA',aws_secret_access_key='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')
    s3 = boto3.resource('s3',aws_access_key_id='AKIAJYPGAZE3RUOKVKVA',aws_secret_access_key='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')
    bucket = s3.Bucket('alignedstorage')
    prefix_objs = bucket.objects.filter(Prefix="training_data")
    
    col_names =  list(range(80))
    #print(col_names)
    full_df  = pd.DataFrame(columns = col_names)
    print(full_df)
    for obj in prefix_objs:
        key = obj.key
        #print(key)
        if 'csv' in key:
            single = getCSV(key)
            if single is None:
                continue
            full_df = full_df.append(single)
            print(full_df)
    
    #final = pd.DataFrame(np.asarray(full_df))
    final = full_df
    return final


def multi_acc(real, preds):
    # accuracy for multi classification model
    correct = 0
    incorrect = 0
    for i,j in zip(real.values.flatten(), preds.values.flatten()):
        if i==j:
            correct += 1
        else:
            incorrect +=1
    return correct / (correct+incorrect)


def multimodel(full_df):
    from sklearn.model_selection import train_test_split
    # Takes DF with each row as best pose from a video
    # Fits multi-label model and returns evaluation metrics
    from sklearn.metrics import accuracy_score, f1_score
    #print(full_df.head())
    
    #X = full_df.drop(columns = ['arm','knee1','knee2','torso','hips'])
    #Y = full_df[['arm','knee1','knee2','torso','hips']]
    X = full_df.drop(columns = [75,76,77,78,79])
    Y = full_df[[75,76,77,78,79]]
    #print(X.head())
    #print(Y.head())
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20)
    
    # Train Model
    forest = RandomForestClassifier(n_estimators=100, random_state=1)
    multi_target_forest = MultiOutputClassifier(forest, n_jobs=-1)
    multi_target_forest.fit(X_train, Y_train)
    
    # Evaluate Model
    preds = pd.DataFrame(multi_target_forest.predict(X_test))
    print(Y_test)
    print(preds)
    #accuracy = multi_acc(Y_test, preds)
    accuracy = accuracy_score(Y_test.values.flatten(), preds.values.flatten())
    f1 = f1_score(Y_test.values.flatten(), preds.values.flatten())
    return accuracy, f1


In [30]:
getCSV('training_data/warrior2_torso_1.csv')

frame index: 851


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,70,71,72,73,74,arm,knee1,knee2,hips,torso
832,835.124,151.421,0.879611,758.753,200.307,0.84656,697.994,182.665,0.747426,596.064,...,617.643,0.740047,449.102,598.024,0.579599,0,0,0,0,1
833,835.122,151.418,0.877184,758.728,200.311,0.846408,697.995,182.694,0.746638,596.044,...,617.635,0.740502,449.126,598.031,0.585526,0,0,0,0,1
834,835.125,151.42,0.880922,760.564,200.323,0.840845,699.906,182.692,0.733362,598.039,...,617.629,0.738601,449.119,598.033,0.576895,0,0,0,0,1
835,836.928,151.425,0.883468,760.564,200.341,0.842899,698.037,182.695,0.738073,598.015,...,617.642,0.735566,449.113,599.951,0.578679,0,0,0,0,1
836,836.943,153.3,0.883012,760.614,200.361,0.840107,699.887,182.707,0.727838,598.003,...,617.649,0.733619,449.122,598.045,0.57897,0,0,0,0,1
837,836.936,153.335,0.872944,760.617,200.344,0.84297,699.889,182.69,0.734407,596.059,...,617.639,0.736789,449.106,598.057,0.588281,0,0,0,0,1
838,836.975,153.34,0.886265,760.619,200.327,0.846767,699.891,182.669,0.742556,594.129,...,617.639,0.729058,449.141,598.009,0.584708,0,0,0,0,1
839,836.984,153.349,0.888372,760.621,200.311,0.838569,699.95,182.688,0.726217,596.091,...,617.635,0.719132,449.161,596.075,0.58431,0,0,0,0,1
840,836.973,155.286,0.87031,760.655,200.372,0.836591,701.864,182.709,0.715861,599.96,...,617.632,0.733216,449.138,596.09,0.585334,0,0,0,0,1
841,836.973,157.198,0.874158,760.619,200.366,0.8409,701.841,182.686,0.717961,598.019,...,617.625,0.733675,449.133,598.019,0.587025,0,0,0,0,1


In [46]:
final = iterate_bucket_items('alignedstorage')


Empty DataFrame
Columns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79]
Index: []

[0 rows x 80 columns]
frame index: 563


NameError: name 'nrows' is not defined

In [42]:
final

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,70,71,72,73,74,arm,knee1,knee2,hips,torso
544,,,,,,,,,,,...,637.218,0.786604,386.483,615.643,0.576916,1.0,0.0,0.0,0.0,0.0
545,,,,,,,,,,,...,637.222,0.788530,386.451,615.663,0.579151,1.0,0.0,0.0,0.0,0.0
546,,,,,,,,,,,...,637.230,0.791969,386.456,615.659,0.578899,1.0,0.0,0.0,0.0,0.0
547,,,,,,,,,,,...,637.230,0.791226,386.453,615.665,0.579776,1.0,0.0,0.0,0.0,0.0
548,,,,,,,,,,,...,637.199,0.791537,386.462,615.639,0.582725,1.0,0.0,0.0,0.0,0.0
549,,,,,,,,,,,...,637.221,0.784521,386.455,615.619,0.578639,1.0,0.0,0.0,0.0,0.0
550,,,,,,,,,,,...,637.231,0.785896,386.448,615.619,0.583354,1.0,0.0,0.0,0.0,0.0
551,,,,,,,,,,,...,637.236,0.783187,386.445,615.615,0.583218,1.0,0.0,0.0,0.0,0.0
552,,,,,,,,,,,...,637.226,0.782605,386.435,613.728,0.582228,1.0,0.0,0.0,0.0,0.0
553,,,,,,,,,,,...,637.237,0.780317,386.427,613.737,0.585257,1.0,0.0,0.0,0.0,0.0


In [14]:
multimodel(final)

     75   76   77   78   79
2   1.0  0.0  0.0  0.0  0.0
25  0.0  0.0  0.0  0.0  1.0
27  0.0  0.0  0.0  0.0  1.0
16  0.0  1.0  0.0  0.0  0.0
19  0.0  1.0  0.0  0.0  0.0
3   1.0  0.0  0.0  0.0  0.0
     0    1    2    3    4
0  0.0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  1.0  0.0
3  0.0  0.0  0.0  0.0  0.0
4  0.0  0.0  0.0  0.0  0.0
5  0.0  0.0  1.0  0.0  0.0


(0.7333333333333333, 0.0)