In [None]:
import csv
import math
import os
import re
import sqlite3
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import circmean

In [None]:
def pull_data_from_db(db_path, exclude_players=None, train_weeks=None, test_weeks=None, save_as_csv=False, csv_filepath=None):
    '''
    Pulls the train and test frames out of the PersonTime table of a SQLite db.

    @param db_path - relative path to db (i.e. '../summer')
    @param exclude_players - list of players to exclude from the train/test sets.
        Each entry must already be a quoted SQL string literal (e.g. "'Mark'").
        None or an empty list falls back to the TAs:
        'Mark', 'vhil', 'record', 'Eugy', 'vhil2'

    @param train_weeks - list of RecordingName values (quoted SQL literals) for the train set
        None or an empty list falls back to '1' .. '6'

    @param test_weeks - list of RecordingName values (quoted SQL literals) for the test set
        None or an empty list falls back to '7', '8'

    @param save_as_csv - boolean to save training and testing dataframes to csv
        defaults to False
    @param csv_filepath - string filepath
        defaults to None, will not save if filepath is not specified.
        The two frames are written to separate files derived from this path:
        'out.csv' becomes 'out_train.csv' and 'out_test.csv'.

    @return train_df
    @return test_df

    ~1 hour 20 minutes to pull test/train dfs for full summer db with default settings
    '''
    if not exclude_players:
        exclude_players = ['\'Mark\'', '\'vhil\'', '\'record\'', '\'Eugy\'', '\'vhil2\'']

    if not train_weeks:
        train_weeks = ['\'1\'', '\'2\'', '\'3\'', '\'4\'', '\'5\'', '\'6\'']

    if not test_weeks:
        test_weeks = ['\'7\'', '\'8\'']

    # NOTE: the list entries are spliced verbatim into the SQL text, so they
    # must be pre-quoted literals from a trusted source (never user input).
    query_template = """SELECT PlayerName, RecordingName,
        HeadPos_x, HeadPos_y, HeadPos_z, 
        LeftHandPos_x, LeftHandPos_y, LeftHandPos_z,
        RightHandPos_x, RightHandPos_y, RightHandPos_z,
        HeadRot_w, HeadRot_x, HeadRot_y, HeadRot_z,
        LeftHandRot_w, LeftHandRot_x, LeftHandRot_y, LeftHandRot_z,
        RightHandRot_w, RightHandRot_x, RightHandRot_y, RightHandRot_z
        FROM PersonTime
        WHERE PlayerName NOT IN ({})
        AND RecordingName IN ({})
        """
    excluded_sql = ",".join(exclude_players)

    con = sqlite3.connect(db_path)
    try:
        train_df = pd.read_sql_query(query_template.format(excluded_sql, ",".join(train_weeks)), con)
        test_df = pd.read_sql_query(query_template.format(excluded_sql, ",".join(test_weeks)), con)
    finally:
        # Release the database handle even if one of the queries fails.
        con.close()

    print(train_df.shape)
    print(test_df.shape)

    if save_as_csv and csv_filepath:
        # Writing both frames to the same path would leave only the test set
        # on disk, so each split gets its own suffixed file.
        root, ext = os.path.splitext(csv_filepath)
        train_df.to_csv(root + '_train' + ext, index=False)
        test_df.to_csv(root + '_test' + ext, index=False)

    return train_df, test_df

In [None]:
def calculate_euler_angles(df):
    '''
    Calculates euler angles based on Head, LeftHand, RightHand Quaternions

    @param df - expects df to have w,x,y,z values for Head, LeftHand, RightHand
        (columns HeadRot_w .. HeadRot_z, LeftHandRot_*, RightHandRot_*)

    @return the same df object, modified in place, with additional
        head_/lhand_/rhand_ roll, pitch and yaw columns (radians)
    '''
    body_parts = (('HeadRot', 'head'), ('LeftHandRot', 'lhand'), ('RightHandRot', 'rhand'))

    for rot_prefix, out_prefix in body_parts:
        q0 = df[rot_prefix + '_w']
        q1 = df[rot_prefix + '_x']
        q2 = df[rot_prefix + '_y']
        q3 = df[rot_prefix + '_z']

        df[out_prefix + '_roll'] = np.arctan2(2*(q0*q3-q1*q2), 1-2*(q3**2+q1**2))

        # Rounding (or a slightly unnormalised quaternion) can push this value
        # a hair outside [-1, 1], where arcsin returns NaN; clamp it first.
        sin_pitch = np.clip(2*(-q0*q1-q2*q3), -1.0, 1.0)
        df[out_prefix + '_pitch'] = np.arcsin(sin_pitch)

        df[out_prefix + '_yaw'] = np.arctan2(2*(q0*q2-q3*q1), 1-2*(q1**2+q2**2))

    return df

In [None]:
translations = ['HeadPos_y', 'LeftHandPos_y', 'RightHandPos_y']
euler_angles = ['head_roll', 'head_pitch', 'head_yaw', 
                'lhand_roll', 'lhand_pitch', 'lhand_yaw', 
                'rhand_roll', 'rhand_pitch', 'rhand_yaw']


def _heading_column(df, prefix):
    '''Returns the name of the heading-angle column for prefix ('head', 'lhand', 'rhand').'''
    # calculate_euler_angles emits '<prefix>_yaw'; '<prefix>_gamma' is accepted
    # as a fallback for frames that carry that column name instead.
    for suffix in ('yaw', 'gamma'):
        candidate = '{}_{}'.format(prefix, suffix)
        if candidate in df.columns:
            return candidate
    raise KeyError("no heading column for '{0}': expected '{0}_yaw' or '{0}_gamma'".format(prefix))


def _surge_strafe_stats(window, pos_prefix, angle_column):
    '''Returns [max, min, median, std] of surging followed by the same four stats of strafing.'''
    x_offset = window[pos_prefix + '_x'] - window[pos_prefix + '_x'].mean()
    z_offset = window[pos_prefix + '_z'] - window[pos_prefix + '_z'].mean()
    # Rotate the mean-centred x/z offsets by the negated circular mean of the heading.
    angle = -circmean(window[angle_column], high=np.pi, low=-np.pi)
    surging = z_offset * np.cos(angle) - x_offset * np.sin(angle)
    strafing = z_offset * np.sin(angle) + x_offset * np.cos(angle)
    return [surging.max(), surging.min(), surging.median(), surging.std(),
            strafing.max(), strafing.min(), strafing.median(), strafing.std()]


def preprocess_data(df, standard_columns=translations+euler_angles, surging_strafing=True, player_idx=None):
    '''
    Builds sliding-window feature vectors and labels for every (recording, player) pair.

    @param df - train or test dataframe; needs PlayerName, RecordingName, every
        column in standard_columns and, when surging_strafing is True, the
        Head/LeftHand/RightHand Pos_x and Pos_z columns plus a heading column
        per body part ('<prefix>_yaw', or '<prefix>_gamma' as a fallback)
    @param standard_columns - list of column names included in feature generation
        calculates features max, min, median, mean, std for column over sliding windows
        defaults to translations + euler_angles specified above
    @param surging_strafing - boolean to include surging and strafing as a feature
    @param player_idx - optional mapping from PlayerName to label;
        when None the notebook-level `playerIdx` mapping is used

    @return X - list of feature lists, one per window centre
    @return y - list of labels, parallel to X
    '''
    # Half-widths in frames of the four windows; each window spans 2 * half
    # frames (30 / 90 / 300 / 900).  The "1s/3s/10s/30s" naming presumably
    # assumes 30 frames per second — TODO confirm against the recorder.
    half_widths = (15, 45, 150, 450)
    largest_half = max(half_widths)
    stride = 30

    print('start time: ', time.strftime("%H:%M:%S", time.localtime()))

    player_names = df['PlayerName'].unique()
    recording_names = df['RecordingName'].unique()

    heading_columns = []
    if surging_strafing:
        heading_columns = [('HeadPos', _heading_column(df, 'head')),
                           ('LeftHandPos', _heading_column(df, 'lhand')),
                           ('RightHandPos', _heading_column(df, 'rhand'))]

    X = []
    y = []

    for recording_name in recording_names:
        recording_df = df[df['RecordingName'] == recording_name]

        print(recording_name, 'start_time', time.strftime("%H:%M:%S", time.localtime()))
        for name in player_names:
            player_df = recording_df[recording_df['PlayerName'] == name]
            n_frames = player_df.shape[0]

            for i in range(15, n_frames, stride):
                # A centre is used only when every window fits; the widest
                # window is the binding constraint.
                if i < largest_half or i + largest_half > n_frames:
                    continue

                # iloc's stop is exclusive, so [i - h, i + h) covers the
                # inclusive frames i - h .. i + h - 1, i.e. exactly 2 * h rows.
                windows = [player_df.iloc[i - half : i + half] for half in half_widths]

                features = []
                for column_name in standard_columns:
                    for window in windows:
                        column = window[column_name]
                        features.extend([column.max(), column.min(), column.median(), column.mean(), column.std()])

                if surging_strafing:
                    for window in windows:
                        for pos_prefix, angle_column in heading_columns:
                            features.extend(_surge_strafe_stats(window, pos_prefix, angle_column))

                X.append(features)
                # Resolved lazily so the global is only required when no
                # explicit mapping was passed and a label is actually needed.
                labels = player_idx if player_idx is not None else playerIdx
                y.append(labels[name])

    print('end time: ', time.strftime("%H:%M:%S", time.localtime()))

    print(len(X))
    if X:
        # Guarded: X is empty when no player has enough frames for the widest window.
        print(len(X[0]))
    print(len(y))

    return X, y

In [None]:
def dump_data(data, filepath, x=True):
    '''
    Writes features or labels to a text file, overwriting any existing file.

    @param data - when x is True, an iterable of rows (each an iterable of values);
        otherwise an iterable of labels
    @param filepath - destination path
    @param x - True writes the rows as CSV without a header line;
        False writes str(label) for each label, one per line
    '''
    if x:
        # newline='' hands line-ending control to csv.writer; without it some
        # platforms translate the writer's '\r\n' again and emit blank rows.
        with open(filepath, 'w', newline='') as f:
            csv.writer(f).writerows(data)
    else:
        with open(filepath, 'w') as f:
            f.write('\n'.join(str(label) for label in data))
    
def read_data(filepath, x=True):
    '''
    Reads back a file written by dump_data.

    @param filepath - path of the file to read
    @param x - True reads a header-less CSV of feature rows;
        False reads one integer label per line

    @return list of rows (each a list of values) when x is True,
        otherwise a list of int labels
    '''
    if x:
        # dump_data writes no header line, so the first row is data; without
        # header=None pandas would consume it as column names and drop a sample.
        X = pd.read_csv(filepath, header=None).values.tolist()
        print('X', len(X))
        return X

    with open(filepath) as f:
        lines = f.readlines()

    y = []
    for line in lines:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline added by an editor).
            continue
        # Keep only the digits of the line before converting it to an int.
        y.append(int(re.sub(r'\D+', '', line)))
    print('y', len(y))
    return y