# Understanding the parser?
* In the last cell, enter the xml files of data you wish to use in the game_files list.
* Select an aggregation function (e.g. `agregate_feature_vectors`) for the line "vect = agregation_function(game_files)"
* Export the resulting data to a .csv file with the line "export_feature_vectors(vect, "name_of_the_file.csv")"

# First steps

## Imports & Imports of data

In [1]:
import xml.etree.ElementTree as et
import numpy as np
import json
import pandas as pd

In [2]:
# Parse the JSON file at ./Hololens_data/p1.json into the module-level name `data`.
with open ('./Hololens_data/p1.json') as json_file:
    data = json.load(json_file)

In [3]:
def norm(vect):
    """Return the Euclidean (L2) norm of *vect*.

    Args:
        vect: Iterable of numbers (list, tuple or 1-D numpy array).

    Returns:
        ``np.sqrt`` of the sum of the squared components; ``0.0`` for an
        empty iterable.
    """
    # The builtin ``sum`` is used directly; the accumulator is no longer
    # named ``sum``, which used to shadow the builtin inside this function.
    return np.sqrt(sum(component ** 2 for component in vect))

## Useful functions for extracting data from parsed json file

* The function `read_time` returns the time in seconds, as a float, from the parsed timestamp

In [4]:
def create_df_hand(game_file):
    """Load the per-sample user data of a recorded game into a DataFrame.

    The JSON file is expected to expose
    ``data['datasList'][0]['listLevelDatas'][i]['userDatas']`` for every
    level ``i``; the records of all levels are stacked in level order.

    Args:
        game_file: Path of the JSON file to read.

    Returns:
        A DataFrame with a fresh 0..n-1 index that contains the original
        record fields, the columns produced by expanding ``headPos`` (the
        code relies on ``x``, ``y`` and ``z`` being among them) and a
        ``hand_positions`` column holding ``(x, y, z, timeStamp)`` tuples.

    Raises:
        ValueError: if the file contains no level (nothing to concatenate).
    """
    with open(game_file, encoding="utf-8") as json_file:
        data = json.load(json_file)

    levels = data['datasList'][0]['listLevelDatas']
    # One concat over all levels instead of re-copying the growing frame
    # once per level.
    df_game = pd.concat([pd.DataFrame(level['userDatas']) for level in levels])

    # Drop the samples whose timeStamp is zero (or negative), then rebuild
    # the index so the axis=1 concat below aligns row by row.
    df_game = df_game[df_game['timeStamp'] > 0].reset_index(drop=True)

    # Expand the position record into one column per coordinate.
    # NOTE(review): the coordinates come from the 'headPos' field although
    # the resulting column is called 'hand_positions' - confirm this is the
    # intended source.
    position = df_game['headPos'].apply(pd.Series)
    df_game = pd.concat([df_game, position], axis=1)

    # One (x, y, z, t) tuple per sample.
    df_game['hand_positions'] = df_game[['x', 'y', 'z', 'timeStamp']].apply(tuple, axis=1)

    return df_game

* The function `parse_root` returns an array containing all the parsed data from the file named 'game_file'

In [5]:
def create_df_balloon(game_file):
    """Load the balloon records of a recorded game into a DataFrame.

    The JSON file is expected to expose
    ``data['datasList'][0]['listLevelDatas'][i]['listBalloonDatas']`` for
    every level ``i``; the records of all levels are stacked in level order.
    The per-level row indices are kept as they are (no index reset).

    Args:
        game_file: Path of the JSON file to read.

    Returns:
        A DataFrame with one row per balloon record.

    Raises:
        ValueError: if the file contains no level (nothing to concatenate).
    """
    with open(game_file, encoding="utf-8") as json_file:
        data = json.load(json_file)

    levels = data['datasList'][0]['listLevelDatas']
    # One concat over all levels instead of re-copying the growing frame
    # once per level.
    return pd.concat([pd.DataFrame(level['listBalloonDatas']) for level in levels])

* The function `hand_positions` extracts the positions of the right hand along with the time corresponding to those positions. It returns an array of shape [(x, y, z, t)] (length number_of_position, with 4 elements arrays representing (x, y, z, t)).

In [6]:
def hand_positions(game_file):
    """Return the ``hand_positions`` column of ``create_df_hand(game_file)`` as a plain list."""
    hand_frame = create_df_hand(game_file)
    position_column = hand_frame['hand_positions']
    return list(position_column)

* The function `bubble_pop` extracts the time of each game event corresponding to the popping of a bubble by the player. It returns an array of shape [t] (length number_of_bubbles_popped).

In [7]:
def bubble_pop(game_file):
    """Return the ``timeOfDestroy`` column of ``create_df_balloon(game_file)`` as a plain list."""
    balloon_frame = create_df_balloon(game_file)
    destroy_times = balloon_frame['timeOfDestroy']
    return list(destroy_times)

* This last function `bubble_pop_clean` returns the time of bubble gathering, minus the last wave if it misses some of the data

# Extraction of sub-trajectories & features
The function `sub_trajectories` returns an array of shape [[*[(x,y,z,t),(x,y,z,t),...]*, for each bubble in wave], for each wave]. To access all positions and time of the trajectory between the *i* and *i+1* bubble of the *n* wave : *sub_trajectories[n-1][i]*.

In [8]:
def sub_trajectories(game_file, bubbles_per_wave=5):
    """Split the recorded positions into one sub-trajectory per popped bubble.

    The position samples returned by ``hand_positions`` are consumed in
    order: every sample whose time (index 3 of the tuple) is strictly less
    than a bubble's pop time goes into that bubble's sub-trajectory, and
    the next bubble continues from the first sample not yet consumed.
    Pop times that do not fill a complete wave are ignored.

    Args:
        game_file: Path of the recorded game file.
        bubbles_per_wave: Number of consecutive pop times grouped into one
            wave (default 5, the value that used to be hard-coded).

    Returns:
        An object array of shape ``(nb_waves, bubbles_per_wave)`` whose
        elements are lists of ``(x, y, z, t)`` tuples, so
        ``result[n][i]`` is the sub-trajectory of bubble ``i`` in wave ``n``.

    Raises:
        ValueError: if ``bubbles_per_wave`` is smaller than 1.
    """
    if bubbles_per_wave < 1:
        raise ValueError("bubbles_per_wave must be at least 1")

    samples = hand_positions(game_file)
    pop_times = bubble_pop(game_file)

    nb_samples = len(samples)
    nb_waves = len(pop_times) // bubbles_per_wave

    # The sub-trajectories have different lengths, so they are stored as
    # Python lists inside an explicit object array; np.array() on the
    # ragged nested list is rejected by recent NumPy versions.
    result = np.empty((nb_waves, bubbles_per_wave), dtype=object)

    cursor = 0  # index of the next sample that has not been assigned yet
    for wave in range(nb_waves):
        for bubble in range(bubbles_per_wave):
            pop_time = pop_times[wave * bubbles_per_wave + bubble]
            segment = []
            # The bound check stops cleanly when the samples end before the
            # pop time (this used to raise IndexError).
            while cursor < nb_samples and samples[cursor][3] < pop_time:
                segment.append(samples[cursor])
                cursor += 1
            result[wave, bubble] = segment

    return result

We define some functions to extract interesting features from trajectories. We first look for Static features : 
* `length` returns the length of the trajectory *traj*
* `barycenter` returns the barycenter of the trajectory *traj* in shape (x, y, z)
* `location` returns the average distance of each point to the barycenter of the trajectory *traj*
* `location_max` returns the maximum distance between a point of the trajectory and the barycenter of this trajectory
* `orientation` returns, in degrees, the two angles of the line from *(x1, y1, z1)* to *(x2, y2, z2)*: one computed from the y and x differences, one from the z and x differences
* `orientation_feat` returns the preceding feature for the first two points and the last two points of the trajectory *traj*
* `nb_turns` returns the number of turns in the trajectory *traj*, where a turn is detected when the orientation of two consecutive pairs of points differs by more than *limit_angle*

In [9]:
def length(traj):
    """Return the path length of *traj*.

    The length is the sum of the straight-line 3-D distances between each
    pair of consecutive points; only indices 0, 1 and 2 of every point are
    used. A trajectory with fewer than two points has length 0.
    """
    return sum(
        np.sqrt(
            (nxt[0] - cur[0]) ** 2
            + (nxt[1] - cur[1]) ** 2
            + (nxt[2] - cur[2]) ** 2
        )
        for cur, nxt in zip(traj, traj[1:])
    )

def barycenter(traj):
    """Return the mean position ``(x, y, z)`` of the points of *traj*.

    Only indices 0, 1 and 2 of every point are read. An empty trajectory
    yields ``(0, 0, 0)``.
    """
    count = len(traj)
    if count <= 0:
        return (0, 0, 0)

    sum_x = sum(point[0] for point in traj)
    sum_y = sum(point[1] for point in traj)
    sum_z = sum(point[2] for point in traj)
    return (sum_x / count, sum_y / count, sum_z / count)

def location(traj):
    """Return the mean distance between the points of *traj* and its barycenter.

    Only indices 0, 1 and 2 of every point are read.

    Returns:
        The average 3-D distance to ``barycenter(traj)``, or ``0`` for an
        empty trajectory (this used to raise ZeroDivisionError, whereas
        ``barycenter`` and ``location_max`` already return zeros in that case).
    """
    n = len(traj)
    if n == 0:
        return 0

    cx, cy, cz = barycenter(traj)
    total = sum(
        np.sqrt((point[0] - cx) ** 2 + (point[1] - cy) ** 2 + (point[2] - cz) ** 2)
        for point in traj
    )
    return total / n

def location_max(traj):
    """Return the largest distance between a point of *traj* and its barycenter.

    Only indices 0, 1 and 2 of every point are read. An empty trajectory
    yields ``0``.
    """
    if len(traj) <= 0:
        return 0

    cx, cy, cz = barycenter(traj)
    distances = [
        np.sqrt((point[0] - cx) ** 2 + (point[1] - cy) ** 2 + (point[2] - cz) ** 2)
        for point in traj
    ]
    return np.max(distances)

def orientation(x1, x2 , y1, y2, z1, z2):
    """Return two direction angles, in degrees, for the segment between two points.

    Args:
        x1, x2: x coordinates of the first and second point.
        y1, y2: y coordinates of the first and second point.
        z1, z2: z coordinates of the first and second point.

    Returns:
        A two-element list ``[angle_xy, angle_xz]``:

        * ``angle_xy`` is ``arctan(dy / dx)`` in degrees, or ``90`` / ``-90``
          (sign of ``dy``, ``90`` when ``dy == 0``) when ``dx == 0``.
        * ``angle_xz`` is ``arctan(dz / dx)`` in degrees, shifted by ``+180``
          when ``dx < 0`` or when ``dx > 0`` and ``dz >= 0``. When
          ``dx == 0`` it is ``0`` if ``dz == 0`` and otherwise ``90`` /
          ``-90`` following the sign of ``dz``.

    The ``dx == 0 and dz != 0`` case used to fall through every branch and
    return ``None``, which made callers fail; all other results are unchanged.
    """
    dx = x2 - x1
    dy = y2 - y1
    dz = z2 - z1

    if dx == 0:
        # Vertical in x: arctan(d/dx) is undefined, so use the limit angles.
        angle_xy = 90 if dy >= 0 else -90
        if dz == 0:
            angle_xz = 0
        else:
            # NOTE(review): no +180 shift here, mirroring the existing
            # dx == 0 results which carry none - confirm the convention.
            angle_xz = 90 if dz > 0 else -90
        return [angle_xy, angle_xz]

    to_degrees = 180 / np.pi
    angle_xy = np.arctan(dy / dx) * to_degrees
    angle_xz = np.arctan(dz / dx) * to_degrees
    if dx < 0 or dz >= 0:
        angle_xz += 180
    return [angle_xy, angle_xz]

def orientation_feat(traj):
    """Return the orientation of the first and of the last segment of *traj*.

    Returns:
        A tuple ``(start, end)`` where ``start`` is ``orientation`` applied
        to the first two points and ``end`` is ``orientation`` applied to
        the last two points. With fewer than two points the result is
        ``([0, 0], [0, 0])``.
    """
    if len(traj) <= 1:
        return ([0,0],[0,0])

    first, second = traj[0], traj[1]
    before_last, last = traj[-2], traj[-1]

    start = orientation(first[0], second[0], first[1], second[1], first[2], second[2])
    end = orientation(before_last[0], last[0], before_last[1], last[1], before_last[2], last[2])
    return (start, end)

def nb_turns(traj, limit_angle):
    """Count the turns of *traj*.

    A turn is counted whenever the first angle returned by ``orientation``
    changes by more than *limit_angle* (absolute difference) between two
    consecutive segments of the trajectory.

    Args:
        traj: Sequence of points; indices 0, 1 and 2 of each point are read.
        limit_angle: Threshold on the absolute angle difference, in the
            unit returned by ``orientation`` (degrees).

    Returns:
        The number of detected turns (``0`` with fewer than three points).
    """
    def heading(start, end):
        # First component of the segment orientation.
        return orientation(start[0], end[0], start[1], end[1], start[2], end[2])[0]

    turns = 0
    for i in range(len(traj) - 2):
        change = np.abs(heading(traj[i], traj[i + 1]) - heading(traj[i + 1], traj[i + 2]))
        # The comparison used the undefined name ``limit_angle1`` before,
        # which raised NameError as soon as a trajectory had three points.
        if change > limit_angle:
            turns += 1

    return turns

We then define dynamic features:
* `velocity` returns the list of the point to point velocities over the whole trajectory *traj*
* `velocity_avg` returns the average velocity over the trajectory *traj*
* `velocity_max` returns the greatest velocity over the trajectory *traj*
* `velocity_min` returns the lowest velocity over the trajectory *traj*
* `nb_vmin` returns the number of local minimum of velocity
* `nb_vmax` returns the number of local maximum of velocity

In [10]:
def velocity(traj):
    """Return the point-to-point speeds along *traj*.

    Each point is read as ``(x, y, z, t)``: the speed of a segment is the
    3-D distance between two consecutive points divided by the difference
    of their times.

    Args:
        traj: Sequence of ``(x, y, z, t)`` points.

    Returns:
        A 1-D numpy array with ``len(traj) - 1`` speeds; an empty array when
        the trajectory has fewer than two points.
    """
    if len(traj) < 2:
        return np.array([])

    # Convert once; the conversion used to be repeated four times for every
    # segment, which made the function quadratic in the trajectory length.
    points = np.array(traj)
    steps = points[1:, :3] - points[:-1, :3]
    distances = np.sqrt((steps ** 2).sum(axis=1))
    elapsed = points[1:, 3] - points[:-1, 3]
    return distances / elapsed

def velocity_avg(traj):
    """Return the mean of the point-to-point speeds of *traj*.

    The speeds come from ``velocity(traj)``. A trajectory with fewer than
    two points has no segment and yields ``0``.
    """
    segment_count = len(traj) - 1
    if segment_count < 1:
        return 0

    speeds = velocity(traj)
    total = 0
    for index in range(segment_count):
        total += speeds[index]
    return total / segment_count

def velocity_max(traj):
    """Return the largest point-to-point speed of *traj*, or ``0`` with fewer than two points."""
    if len(traj) <= 1:
        return 0
    speeds = velocity(traj)
    return np.max(speeds)

def velocity_min(traj):
    """Return the smallest point-to-point speed of *traj*, or ``0`` with fewer than two points."""
    if len(traj) <= 1:
        return 0
    speeds = velocity(traj)
    return np.min(speeds)

def nb_vmin(traj):
    """Count the strict local minima of the speed profile of *traj*.

    A speed is a local minimum when it is strictly smaller than both its
    successor and its predecessor in ``velocity(traj)``; the first and last
    speeds are never counted.
    """
    speeds = velocity(traj)
    return sum(
        1
        for before, current, after in zip(speeds, speeds[1:], speeds[2:])
        if current < after and current < before
    )

def nb_vmax(traj):
    """Count the strict local maxima of the speed profile of *traj*.

    A speed is a local maximum when it is strictly greater than both its
    successor and its predecessor in ``velocity(traj)``; the first and last
    speeds are never counted.
    """
    speeds = velocity(traj)
    return sum(
        1
        for before, current, after in zip(speeds, speeds[1:], speeds[2:])
        if current > after and current > before
    )

The function `feature_vector` extracts features from the trajectory in argument *traj = [(x,y)]* and takes in argument a list of features with some of the following elements:["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]

In [11]:
def bucketize_nb_turns(nb_turn):
    """One-hot encode a turn count into four buckets.

    The buckets are ``<= 10``, ``<= 20``, ``<= 30`` and everything else, in
    that order; the returned 4-element list has a ``1`` at the position of
    the first bucket that matches.
    """
    one_hot = [0, 0, 0, 0]
    slot = 3  # last bucket unless one of the upper bounds matches
    for position, upper_bound in enumerate((10, 20, 30)):
        if nb_turn <= upper_bound:
            slot = position
            break
    one_hot[slot] = 1
    return one_hot

In [12]:
def bucketize_nb_v(nb_v):
    """One-hot encode a count of speed extrema into four buckets.

    The buckets are ``< 10``, ``< 20``, ``< 30`` and everything else, in
    that order; the returned 4-element list has a ``1`` at the position of
    the first bucket that matches.
    """
    one_hot = [0, 0, 0, 0]
    slot = 3  # last bucket unless one of the exclusive bounds matches
    for position, exclusive_bound in enumerate((10, 20, 30)):
        if nb_v < exclusive_bound:
            slot = position
            break
    one_hot[slot] = 1
    return one_hot

In [13]:
def feature_vector(traj, playerID, game_area, limit_angle=0.25,Listefeatures=["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]):
    """Build one feature vector from a group of sub-trajectories.

    The sub-trajectories are joined into one list of points on which every
    feature except the travelled distance is computed; the distance is the
    sum of the lengths of the individual sub-trajectories.

    Args:
        traj: Sequence of sub-trajectories, each a list of points.
        playerID: Label stored as the first element of the result.
        game_area: Two-element sequence; used for the diagonal that
            normalises the distance and to scale the barycenter coordinates.
        limit_angle: Threshold forwarded to ``nb_turns``.
        Listefeatures: Names of the features to include. The default list
            is only read, never modified.

    Returns:
        A list starting with ``playerID`` followed by the requested
        features in this fixed order: distance / diagonal, barycenter x and
        y (scaled by ``game_area`` and shifted by 0.5), mean / max distance
        to the barycenter, the four start and end angles (divided by 180 and
        shifted by 0.5), number of turns, mean / min / max speed, number of
        speed minima and number of speed maxima.
    """
    diag = np.sqrt(game_area[0]**2 + game_area[1]**2)

    points = []
    dist = 0
    for sub_traj in traj:
        points += sub_traj
        dist += length(sub_traj)

    features = [playerID]
    if "dist/diag" in Listefeatures:
        features.append(dist/diag)
    if "game area" in Listefeatures:
        bc = barycenter(points)
        features.append(np.float64(0.5 + bc[0] / game_area[0]))
        features.append(np.float64(0.5 + bc[1] / game_area[1]))
    if "barycenter distance" in Listefeatures:
        farthest = location_max(points)
        if farthest == 0:
            # Every point sits on the barycenter (or there is no point).
            features.append(np.float64(0))
        else:
            features.append(location(points)/farthest)
    if "angles" in Listefeatures:
        angles = 0.5 + np.array(orientation_feat(points)) / 180
        features.append(angles[0][0])  # first segment
        features.append(angles[0][1])
        # The first angle of the last segment was missing: index [1][1]
        # used to be appended twice.
        features.append(angles[1][0])  # last segment
        features.append(angles[1][1])
    if "nb turns" in Listefeatures:
        features.append(nb_turns(points, limit_angle))
    if "velocity average" in Listefeatures:
        features.append(velocity_avg(points))
    if "velocity min" in Listefeatures:
        features.append(velocity_min(points))
    if "velocity max" in Listefeatures:
        features.append(velocity_max(points))
    if "number of mins" in Listefeatures:
        features.append(nb_vmin(points))
    if "number of maxs" in Listefeatures:
        features.append(nb_vmax(points))

    return features

In [15]:
#print(feature_vector(sub_trajectories(data),0,[21,10]))

In [16]:
def feature_vector_bucket(traj, playerID, game_area = [21,10], limit_angle=0.25,Listefeatures=["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]):
    """Build one bucketized feature vector from a group of sub-trajectories.

    Same layout as ``feature_vector`` except that the number of turns and
    the numbers of speed minima / maxima are one-hot encoded over four
    buckets, and the mean and minimum speeds are divided by the maximum
    speed (the maximum speed itself is stored unscaled).

    Args:
        traj: Sequence of sub-trajectories, each a list of points.
        playerID: Label stored as the first element of the result.
        game_area: Two-element sequence; used for the diagonal that
            normalises the distance and to scale the barycenter coordinates.
        limit_angle: Threshold forwarded to ``nb_turns``.
        Listefeatures: Names of the features to include. The default lists
            are only read, never modified.

    Returns:
        A list starting with ``playerID`` followed by the requested features.
    """
    diag = np.sqrt(game_area[0]**2 + game_area[1]**2)

    points = []
    dist = 0
    for sub_traj in traj:
        points += sub_traj
        dist += length(sub_traj)

    features = [playerID]
    if "dist/diag" in Listefeatures:
        features.append(dist/diag)
    if "game area" in Listefeatures:
        bc = barycenter(points)
        features.append(np.float64(0.5 + bc[0] / game_area[0]))
        features.append(np.float64(0.5 + bc[1] / game_area[1]))
    if "barycenter distance" in Listefeatures:
        farthest = location_max(points)
        if farthest == 0:
            # Every point sits on the barycenter (or there is no point).
            features.append(np.float64(0))
        else:
            features.append(location(points)/farthest)
    if "angles" in Listefeatures:
        angles = 0.5 + np.array(orientation_feat(points)) / 180
        features.append(angles[0][0])  # first segment
        features.append(angles[0][1])
        # The first angle of the last segment was missing: index [1][1]
        # used to be appended twice.
        features.append(angles[1][0])  # last segment
        features.append(angles[1][1])
    if "nb turns" in Listefeatures:
        features.extend(bucketize_nb_turns(nb_turns(points, limit_angle)))

    velocity_names = ("velocity average", "velocity min", "velocity max")
    if any(name in Listefeatures for name in velocity_names):
        v_max = velocity_max(points)
        if v_max == 0:
            # No usable speed: every requested speed feature is 0, which
            # also avoids dividing by a zero maximum.
            for name in velocity_names:
                if name in Listefeatures:
                    features.append(0)
        else:
            if "velocity average" in Listefeatures:
                features.append(velocity_avg(points) / v_max)
            if "velocity min" in Listefeatures:
                features.append(velocity_min(points) / v_max)
            if "velocity max" in Listefeatures:
                features.append(v_max)

    if "number of mins" in Listefeatures:
        features.extend(bucketize_nb_v(nb_vmin(points)))
    if "number of maxs" in Listefeatures:
        features.extend(bucketize_nb_v(nb_vmax(points)))

    return features
    


The function `feature_vectors_game` creates the feature vectors over all the trajectories between the gathering of two bubbles of one game. The returned array is an array of multiple 13x5 arrays (the five feature vectors, containing 13 features each, corresponding to the five trajectories of each wave).

In [17]:
def feature_vectors_game(game_file, game_area = [21,10],Listefeatures=["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]):
    """Return one feature vector per wave of the game stored in *game_file*.

    Args:
        game_file: Path of the recorded game file.
        game_area: Two-element sequence forwarded to ``feature_vector``.
        Listefeatures: Names of the features to include.

    Returns:
        A numpy array with one row per wave; each row is the output of
        ``feature_vector`` (player id first, then the features).
    """
    trajectories = sub_trajectories(game_file)
    # NOTE(review): parse_root is not defined in the visible part of this
    # file - confirm it is available where this function runs.
    playerID = int(parse_root(game_file)[2][0].text)

    # Listefeatures has to be passed by keyword: as the fourth positional
    # argument it landed in feature_vector's limit_angle parameter.
    vectors = [
        feature_vector(wave, playerID, game_area, Listefeatures=Listefeatures)
        for wave in trajectories
    ]

    return np.array(vectors)

In [18]:
def simple_features_generator(game_list,Listefeatures=["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]):
    """Compute per-wave features and labels for several games and save them.

    Side effects: writes ``features.csv`` and ``output.csv`` in the current
    working directory.

    Args:
        game_list: Iterable of game file paths.
        Listefeatures: Names of the features to include.

    Returns:
        ``(features, labels)``: for every wave of every game, the feature
        values (without the leading label) and the label itself.
    """
    features=[]
    labels=[]
    for file in game_list:
        # Listefeatures has to be passed by keyword: as the second
        # positional argument it landed in the game_area parameter.
        for vector in feature_vectors_game(file, Listefeatures=Listefeatures):
            features.append(vector[1:])
            labels.append(vector[0])
    np.savetxt('features.csv', features, delimiter=",")
    np.savetxt('output.csv', labels, delimiter=",")
    return features, labels

In [19]:
def simple_features_bucket_generator(game_list,Listefeatures=["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]):
    """Compute bucketized per-wave features and labels for several games and save them.

    Side effects: writes ``features_bucket.csv`` and ``output_bucket.csv``
    in the current working directory.

    Args:
        game_list: Iterable of game file paths.
        Listefeatures: Names of the features to include.

    Returns:
        ``(features, labels)``: for every wave of every game, the feature
        values (without the leading label) and the label itself.
    """
    features=[]
    labels=[]
    for file in game_list:
        trajectories = sub_trajectories(file)
        # NOTE(review): parse_root is not defined in the visible part of
        # this file - confirm it is available where this function runs.
        playerID = int(parse_root(file)[2][0].text)
        for wave in trajectories:
            # feature_vector_bucket returns ONE flat vector per wave. The
            # previous code looped over its scalar elements and sliced
            # them, and passed Listefeatures in the game_area position.
            vector = feature_vector_bucket(wave, playerID, Listefeatures=Listefeatures)
            features.append(vector[1:])
            labels.append(vector[0])
    np.savetxt('features_bucket.csv', features, delimiter=",")
    np.savetxt('output_bucket.csv', labels, delimiter=",")
    return features, labels

The following functions provide different shapes for the feature vector. This way of creating the feature vector could be improved by using tensorflow and its feature vectors, instead of creating it "by hand".
* "concat" means all features are concatenated into one numpy vector for each sample
* "bucket" means it uses the bucketized version of the feature vector (for nb_turns, nb_vmin, nb_vmax)
* "hands"  means it uses the hand used to play as label instead of the player's ID

In [20]:
def feature_vectors_game_concat(game_file, game_area = [21,10],Listefeatures=["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]):
    """Return, per wave, the concatenated features of its sub-trajectories.

    For every wave the features of each sub-trajectory (without the leading
    label) are appended one after the other and the player id is appended
    as the last element.

    Args:
        game_file: Path of the recorded game file.
        game_area: Two-element sequence forwarded to ``feature_vector``.
        Listefeatures: Names of the features to include.

    Returns:
        A numpy array with one row per wave.
    """
    trajectories = sub_trajectories(game_file)
    # NOTE(review): parse_root is not defined in the visible part of this
    # file - confirm it is available where this function runs.
    playerID = int(parse_root(game_file)[2][0].text)

    vectors = []
    for wave in trajectories:
        row = []
        for traj in wave:
            # feature_vector expects a GROUP of sub-trajectories, so the
            # single sub-trajectory is wrapped in a list (passing it bare
            # failed on any non-empty trajectory). Listefeatures goes by
            # keyword; positionally it landed in limit_angle.
            row += list(feature_vector([traj], playerID, game_area, Listefeatures=Listefeatures)[1:])
        row.append(playerID)
        vectors.append(row)

    return np.array(vectors)

In [21]:
def feature_vectors_bucket_game_concat(game_file, game_area = [21,10],Listefeatures=["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]):
    """Return, per wave, the concatenated bucketized features of its sub-trajectories.

    For every wave the bucketized features of each sub-trajectory (without
    the leading label) are appended one after the other and the player id
    is appended as the last element.

    Args:
        game_file: Path of the recorded game file.
        game_area: Two-element sequence forwarded to ``feature_vector_bucket``.
        Listefeatures: Names of the features to include.

    Returns:
        A numpy array with one row per wave.
    """
    trajectories = sub_trajectories(game_file)
    # NOTE(review): parse_root is not defined in the visible part of this
    # file - confirm it is available where this function runs.
    playerID = int(parse_root(game_file)[2][0].text)

    vectors = []
    for wave in trajectories:
        row = []
        for traj in wave:
            # feature_vector_bucket expects a GROUP of sub-trajectories, so
            # the single sub-trajectory is wrapped in a list (passing it
            # bare failed on any non-empty trajectory). Listefeatures goes
            # by keyword; positionally it landed in limit_angle.
            row += list(feature_vector_bucket([traj], playerID, game_area, Listefeatures=Listefeatures)[1:])
        row.append(playerID)
        vectors.append(row)

    return np.array(vectors)

In [22]:
def feature_vectors_bucket_game_concat_hands(game_file, game_area = [21,10],Listefeatures=["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]):
    """Return, per wave, the concatenated bucketized features labelled with the hand used.

    Same as ``feature_vectors_bucket_game_concat`` except that the label
    appended at the end of every row is ``0`` when the parsed flag text is
    ``'false'`` and ``1`` otherwise.

    Args:
        game_file: Path of the recorded game file.
        game_area: Two-element sequence forwarded to ``feature_vector_bucket``.
        Listefeatures: Names of the features to include.

    Returns:
        A numpy array with one row per wave.
    """
    trajectories = sub_trajectories(game_file)
    # NOTE(review): parse_root is not defined in the visible part of this
    # file - confirm it is available where this function runs.
    useRightHand = 0 if parse_root(game_file)[2][2].text == 'false' else 1

    vectors = []
    for wave in trajectories:
        row = []
        for traj in wave:
            # feature_vector_bucket expects a GROUP of sub-trajectories, so
            # the single sub-trajectory is wrapped in a list (passing it
            # bare failed on any non-empty trajectory). Listefeatures goes
            # by keyword; positionally it landed in limit_angle.
            row += list(feature_vector_bucket([traj], useRightHand, game_area, Listefeatures=Listefeatures)[1:])
        row.append(useRightHand)
        vectors.append(row)

    return np.array(vectors)

Finally, we provide a function to get the aggregation of all feature vectors over multiple game files, where *game_files* is the list of the names (String type) of all the game files to be considered.

In [23]:
def agregate_feature_vectors(game_files,Listefeatures=["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]):
    """Stack the rows of ``feature_vectors_game_concat`` over several game files.

    Args:
        game_files: Iterable of game file paths.
        Listefeatures: Names of the features to include.

    Returns:
        A numpy array holding the rows of every game, in file order.
    """
    vectors = []
    for file in game_files:
        # Listefeatures has to be passed by keyword: as the second
        # positional argument it landed in the game_area parameter.
        vectors.extend(feature_vectors_game_concat(file, Listefeatures=Listefeatures))

    return np.array(vectors)

In [24]:
def agregate_feature_vectors_bucket(game_files,Listefeatures=["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]):
    """Stack the rows of ``feature_vectors_bucket_game_concat`` over several game files.

    Args:
        game_files: Iterable of game file paths.
        Listefeatures: Names of the features to include.

    Returns:
        A numpy array holding the rows of every game, in file order.
    """
    vectors = []
    for file in game_files:
        # Listefeatures has to be passed by keyword: as the second
        # positional argument it landed in the game_area parameter.
        vectors.extend(feature_vectors_bucket_game_concat(file, Listefeatures=Listefeatures))

    return np.array(vectors)

In [25]:
def agregate_feature_vectors_bucket_hands(game_files,Listefeatures=["dist/diag","game area","barycenter distance","angles","nb turns","velocity average","velocity min","velocity max","number of mins","number of maxs"]):
    """Stack the rows of ``feature_vectors_bucket_game_concat_hands`` over several game files.

    Args:
        game_files: Iterable of game file paths.
        Listefeatures: Names of the features to include.

    Returns:
        A numpy array holding the rows of every game, in file order.
    """
    vectors = []
    for file in game_files:
        # Listefeatures has to be passed by keyword: as the second
        # positional argument it landed in the game_area parameter.
        vectors.extend(feature_vectors_bucket_game_concat_hands(file, Listefeatures=Listefeatures))

    return np.array(vectors)

# Export of the final data

In [26]:
def export_feature_vectors(vectors, name):
    """Write *vectors* to the file *name* as comma-separated text using ``np.savetxt``."""
    np.savetxt(fname=name, X=vectors, delimiter=",")