# How to use the parser
* In the last cell, enter the XML data files you wish to use in the `game_files` list.
* Select an aggregation function (e.g. `agregate_feature_vectors`) for the line `vect = agregation_function(game_files)`.
* Export the resulting data to a .csv file with the line `export_feature_vectors(vect, "name_of_the_file.csv")`.

# First steps

## Imports & Imports of data

In [1]:
import xml.etree.ElementTree as et
import numpy as np

In [2]:
def norm(vect):
    """Return the Euclidean (L2) norm of ``vect``.

    Args:
        vect: Iterable of numbers (list, tuple or 1-D NumPy array).

    Returns:
        The square root of the sum of the squared components, as computed by
        ``np.sqrt``; ``0.0`` for an empty input.
    """
    # A distinct local name keeps the builtin ``sum`` usable in this scope.
    squared_total = sum(el ** 2 for el in vect)
    return np.sqrt(squared_total)

## Useful functions for extracting data from parsed xml file

* The function `read_time` converts a parsed timestamp into a time in seconds, returned as a float

In [3]:
def read_time(timestamp):
    """Convert the clock part of an ISO-8601-like timestamp into seconds.

    The text following the ``'T'`` separator is read as ``hh:mm:ss[.fff]``.
    A trailing UTC offset (``+hh:mm`` or ``-hh:mm``) or ``Z`` marker is
    ignored, and may also be absent.

    Args:
        timestamp: String such as ``"2018-03-01T14:23:45.12+01:00"``.

    Returns:
        float: Seconds elapsed since midnight. Hours are included so that the
        values keep increasing when a recording crosses an hour boundary.

    Raises:
        ValueError: If the clock part is missing or is not ``hh:mm:ss``.
    """
    _, _, clock = timestamp.partition('T')

    # Drop whatever follows the seconds: "+01:00", "-05:00" or "Z".
    for marker in ('+', '-', 'Z'):
        clock = clock.partition(marker)[0]

    hours, minutes, seconds = clock.split(':')
    return float(hours) * 3600 + float(minutes) * 60 + float(seconds)

* The function `parse_root` returns the root element of the XML tree parsed from the file named 'game_file'; all the parsed data is reachable from this element

In [4]:
def parse_root(game_file):
    """Parse the XML document ``game_file`` and return its root element."""
    tree = et.parse(game_file)
    return tree.getroot()

* The function `hand_positions` extracts the positions of the right hand along with the times corresponding to those positions. It returns a list of shape [(x, y, t)]: one 3-element tuple (x, y, t) per recorded position.

In [5]:
def hand_positions(game_file):
    """Read the recorded hand samples of a game file.

    The second child of the XML root is iterated. For every entry, the first
    two children of its first child are read as x and y, and the text of its
    second child is converted with ``read_time``.
    (The notebook describes these samples as the right-hand positions.)

    Args:
        game_file: Path of the XML file, forwarded to ``parse_root``.

    Returns:
        list: One ``(x, y, t)`` tuple of floats per sample, in file order.
    """
    samples = parse_root(game_file)[1]
    return [
        (
            float(sample[0][0].text),
            float(sample[0][1].text),
            read_time(sample[1].text),
        )
        for sample in samples
    ]

In [6]:
#root = parse_root('C:/Users/Julien/Desktop/Centrale/KATE/Kate 2A/AlgoML/kate.xml')
# Scratch check on one recording: show the text of root[2][1] and compare it to 'false'.
root = parse_root('C:/Users/menoci/Desktop/Studies/autisme et ML/Code+Data/xml_data/julien_main_droite_1.xml')

field_text = root[2][1].text
print(field_text)
if field_text == 'false':
    print('Oui')

Julien


* The function `bubble_pop` extracts the time of each game event corresponding to the pop of a bubble by the player. It returns a list of shape [t] (length number_of_bubbles_popped).

In [7]:
def bubble_pop(game_file):
    """Return the times of the "gather" events logged in a game file.

    The first child of the XML root is iterated; an event is kept when the
    text of its first child equals ``"gather"``, and the text of its third
    child is converted with ``read_time``.

    Args:
        game_file: Path of the XML file, forwarded to ``parse_root``.

    Returns:
        list: One time (float) per gathered bubble, in file order.
    """
    events = parse_root(game_file)[0]
    return [
        read_time(entry[2].text)
        for entry in events
        if entry[0].text == "gather"
    ]

* This last function `bubble_pop_clean` returns the bubble gathering times, without the last wave if some of its data is missing

In [8]:
def bubble_pop_clean(game_file):
    """Return the bubble gathering times restricted to complete waves.

    A wave is made of 5 gathered bubbles. When the number of recorded times
    is not a multiple of 5, the trailing times that belong to the unfinished
    last wave are removed.

    Args:
        game_file: Path of the XML file, forwarded to ``bubble_pop``.

    Returns:
        list: The gathering times; its length is a multiple of 5.
    """
    bubble_pop_time = bubble_pop(game_file)

    # Number of times belonging to the incomplete last wave (0 when none).
    leftover = len(bubble_pop_time) % 5

    # Drop exactly the leftover entries; the previous slice removed
    # ``5 - leftover`` items and so cut into the last complete wave.
    return bubble_pop_time[:len(bubble_pop_time) - leftover]

# Extraction of sub-trajectories & features
The function `sub_trajectories` returns an array of shape [[*[(x,y,t),(x,y,t),...]*, for each bubble in wave], for each wave]. To access all the positions and times of the trajectory between the *i*-th and *(i+1)*-th bubbles of the *n*-th wave, use *sub_trajectories[n-1][i]*.

In [9]:
def sub_trajectories(game_file):
    """Split the hand samples of a game into one sub-trajectory per bubble.

    Hand samples are consumed in order: each sample whose time is strictly
    lower than the next gathering time is assigned to the sub-trajectory
    that ends with that gathering. Gathering times are grouped in waves of 5.

    Args:
        game_file: Path of the XML file, forwarded to ``hand_positions`` and
            ``bubble_pop_clean``.

    Returns:
        numpy.ndarray: Object array of shape ``(nb_waves, 5)``; element
        ``[n][i]`` is the list of ``(x, y, t)`` samples of the ``i``-th
        sub-trajectory of wave ``n``.
    """
    hand_position = hand_positions(game_file)
    bubble_pop_time = bubble_pop_clean(game_file)

    wave_size = 5
    nb_waves = len(bubble_pop_time) // wave_size
    nb_positions = len(hand_position)

    # The sub-trajectories have different lengths, so they are stored as
    # objects in a fixed (nb_waves, 5) grid; building the array from nested
    # ragged lists is rejected by recent NumPy versions.
    sub_traj = np.empty((nb_waves, wave_size), dtype=object)

    k = 0  # index of the next hand sample to assign
    for i in range(nb_waves):
        for j in range(wave_size):
            t = bubble_pop_time[wave_size * i + j]  # time the bubble was gathered
            segment = []
            # The bound check stops cleanly when the samples end before the
            # last gathering time instead of indexing past the list.
            while k < nb_positions and hand_position[k][2] < t:
                segment.append(hand_position[k])
                k += 1
            sub_traj[i, j] = segment

    return sub_traj

We define some functions to extract interesting features from trajectories. We first look for Static features : 
* `length` returns the length of the trajectory *traj*
* `barycenter` returns the barycenter of the trajectory *traj* in shape (x,y)
* `location` returns the average distance of each point to the barycenter of the trajectory *traj*
* `location_max` returns the maximum distance between a point of the trajectory and the barycenter of this trajectory
* `orientation` returns the angle (in degrees) between the horizontal axis and the line through *(x1, y1)* and *(x2, y2)*
* `orientation_feat` returns the preceding feature for the first two points and for the last two points of the trajectory *traj*
* `nb_turns` returns the number of turns in the trajectory *traj*, where a turn is detected when the orientations of two consecutive pairs of points differ by more than *limit_angle*

In [10]:
def length(traj):
    """Return the path length of ``traj``.

    The length is the sum of the straight-line distances between consecutive
    points, using the first two entries of each point as x and y. It is 0
    when the trajectory has fewer than two points.
    """
    return sum(
        np.sqrt((traj[k + 1][0] - traj[k][0]) ** 2 + (traj[k + 1][1] - traj[k][1]) ** 2)
        for k in range(len(traj) - 1)
    )

def barycenter(traj):
    """Return the mean position ``(x, y)`` of the points of ``traj``.

    Only the first two entries of each point are used. An empty trajectory
    yields ``(0, 0)``.
    """
    count = len(traj)
    if not count > 0:
        return (0, 0)

    total_x = sum(point[0] for point in traj)
    total_y = sum(point[1] for point in traj)
    return (total_x / count, total_y / count)

def location(traj):
    """Return the mean distance of the points of ``traj`` to their barycenter.

    Args:
        traj: Sequence of points whose first two entries are x and y.

    Returns:
        The average Euclidean distance to ``barycenter(traj)``, or 0 for an
        empty trajectory (same convention as ``location_max``).
    """
    n = len(traj)
    if n == 0:
        # Nothing to average; avoids a division by zero.
        return 0

    center_x, center_y = barycenter(traj)
    total = sum(
        np.sqrt((point[0] - center_x) ** 2 + (point[1] - center_y) ** 2)
        for point in traj
    )
    return total / n

def location_max(traj):
    """Return the largest distance between a point of ``traj`` and its barycenter.

    Returns 0 when the trajectory is empty.
    """
    count = len(traj)
    center = barycenter(traj)
    if not count > 0:
        return 0

    distances = [
        np.sqrt((point[0] - center[0]) ** 2 + (point[1] - center[1]) ** 2)
        for point in traj
    ]
    return np.max(distances)

def orientation(x1, x2 , y1, y2):
    """Return the angle, in degrees, of the line through (x1, y1) and (x2, y2).

    The angle is measured against the horizontal axis with ``arctan`` of the
    slope. Vertical lines are handled separately: 90 when y2 >= y1 (this
    includes two identical points), -90 otherwise.
    """
    vertical = x2 == x1
    if vertical and y2 >= y1:
        return 90
    if vertical and y2 <= y1:
        return -90

    slope = (y2 - y1) / (x2 - x1)
    return np.arctan(slope) * (180 / np.pi)  # radians -> degrees

def orientation_feat(traj):
    """Return the orientations of the first and of the last segment of ``traj``.

    Returns:
        tuple: ``(start_angle, end_angle)`` as given by ``orientation``, or
        ``(0, 0)`` when the trajectory has fewer than two points.
    """
    if len(traj) <= 1:
        return (0,0)

    first, second = traj[0], traj[1]
    before_last, last = traj[-2], traj[-1]
    start_angle = orientation(first[0], second[0], first[1], second[1])
    end_angle = orientation(before_last[0], last[0], before_last[1], last[1])
    return (start_angle, end_angle)

def nb_turns(traj, limit_angle):
    """Count the turns of ``traj``.

    A turn is counted at each point where the orientations of the incoming
    and outgoing segments differ, in absolute value, by more than
    ``limit_angle`` (same unit as ``orientation``).
    """
    def segment_angle(start):
        # Orientation of the segment going from point ``start`` to the next one.
        return orientation(traj[start][0], traj[start + 1][0],
                           traj[start][1], traj[start + 1][1])

    return sum(
        1
        for idx in range(len(traj) - 2)
        if np.abs(segment_angle(idx) - segment_angle(idx + 1)) > limit_angle
    )

We then define dynamic features:
* `velocity` returns the list of the point to point velocities over the whole trajectory *traj*
* `velocity_avg` returns the average velocity over the trajectory *traj*
* `velocity_max` returns the greatest velocity over the trajectory *traj*
* `velocity_min` returns the lowest velocity over the trajectory *traj*
* `nb_vmin` returns the number of local minimum of velocity
* `nb_vmax` returns the number of local maximum of velocity

In [11]:
def velocity(traj):
    """Return the point-to-point speeds along ``traj``.

    Args:
        traj: Sequence of ``(x, y, t)`` points.

    Returns:
        numpy.ndarray: 1-D array with one value per pair of consecutive
        points: the distance between them divided by their time difference.
        Empty when the trajectory has fewer than two points.
    """
    if len(traj) < 2:
        return np.array([])

    # Convert once instead of rebuilding the array for every access.
    samples = np.asarray(traj, dtype=float)
    steps = samples[1:] - samples[:-1]  # (dx, dy, dt) between consecutive points

    travelled = np.sqrt(steps[:, 0] ** 2 + steps[:, 1] ** 2)
    return travelled / steps[:, 2]

def velocity_avg(traj):
    """Return the mean of the point-to-point speeds of ``traj``.

    Returns 0 when the trajectory has fewer than two points.
    """
    count = len(traj)
    if count <= 1:
        return 0

    # ``velocity`` yields count - 1 values, added up in order.
    return sum(velocity(traj)) / (count - 1)

def velocity_max(traj):
    """Return the highest point-to-point speed of ``traj`` (0 below two points)."""
    if len(traj) <= 1:
        return 0
    speeds = velocity(traj)
    return np.max(speeds)

def velocity_min(traj):
    """Return the lowest point-to-point speed of ``traj`` (0 below two points)."""
    if len(traj) <= 1:
        return 0
    speeds = velocity(traj)
    return np.min(speeds)

def nb_vmin(traj):
    """Count the strict local minima of the speed profile of ``traj``.

    A value counts when it is strictly lower than both of its neighbours;
    the first and last speeds are never counted.
    """
    speeds = velocity(traj)
    return sum(
        1
        for idx in range(1, len(speeds) - 1)
        if speeds[idx] < speeds[idx + 1] and speeds[idx] < speeds[idx - 1]
    )

def nb_vmax(traj):
    """Count the strict local maxima of the speed profile of ``traj``.

    A value counts when it is strictly greater than both of its neighbours;
    the first and last speeds are never counted.
    """
    speeds = velocity(traj)
    return sum(
        1
        for idx in range(1, len(speeds) - 1)
        if speeds[idx] > speeds[idx + 1] and speeds[idx] > speeds[idx - 1]
    )

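The function `feature_vector` extracts features from the wave given as argument, *traj = [[(x, y, t), ...], ...]*: a list of sub-trajectories whose points are merged before the features are computed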

In [12]:
def bucketize_nb_turns(nb_turn):
    """One-hot encode a number of turns into 4 buckets.

    Buckets (upper bounds inclusive): <= 10, <= 20, <= 30, above 30.
    """
    one_hot = [0, 0, 0, 0]
    slot = 3  # last bucket unless an upper bound matches
    for idx, upper in enumerate((10, 20, 30)):
        if nb_turn <= upper:
            slot = idx
            break
    one_hot[slot] = 1
    return one_hot

In [13]:
def bucketize_nb_v(nb_v):
    """One-hot encode a number of speed extrema into 4 buckets.

    Buckets (upper bounds exclusive): < 10, < 20, < 30, 30 and above.
    """
    one_hot = [0, 0, 0, 0]
    slot = 3  # last bucket unless an upper bound matches
    for idx, upper in enumerate((10, 20, 30)):
        if nb_v < upper:
            slot = idx
            break
    one_hot[slot] = 1
    return one_hot

In [14]:
def feature_vector(traj, playerID, game_area, limit_angle=0.25):
    """Build the feature list of a group of sub-trajectories.

    Args:
        traj: Sequence of sub-trajectories, each a list of (x, y, t) points.
            Their points are merged, in order, before most features are
            computed; the path length is summed per sub-trajectory.
        playerID: Label stored as the first entry of the result.
        game_area: Pair (width, height) used to scale lengths and positions.
        limit_angle: Threshold forwarded to ``nb_turns``.

    Returns:
        list: ``[playerID, path length / diagonal, 0.5 + barycenter x / width,
        0.5 + barycenter y / height, location / location_max (0 when the
        maximum is 0), 0.5 + first orientation / 180,
        0.5 + last orientation / 180, nb_turns, velocity_avg, velocity_min,
        velocity_max, nb_vmin, nb_vmax]``.
    """
    width, height = game_area[0], game_area[1]
    diagonal = np.sqrt(width**2 + height**2)

    points = []
    travelled = 0
    for segment in traj:
        points += segment
        travelled += length(segment)

    center = barycenter(points)

    farthest = location_max(points)
    if farthest == 0:
        spread = np.float64(0)
    else:
        spread = location(points) / farthest

    angles = 0.5 + np.array(orientation_feat(points)) / 180

    return [
        playerID,
        travelled / diagonal,
        np.float64(0.5 + center[0] / width),
        np.float64(0.5 + center[1] / height),
        spread,
        angles[0],  # orientation of the first segment
        angles[1],  # orientation of the last segment
        nb_turns(points, limit_angle),
        velocity_avg(points),
        velocity_min(points),
        velocity_max(points),
        nb_vmin(points),
        nb_vmax(points),
    ]

In [15]:
def feature_vector_bucket(traj, playerID, game_area = [21,10], limit_angle=0.25):
    """Build the bucketized feature list of a group of sub-trajectories.

    Same inputs as ``feature_vector``. The counts (turns, speed minima, speed
    maxima) are one-hot encoded with ``bucketize_nb_turns`` /
    ``bucketize_nb_v``, and the average and minimum speeds are divided by the
    maximum speed.

    Returns:
        list: ``[playerID, path length / diagonal, 0.5 + barycenter x / width,
        0.5 + barycenter y / height, location / location_max (0 when the
        maximum is 0), 0.5 + first orientation / 180,
        0.5 + last orientation / 180, 4 turn buckets, velocity_avg / v_max,
        velocity_min / v_max, v_max (three zeros when v_max is 0),
        4 nb_vmin buckets, 4 nb_vmax buckets]``.
    """
    width, height = game_area[0], game_area[1]
    diagonal = np.sqrt(width**2 + height**2)

    points = []
    travelled = 0
    for segment in traj:
        points += segment
        travelled += length(segment)

    center = barycenter(points)
    features = [
        playerID,
        travelled / diagonal,
        np.float64(0.5 + center[0] / width),
        np.float64(0.5 + center[1] / height),
    ]

    farthest = location_max(points)
    if farthest == 0:
        features.append(np.float64(0))
    else:
        features.append(location(points) / farthest)

    angles = 0.5 + np.array(orientation_feat(points)) / 180
    features.append(angles[0])  # orientation of the first segment
    features.append(angles[1])  # orientation of the last segment

    features.extend(bucketize_nb_turns(nb_turns(points, limit_angle)))

    peak = velocity_max(points)
    if peak == 0:
        features.extend([0, 0, 0])
    else:
        features.extend([
            velocity_avg(points) / peak,
            velocity_min(points) / peak,
            peak,
        ])

    low_buckets = bucketize_nb_v(nb_vmin(points))
    high_buckets = bucketize_nb_v(nb_vmax(points))
    features.extend(low_buckets)
    features.extend(high_buckets)

    return features
    


The function `feature_vectors_game` creates the feature vectors of one game, one per wave, each computed over all the trajectories between the gatherings of two bubbles of that wave. The returned array has one row per wave, holding 13 values: the player ID followed by 12 features.

In [16]:
def feature_vectors_game(game_file, game_area = [21,10]):
    """Compute one ``feature_vector`` per wave of a game file.

    The player ID is read as an integer from ``root[2][0]`` of the XML file.

    Returns:
        numpy.ndarray: One row per wave, as produced by ``feature_vector``.
    """
    waves = np.array(sub_trajectories(game_file))
    player_id = int(parse_root(game_file)[2][0].text)

    rows = [feature_vector(wave, player_id, game_area) for wave in waves]
    return np.array(rows)

In [17]:
def simple_features_generator(game_list):
    """Collect per-wave features and labels over several game files.

    For every row returned by ``feature_vectors_game``, the first entry is the
    label and the remaining entries are the features. Both lists are also
    written to ``features.csv`` and ``output.csv`` in the working directory.

    Returns:
        tuple: ``(features, labels)`` as two lists.
    """
    features, labels = [], []
    for game_path in game_list:
        for row in feature_vectors_game(game_path):
            labels.append(row[0])
            features.append(row[1:])

    np.savetxt('features.csv', features, delimiter=",")
    np.savetxt('output.csv', labels, delimiter=",")
    return features, labels

In [31]:
def simple_features_bucket_generator(game_list):
    """Collect per-wave bucketized features and labels over several game files.

    Each wave of each file is turned into a ``feature_vector_bucket`` list;
    its first entry (the player ID read from ``root[2][0]``) is the label and
    the remaining entries are the features. Both lists are also written to
    ``features_bucket.csv`` and ``output_bucket.csv`` in the working directory.

    Args:
        game_list: Iterable of XML file paths.

    Returns:
        tuple: ``(features, labels)`` as two lists.
    """
    features = []
    labels = []
    for file in game_list:
        trajectories = np.array(sub_trajectories(file))
        playerID = int(parse_root(file)[2][0].text)
        for traj in trajectories:
            # Compute the vector once and split it, rather than running the
            # whole feature extraction a second time just to read the label.
            vector = feature_vector_bucket(traj, playerID)
            features.append(vector[1:])
            labels.append(vector[0])
    np.savetxt('features_bucket.csv', features, delimiter=",")
    np.savetxt('output_bucket.csv', labels, delimiter=",")
    return features, labels

The following functions provide different shapes for the feature vector. This way of creating the feature vector could be improved by using tensorflow and its feature vectors, instead of creating it "by hand".
* "concat" means all features are concatenated into one numpy vector for each sample
* "bucket" means it uses the bucketized version of the feature vector (for nb_turns, nb_vmin, nb_vmax)
* "hands"  means it uses the hand used to play as label instead of the player's ID

In [19]:
def feature_vectors_game_concat(game_file, game_area = [21,10]):
    """Build one concatenated feature row per wave of a game file.

    For every wave, the features of each of its sub-trajectories (computed
    with ``feature_vector``, label entry removed) are concatenated in order,
    and the player ID read from ``root[2][0]`` is appended as the last value.

    Args:
        game_file: Path of the XML file.
        game_area: Pair (width, height) forwarded to ``feature_vector``.

    Returns:
        numpy.ndarray: One row per wave.
    """
    trajectories = np.array(sub_trajectories(game_file))
    playerID = int(parse_root(game_file)[2][0].text)
    vectors = []

    for wave in trajectories:
        row = []
        for traj in wave:
            # ``feature_vector`` expects a sequence of sub-trajectories, so a
            # single sub-trajectory is wrapped in a one-element list; passing
            # it bare makes the function index into individual points.
            row.extend(feature_vector([traj], playerID, game_area)[1:])
        row.append(playerID)
        vectors.append(row)

    return np.array(vectors)

In [20]:
def feature_vectors_bucket_game_concat(game_file, game_area = [21,10]):
    """Build one concatenated bucketized feature row per wave of a game file.

    For every wave, the features of each of its sub-trajectories (computed
    with ``feature_vector_bucket``, label entry removed) are concatenated in
    order, and the player ID read from ``root[2][0]`` is appended as the last
    value.

    Args:
        game_file: Path of the XML file.
        game_area: Pair (width, height) forwarded to ``feature_vector_bucket``.

    Returns:
        numpy.ndarray: One row per wave.
    """
    trajectories = np.array(sub_trajectories(game_file))
    playerID = int(parse_root(game_file)[2][0].text)
    vectors = []

    for wave in trajectories:
        row = []
        for traj in wave:
            # ``feature_vector_bucket`` expects a sequence of sub-trajectories,
            # so a single sub-trajectory is wrapped in a one-element list;
            # passing it bare makes the function index into individual points.
            row.extend(feature_vector_bucket([traj], playerID, game_area)[1:])
        row.append(playerID)
        vectors.append(row)

    return np.array(vectors)

In [21]:
def feature_vectors_bucket_game_concat_hands(game_file, game_area = [21,10]):
    """Build one concatenated bucketized feature row per wave, labelled by hand.

    Same layout as ``feature_vectors_bucket_game_concat``, except that the
    value appended at the end of each row is a hand flag: 0 when the text of
    ``root[2][2]`` is ``'false'``, 1 otherwise.

    Args:
        game_file: Path of the XML file.
        game_area: Pair (width, height) forwarded to ``feature_vector_bucket``.

    Returns:
        numpy.ndarray: One row per wave.
    """
    trajectories = np.array(sub_trajectories(game_file))
    if parse_root(game_file)[2][2].text == 'false':
        useRightHand = 0
    else:
        useRightHand = 1
    vectors = []

    for wave in trajectories:
        row = []
        for traj in wave:
            # ``feature_vector_bucket`` expects a sequence of sub-trajectories,
            # so a single sub-trajectory is wrapped in a one-element list;
            # passing it bare makes the function index into individual points.
            row.extend(feature_vector_bucket([traj], useRightHand, game_area)[1:])
        row.append(useRightHand)
        vectors.append(row)

    return np.array(vectors)

Finally, we provide functions that aggregate the feature vectors of multiple game files, where *game_files* is the list of the names (strings) of all the game files to consider.

In [22]:
def agregate_feature_vectors(game_files):
    """Stack the rows of ``feature_vectors_game_concat`` over several files.

    Returns:
        numpy.ndarray: All rows of all files, in the order of ``game_files``.
    """
    rows = []
    for path in game_files:
        rows.extend(feature_vectors_game_concat(path))
    return np.array(rows)

In [23]:
def agregate_feature_vectors_bucket(game_files):
    """Stack the rows of ``feature_vectors_bucket_game_concat`` over several files.

    Returns:
        numpy.ndarray: All rows of all files, in the order of ``game_files``.
    """
    rows = []
    for path in game_files:
        rows.extend(feature_vectors_bucket_game_concat(path))
    return np.array(rows)

In [24]:
def agregate_feature_vectors_bucket_hands(game_files):
    """Stack the rows of ``feature_vectors_bucket_game_concat_hands`` over several files.

    Returns:
        numpy.ndarray: All rows of all files, in the order of ``game_files``.
    """
    rows = []
    for path in game_files:
        rows.extend(feature_vectors_bucket_game_concat_hands(path))
    return np.array(rows)

# Export of the final data

In [25]:
def export_feature_vectors(vectors, name):
    """Write ``vectors`` to the file ``name`` as comma-separated text."""
    np.savetxt(fname=name, X=vectors, delimiter=",")

In [26]:
# Folder holding the XML recordings; printed with a sample file name to check the resulting path.
relative_path = 'C:/Users/menoci/Desktop/Studies/autisme et ML/Code+Data/xml_data/'
print(relative_path, 'abc.xml', sep='')

C:/Users/menoci/Desktop/Studies/autisme et ML/Code+Data/xml_data/abc.xml


In [27]:
relative_path = 'C:/Users/menoci/Desktop/Studies/autisme et ML/Code+Data/xml_data/'

# Four recordings per hand ("droite" then "gauche") for each player, in this order:
# paul, sarah, julien.
game_files = [
    relative_path + player + '_main_' + hand + '_' + str(session) + '.xml'
    for player in ('paul', 'sarah', 'julien')
    for hand in ('droite', 'gauche')
    for session in range(1, 5)
]

#vect = agregate_feature_vectors_bucket(game_files)
#export_feature_vectors(vect, "kate_data.csv")

In [28]:
# Four recordings per hand ("droite" then "gauche") for sarah, then for julien.
game_files = [
    relative_path + player + '_main_' + hand + '_' + str(session) + '.xml'
    for player in ('sarah', 'julien')
    for hand in ('droite', 'gauche')
    for session in range(1, 5)
]

In [29]:
# Compute the per-wave features and labels of the selected files; this call also
# writes features.csv and output.csv to the working directory.
simple_features_generator(game_files)

([array([ 1.53521177,  0.50628759,  0.49422789,  0.31485612,  1.        ,
          0.53376523, 26.        ,  9.2257024 ,  0.        , 59.48870636,
          7.        ,  7.        ]),
  array([ 1.6208119 ,  0.55947837,  0.48038155,  0.69625488,  0.53050418,
          0.        , 25.        , 18.47879828,  0.26704634, 69.48977584,
          9.        ,  9.        ]),
  array([4.03447647e+00, 4.88199878e-01, 4.33230790e-01, 6.05440055e-01,
         7.23641949e-01, 4.94859847e-01, 4.20000000e+01, 1.45193857e+01,
         1.13491950e-03, 1.20131515e+02, 1.80000000e+01, 1.70000000e+01]),
  array([ 3.2159874 ,  0.47468945,  0.51089448,  0.59741592,  0.50031632,
          0.23386628, 50.        , 16.50962483,  0.57913985, 60.50727337,
         14.        , 14.        ]),
  array([2.92613471e+00, 5.64412568e-01, 5.54957570e-01, 5.19572895e-01,
         9.22321613e-01, 5.11869350e-01, 5.30000000e+01, 1.35814384e+01,
         2.47376921e-02, 5.84604009e+01, 1.70000000e+01, 1.60000000e+01]),
  a

In [32]:
# Compute the per-wave bucketized features and labels of the selected files; this
# call also writes features_bucket.csv and output_bucket.csv to the working directory.
simple_features_bucket_generator(game_files)

([[1.535211765044291,
   0.5062875868910715,
   0.49422789270625,
   0.3148561165290517,
   1.0,
   0.5337652292730164,
   0,
   0,
   1,
   0,
   0.15508325803404777,
   0.0,
   59.488706363734416,
   1,
   0,
   0,
   0,
   1,
   0,
   0,
   0],
  [1.620811901839702,
   0.5594783731290727,
   0.48038154627368423,
   0.69625488127909,
   0.5305041755099504,
   0.0,
   0,
   0,
   1,
   0,
   0.26592110930435015,
   0.0038429587043186135,
   69.48977583786744,
   1,
   0,
   0,
   0,
   1,
   0,
   0,
   0],
  [4.03447646945108,
   0.4881998776046721,
   0.4332307895396227,
   0.6054400551695719,
   0.7236419488720134,
   0.49485984677344297,
   0,
   0,
   0,
   1,
   0.12086242047955828,
   9.44730862150969e-06,
   120.13151535193666,
   0,
   1,
   0,
   0,
   0,
   1,
   0,
   0],
  [3.215987399505982,
   0.4746894472236842,
   0.5108944786311842,
   0.5974159225170302,
   0.5003163225603259,
   0.2338662793327122,
   0,
   0,
   0,
   1,
   0.2728535581472904,
   0.009571408887858