In [6]:
import numpy as np
import copy
import json
import os
import csv

In [7]:
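# Example of a single node record, kept for reference. create_node below emits the same
# node_id / event_type / posX / posY / unit / timestamp fields (presumably the target schema - confirm).
#   {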
#     "node_id": 126,
#     "event_type": "set_destination",
#     "posX": 45,
#     "posY": 205,
#     "dest_posX": 55,
#     "dest_posY": 205,
#     "unit": "T4_P1",
#     "timestamp": 11830
#   }

In [8]:
# NOTE(review): not referenced by any code visible in this notebook; perturb_location
# hard-codes its own -15..15 jitter bounds (a span of 30) - presumably related, confirm.
loc_rand_limit = 30
# Location name -> (x, y) coordinate pair; populated by read_location_coordinates
# and read by get_coordinate.
loc_coordinates = {}

In [9]:
# Number of the player whose log is being converted; create_node turns it into the
# node's 'unit' value ('P<id>') and process_files increments it after each log.
PLAYER_ID = 1
# Match identifier passed to set_match_config; process_files increments it after each log.
MATCH_ID = 100000

In [10]:
# Event names presumably considered player activity.
# NOTE(review): this list is not referenced by any code visible in this notebook, and it
# names 'DIALOGUE' and 'TALK' while process_single_data_file matches 'DIALOG' - confirm
# which spelling the logs actually use.
ACTIVE_EVENTS = ['PDAUSE', 'WORKSHEET', 'QUIZ', 'PICKUP', 'DROP', 'TESTCOMPUTER', 'TESTOBJECT', \
                'LABELING', 'RETRIEVEITEM', 'STOWITEM', 'BOOKREAD', 'DIALOGUE', 'TALK', 'MOVE']
# Event names that process_single_data_file skips without creating a node.
IGNORE_EVENTS = ['OPEN', 'CLOSE', 'SOLUTION', 'GOALCOMPLETE', 'NOTES']

In [61]:
# Distinct event_type values emitted so far; create_node adds every event it records.
event_set = set()

In [70]:
# Id given to the next node; create_node assigns it and then increments it.
NODE_ID = 1
# Node dicts appended by create_node; process_files dumps this list to the player's JSON file.
MATCH_DATA = []
# Buffers of raw event tuples appended by the handle_* functions and later merged
# into nodes by the matching collapse_* functions.
LABELING = []
TEST = []
QUIZ = []
PDA = []
WORK = []
DIALOGUE = []
# Smallest / largest node timestamp seen, maintained by set_timestamp_min_max.
# The start values act as sentinels that real timestamps are expected to replace.
TIMESTAMP_MIN = 99999999
TIMESTAMP_MAX = -1

# One config dict per processed log, appended by set_match_config.
MATCH_CONFIG = []

In [69]:
def INITIALIZE():
    """Reset the per-log conversion state.

    Restarts the node counter at 1, rebinds the node list and every event
    buffer to its own fresh empty list, and restores the timestamp range
    sentinels (minimum 99999999, maximum -1).
    """
    global NODE_ID, MATCH_DATA, LABELING, TEST, QUIZ, PDA, WORK, DIALOGUE, TIMESTAMP_MAX, TIMESTAMP_MIN
    NODE_ID = 1
    # Each name gets a distinct list object; the buffers must not alias each other.
    MATCH_DATA, LABELING, TEST, QUIZ = [], [], [], []
    PDA, WORK, DIALOGUE = [], [], []
    TIMESTAMP_MIN, TIMESTAMP_MAX = 99999999, -1

In [14]:
def perturb_location(location, event, timestamp):
    """Return `location` with random jitter, except for 'status_update' events.

    Args:
        location: (x, y) pair.
        event: event type; 'status_update' positions are returned unchanged.
        timestamp: accepted but not used.

    Returns:
        The original `location` for 'status_update', otherwise a new (x, y)
        tuple with an independent random integer offset added to each axis.
    """
    if event == 'status_update':
        return location

    base_x, base_y = location
    # randint's upper bound is exclusive, so each offset lies in -15..14.
    # The x offset is drawn before the y offset.
    shift_x = np.random.randint(-15, 15)
    shift_y = np.random.randint(-15, 15)
    return (base_x + shift_x, base_y + shift_y)

In [64]:
def get_timestamp(timestamp):
    """Convert a timestamp (number or numeric string) to a rounded int.

    The value is parsed with float(), rounded with np.round and then cast
    to int.
    """
    as_float = float(timestamp)
    rounded = np.round(as_float)
    return int(rounded)

In [72]:
def set_timestamp_min_max(timestamp):
    """Widen the global TIMESTAMP_MIN / TIMESTAMP_MAX range to include `timestamp`."""
    global TIMESTAMP_MIN, TIMESTAMP_MAX

    TIMESTAMP_MIN = min(TIMESTAMP_MIN, timestamp)
    TIMESTAMP_MAX = max(TIMESTAMP_MAX, timestamp)

In [79]:
def create_node(event, location, timestamp, additional=None):
    """Build one node dict and append it to MATCH_DATA.

    Args:
        event: value stored as the node's 'event_type'.
        location: (x, y) pair; passed through perturb_location first.
        timestamp: raw timestamp; converted with get_timestamp and folded
            into the global timestamp range.
        additional: optional dict merged into the node after the standard
            fields (so it can also override them).

    Side effects: appends to MATCH_DATA, records `event` in event_set and
    increments NODE_ID.
    """
    global NODE_ID, MATCH_DATA

    jittered = perturb_location(location, event, timestamp)

    rounded_ts = get_timestamp(timestamp)
    set_timestamp_min_max(rounded_ts)

    record = {
        'node_id': NODE_ID,
        'event_type': event,
        'posX': jittered[0],
        'posY': jittered[1],
        'unit': 'P' + str(PLAYER_ID),
        'timestamp': rounded_ts,
    }
    if additional is not None:
        record.update(additional)

    MATCH_DATA.append(record)

    # Adding to a set is already a no-op for members, so no membership test is needed.
    event_set.add(event)

    NODE_ID += 1

In [16]:
def get_coordinate(location):
    """Look up the (x, y) coordinate of a named location.

    Known locations are returned as a tuple of ints (stored values are
    converted with int()). Unknown names fall back to the 'default' entry of
    loc_coordinates, which is returned as stored.
    """
    if location not in loc_coordinates:
        return loc_coordinates['default']

    raw_x, raw_y = loc_coordinates[location]
    return (int(raw_x), int(raw_y))

In [17]:
def merge_labeling(CL, event):
    """Merge a run of buffered labeling attempts into a single node.

    Args:
        CL: non-empty list of (row_index, timestamp, loc_xy, info) tuples as
            appended by handle_labeling; each info dict has 'type' and
            'result' keys.
        event: event type passed on to create_node.

    The node starts at the first entry's timestamp and location and carries:
        end_timestamp: timestamp of the last entry,
        type: the labeling type shared by all entries, or None when the run
            mixes several types,
        correct / incorrect: number of entries whose result is / is not
            'correct'.
    """
    first, last = CL[0], CL[-1]

    label_types = {entry[3]['type'] for entry in CL}
    correct = sum(1 for entry in CL if entry[3]['result'] == 'correct')
    incorrect = len(CL) - correct

    # Default to None (as merge_quiz does) so a run with mixed types no longer
    # raises UnboundLocalError.
    lt = next(iter(label_types)) if len(label_types) == 1 else None

    create_node(event, first[2], first[1], {'end_timestamp': last[1], 'type': lt,
                                            'correct': correct, 'incorrect': incorrect})

In [18]:
def collapse_labeling(CL):
    """Split the labeling buffer into runs of consecutive rows and merge each run.

    Entries whose row index (first tuple element) is exactly one more than
    the previous entry's belong to the same run; every run is handed to
    merge_labeling with the event name 'labeling'.
    """
    run_start = 0
    for pos in range(1, len(CL)):
        # A gap in the row indices closes the current run.
        if CL[pos - 1][0] + 1 != CL[pos][0]:
            merge_labeling(CL[run_start:pos], 'labeling')
            run_start = pos

    # Whatever remains after the last gap forms the final run.
    merge_labeling(CL[run_start:], 'labeling')

In [19]:
def handle_labeling(rowdata, i, loc_xy):
    """Buffer one LABELING row for later merging.

    Appends (i, timestamp, loc_xy, info) to LABELING, where timestamp is
    rowdata[1] and info holds the labeling type (rowdata[2]) and the result
    (last field of the row).
    """
    stamp = rowdata[1]
    attempt = {'type': rowdata[2], 'result': rowdata[-1]}
    LABELING.append((i, stamp, loc_xy, attempt))

In [20]:
def merge_quiz(CL, event):
    """Merge a run of buffered quiz rows into a single node.

    Entries without info (None) and entries whose type is 'quiz-answer' are
    skipped. For the rest, the type string is split on '-': the first part
    is collected as the quiz type and the third part as the question id.

    The node starts at the first entry's timestamp and location and carries
    end_timestamp (last entry), type (the single shared quiz type, else
    None), question (list of distinct question ids) and the correct /
    incorrect counts.
    """
    start_stamp = CL[0][1]
    position = CL[0][2]
    final_stamp = CL[-1][1]

    quiz_kinds = set()
    questions = set()
    tally = {'correct': 0, 'incorrect': 0}
    for entry in CL:
        details = entry[3]
        if details is None or details['type'] == 'quiz-answer':
            continue

        parts = details['type'].split('-')
        quiz_kinds.add(parts[0])
        questions.add(parts[2])

        # Anything other than the literal 'correct' counts as incorrect.
        outcome = 'correct' if details['result'] == 'correct' else 'incorrect'
        tally[outcome] += 1

    shared_kind = next(iter(quiz_kinds)) if len(quiz_kinds) == 1 else None

    create_node(event, position, start_stamp, {'end_timestamp': final_stamp, 'type': shared_kind,
                                               'question': list(questions),
                                               'correct': tally['correct'],
                                               'incorrect': tally['incorrect']})

In [21]:
def collapse_quiz(CL):
    """Split the quiz buffer into runs of consecutive rows and merge each run.

    A run continues while each entry's row index (first tuple element) is
    exactly one more than the previous one; every run is passed to
    merge_quiz with the event name 'quiz'.
    """
    run_start = 0
    for pos in range(1, len(CL)):
        previous_row, this_row = CL[pos - 1][0], CL[pos][0]
        if previous_row + 1 != this_row:
            merge_quiz(CL[run_start:pos], 'quiz')
            run_start = pos
    merge_quiz(CL[run_start:], 'quiz')

In [22]:
def handle_quiz(rowdata, i, loc_xy):
    """Buffer one QUIZ row for later merging.

    Rows with more than six fields contribute an info dict with the quiz
    type (rowdata[2]), the iteration (third field from the end) and the
    result (second field from the end); shorter rows are buffered with
    info set to None. The tuple (i, timestamp, loc_xy, info) is appended
    to QUIZ.
    """
    stamp = rowdata[1]
    details = None
    if len(rowdata) > 6:
        details = {'type': rowdata[2], 'iteration': rowdata[-3], 'result': rowdata[-2]}

    QUIZ.append((i, stamp, loc_xy, details))

In [23]:
def merge_worksheet(CL, event):
    """Merge a run of buffered worksheet rows into a single node.

    The node starts at the first entry's timestamp and location. Its extra
    fields are 'end_timestamp' (timestamp of the last entry) plus, for
    every key found in the entries' info dicts, the list of that key's
    values in entry order.
    """
    opening, closing = CL[0], CL[-1]
    merged = {'end_timestamp': closing[1]}

    for entry in CL:
        for key, value in entry[3].items():
            # First occurrence starts the list; later ones extend it.
            merged.setdefault(key, []).append(value)

    create_node(event, opening[2], opening[1], merged)

In [24]:
def collapse_worksheet(CL):
    """Split the worksheet buffer into runs of consecutive rows and merge each run.

    Consecutive means the row index (first tuple element) grows by exactly
    one; each run goes to merge_worksheet with the event name 'worksheet'.
    """
    boundary = 0
    for pos in range(1, len(CL)):
        if CL[pos - 1][0] + 1 != CL[pos][0]:
            merge_worksheet(CL[boundary:pos], 'worksheet')
            boundary = pos
    merge_worksheet(CL[boundary:], 'worksheet')

In [25]:
def handle_worksheet(rowdata, i, loc_xy):
    """Buffer one WORKSHEET row for later merging.

    The info dict always has 'type' (rowdata[2]); rows with fewer than six
    fields additionally get 'field' and 'entry' from the last two fields.
    The tuple (i, timestamp, loc_xy, info) is appended to WORK.
    """
    stamp = rowdata[1]
    details = {'type': rowdata[2]}
    if len(rowdata) < 6:
        details['field'] = rowdata[-2]
        details['entry'] = rowdata[-1]

    WORK.append((i, stamp, loc_xy, details))

In [26]:
def merge_pdause(CL, event):
    """Merge a run of buffered PDA rows into a single node.

    The node starts at the first entry's timestamp and location and carries
    'end_timestamp' (last entry's timestamp) plus the 'use' and 'action'
    values of all entries as two parallel lists.
    """
    opening, closing = CL[0], CL[-1]
    details = [entry[3] for entry in CL]

    create_node(event, opening[2], opening[1], {
        'end_timestamp': closing[1],
        'use': [d['use'] for d in details],
        'action': [d['action'] for d in details],
    })

In [27]:
def collapse_pdause(CL):
    """Split the PDA buffer into runs of consecutive rows and merge each run.

    Entries stay in one run while their row index (first tuple element)
    increases by exactly one; each run is handed to merge_pdause with the
    event name 'pdause'.
    """
    segment_start = 0
    for pos in range(1, len(CL)):
        if CL[pos - 1][0] + 1 != CL[pos][0]:
            merge_pdause(CL[segment_start:pos], 'pdause')
            segment_start = pos

    merge_pdause(CL[segment_start:], 'pdause')

In [28]:
def handle_pdause(rowdata, i, loc_xy):
    """Buffer one PDAUSE row for later merging.

    Appends (i, timestamp, loc_xy, info) to PDA with info holding the
    'use' (rowdata[2]) and 'action' (rowdata[3]) fields.
    """
    stamp = rowdata[1]
    usage = {'use': rowdata[2], 'action': rowdata[3]}
    PDA.append((i, stamp, loc_xy, usage))

In [29]:
def merge_test(CL, event):
    """Emit one 'test' node from one or two buffered test entries.

    Args:
        CL: list of one or two (kind, timestamp, loc_xy, info) tuples as
            appended by handle_test. With two entries the second entry's
            info is merged over the first's.
        event: event type passed on to create_node.

    With a single entry, 'object' and 'result' are filled with 'Null' only
    when the entry does not already provide them, so a TESTCOMPUTER without
    a following TESTOBJECT still gets the placeholders while a lone
    TESTOBJECT keeps its real object and result.
    """
    timestamp = CL[0][1]
    loc_xy = CL[0][2]

    # Work on a copy so the dict held in the buffer is not modified.
    info = dict(CL[0][3])
    if len(CL) > 1:
        info.update(CL[1][3])
    else:
        info.setdefault('object', 'Null')
        info.setdefault('result', 'Null')

    create_node(event, loc_xy, timestamp, info)

In [30]:
def collapse_test(CL):
    """Turn the buffered test entries into 'test' nodes.

    A TESTCOMPUTER entry immediately followed by a TESTOBJECT entry is
    merged into one node; every other entry becomes a node on its own.
    All entries are processed, including a trailing unpaired one (the
    previous loop bound stopped one entry early and dropped it).
    """
    i = 0
    total = len(CL)
    while i < total:
        is_pair = (i + 1 < total
                   and CL[i][0] == 'TESTCOMPUTER'
                   and CL[i + 1][0] == 'TESTOBJECT')
        if is_pair:
            merge_test(CL[i:i + 2], 'test')
            i += 2
        else:
            merge_test(CL[i:i + 1], 'test')
            i += 1

In [31]:
def handle_test(rowdata, i, loc_xy):
    """Buffer one TESTCOMPUTER or TESTOBJECT row in TEST.

    Args:
        rowdata: '|'-separated fields of the log row; rowdata[0] is the
            event name and rowdata[1] the timestamp.
        i: row index (accepted for symmetry with the other handlers; the
            buffered tuple stores the event name in its place).
        loc_xy: player location at the time of the row.

    TESTCOMPUTER rows store the test type plus the text after ':' in the
    hypothesis and reason fields and the text after '-' in the last field.
    Any other row is stored as TESTOBJECT with the object name (text after
    'object-' when that prefix is present, otherwise the raw field) and the
    result taken from the last field.
    """
    timestamp = rowdata[1]

    if rowdata[0] == 'TESTCOMPUTER':
        info = {'type': rowdata[2],
                'hypothesis': rowdata[3].split(':')[1].strip(),
                'reason': rowdata[4].split(':')[1].strip(),
                'testremaining': rowdata[-1].split('-')[1].strip()}

        TEST.append(('TESTCOMPUTER', timestamp, loc_xy, info))
        return

    # Explicit check instead of a bare `except:` around the split, which would
    # also have hidden unrelated errors.
    marker = 'object-'
    raw_object = rowdata[2]
    if marker in raw_object:
        tested = raw_object.split(marker)[1].strip()
    else:
        tested = raw_object

    TEST.append(('TESTOBJECT', timestamp, loc_xy, {'object': tested, 'result': rowdata[-1]}))

In [32]:
def handle_move(rowdata):
    """Record a MOVE row as a 'status_update' node and return the new location.

    The location name is taken from the field containing 'cur-loc-' (the
    last such field wins) and resolved with get_coordinate. When the row has
    no such field the name stays None, which get_coordinate resolves to the
    'default' coordinate; previously this case raised UnboundLocalError.

    Returns:
        The (x, y) coordinate used for the node.
    """
    timestamp = rowdata[1]
    loc = None
    for field in rowdata:
        if 'cur-loc-' in field:
            loc = field.split('cur-loc-')[1]

    loc_xy = get_coordinate(loc)

    create_node('status_update', loc_xy, timestamp)

    return loc_xy

In [33]:
def merge_talk(CL, event):
    """Merge a run of buffered dialog rows into a single node.

    The node starts at the first entry's timestamp and location. Besides
    'end_timestamp' (timestamp of the last entry), every key found in the
    entries' info dicts is collected into a list of its values in entry
    order.
    """
    head, tail = CL[0], CL[-1]
    combined = {'end_timestamp': tail[1]}

    for entry in CL:
        for key, value in entry[3].items():
            combined.setdefault(key, []).append(value)

    create_node(event, head[2], head[1], combined)

In [34]:
def collapse_talk(CL):
    """Split the dialog buffer into runs of consecutive rows and merge each run.

    Rows belong to the same run while their row index (first tuple
    element) increases by exactly one; each run is passed to merge_talk
    with the event name 'dialog'.
    """
    first_of_run = 0
    for pos in range(1, len(CL)):
        if CL[pos - 1][0] + 1 != CL[pos][0]:
            merge_talk(CL[first_of_run:pos], 'dialog')
            first_of_run = pos

    merge_talk(CL[first_of_run:], 'dialog')

In [35]:
def handle_talk(rowdata, i, loc_xy):
    """Buffer one dialog row for later merging.

    The stored text is '<speaker>: <last field>'. Rows with fewer than six
    fields take the speaker from the second-to-last field; longer rows are
    attributed to 'player'. The tuple (i, timestamp, loc_xy, info) is
    appended to DIALOGUE.
    """
    stamp = rowdata[1]
    speaker = rowdata[-2] if len(rowdata) < 6 else 'player'
    line = {'dialog': speaker + ': ' + rowdata[-1]}

    DIALOGUE.append((i, stamp, loc_xy, line))

In [36]:
def handle_bookread(rowdata, loc_xy):
    """Record a BOOKREAD row as a 'bookread' node at `loc_xy`.

    The item is rowdata[2] and the duration is the text following
    'duration-' in rowdata[5].
    """
    stamp = rowdata[1]
    book = rowdata[2]
    reading_time = rowdata[5].split('duration-')[1]

    create_node('bookread', loc_xy, stamp, {'duration': reading_time, 'item': book})

In [37]:
def handle_stowitem(rowdata, loc_xy):
    """Record a STOWITEM row as a 'stowitem' node at `loc_xy`; the item is rowdata[2]."""
    stamp = rowdata[1]
    stowed = rowdata[2]

    create_node('stowitem', loc_xy, stamp, {'item': stowed})

In [38]:
def handle_retrieveitem(rowdata, loc_xy):
    """Record a RETRIEVEITEM row as a 'retrieveitem' node at `loc_xy`.

    The duration is the text following 'duration-' in rowdata[2] and the
    item is rowdata[3].
    """
    stamp = rowdata[1]
    elapsed = rowdata[2].split('duration-')[1]
    retrieved = rowdata[3]

    create_node('retrieveitem', loc_xy, stamp, {'duration': elapsed, 'item': retrieved})

In [39]:
def handle_drop(rowdata):
    """Record a DROP row as a 'drop' node and return the player's location.

    The location name comes from the field containing 'cur-loc-' and the
    dropped item from the field containing 'cur-action-drop-' (the last
    matching field wins in both cases; the item stays None when absent).
    When the row has no location field the name stays None, which
    get_coordinate resolves to the 'default' coordinate; previously this
    case raised UnboundLocalError.

    Returns:
        The (x, y) coordinate used for the node.
    """
    timestamp = rowdata[1]
    loc = None
    item = None
    for field in rowdata:
        if 'cur-loc-' in field:
            loc = field.split('cur-loc-')[1]

        if 'cur-action-drop-' in field:
            item = field.split('cur-action-drop-')[1]

    loc_xy = get_coordinate(loc)

    create_node('drop', loc_xy, timestamp, {'item': item})

    return loc_xy

In [40]:
def handle_pickup(rowdata):
    """Record a PICKUP row as a 'pickup' node and return the player's location.

    The location name comes from the field containing 'cur-loc-' and the
    picked-up item from the field containing 'cur-action-pickup-' (the last
    matching field wins in both cases; the item stays None when absent).
    When the row has no location field the name stays None, which
    get_coordinate resolves to the 'default' coordinate; previously this
    case raised UnboundLocalError.

    Returns:
        The (x, y) coordinate used for the node.
    """
    timestamp = rowdata[1]
    loc = None
    item = None
    for field in rowdata:
        if 'cur-loc-' in field:
            loc = field.split('cur-loc-')[1]

        if 'cur-action-pickup-' in field:
            item = field.split('cur-action-pickup-')[1]

    loc_xy = get_coordinate(loc)

    create_node('pickup', loc_xy, timestamp, {'item': item})

    return loc_xy

In [41]:
def process_single_data_file(data):
    """Dispatch every row of one log to the handler for its event type.

    Args:
        data: iterable of text rows; each row is cut at its first newline
            and split on '|', the first field being the event name.

    Rows are numbered from 1 (ignored rows count too). Events listed in
    IGNORE_EVENTS and events without a handler are skipped. MOVE, DROP and
    PICKUP handlers return the player's new location, which is then passed
    to the handlers of the following rows; the starting location is the
    'default' coordinate.

    NOTE(review): dialog rows are matched by the name 'DIALOG' only - rows
    named 'DIALOGUE' or 'TALK' would be skipped; confirm against the logs.
    """
    current_location = get_coordinate('default')

    # Handlers that take only the row and return the new location.
    relocating = {'MOVE': handle_move, 'DROP': handle_drop, 'PICKUP': handle_pickup}
    # Handlers that emit a node directly at the current location.
    located = {'BOOKREAD': handle_bookread, 'STOWITEM': handle_stowitem,
               'RETRIEVEITEM': handle_retrieveitem}
    # Handlers that buffer the row (with its row number) for later collapsing.
    buffered = {'DIALOG': handle_talk, 'LABELING': handle_labeling,
                'TESTCOMPUTER': handle_test, 'TESTOBJECT': handle_test,
                'QUIZ': handle_quiz, 'WORKSHEET': handle_worksheet,
                'PDAUSE': handle_pdause}

    for row_number, row in enumerate(data, start=1):
        fields = row.split('\n')[0].split('|')
        kind = fields[0]

        if kind in IGNORE_EVENTS:
            continue

        if kind in relocating:
            current_location = relocating[kind](fields)
        elif kind in located:
            located[kind](fields, current_location)
        elif kind in buffered:
            buffered[kind](fields, row_number, current_location)

In [42]:
def file_name_extension(fname):
    """Split a file name at its last '.' into (name, extension).

    The extension is returned without the dot. A name that contains no '.'
    is returned whole with an empty extension (previously this raised
    IndexError, which aborted directory walks on extension-less files).

    Examples:
        'a.b.log' -> ('a.b', 'log')
        '.hidden' -> ('', 'hidden')
        'README'  -> ('README', '')
    """
    stem, dot, ext = fname.rpartition('.')
    if not dot:
        # rpartition puts the whole string in the last slot when no '.' exists.
        return fname, ''
    return stem, ext

In [77]:
def set_match_config(file_name, match_id, player_id, timestamp_range):
    """Build the config entry for one converted log and append it to MATCH_CONFIG.

    The entry starts from './data/config_skeleton.json' and gets the file
    name, match id, timestamp range start/end and a single unit named
    'P<player_id>', which is also set as the only selected unit.

    Args:
        file_name: log name stored under 'file_name'.
        match_id: value stored under 'match_id'.
        player_id: number used to build the unit name.
        timestamp_range: indexable pair (start, end).
    """
    with open('./data/config_skeleton.json', 'r') as skeleton_file:
        entry = json.load(skeleton_file)

    unit_name = 'P' + str(player_id)

    entry['file_name'] = file_name
    entry['match_id'] = match_id

    time_span = entry['timestamp_range']
    time_span['start'] = timestamp_range[0]
    time_span['end'] = timestamp_range[1]

    # Shape of one unit record:
    #     {
    #       "name": "",
    #       "group": "",
    #       "color": "#AB0022"
    #     }
    entry['units'] = [{"name": unit_name, "group": "T", "color": "#AB0022"}]
    entry['load_settings']['selected_units'] = [unit_name]

    MATCH_CONFIG.append(entry)

In [44]:
def read_location_coordinates(file, set_defalut=False):
    """Load location coordinates from a text file into loc_coordinates.

    Each non-blank line is split on single spaces; the first field is the
    location name and the next two are stored, as strings, as its (x, y)
    pair. Blank and whitespace-only lines are skipped (a blank line
    previously raised IndexError and a whitespace-only line stored a junk
    '' entry).

    Args:
        file: path of the coordinates file.
        set_defalut: when true, first register the 'default' location at
            (2600, 1444). (The misspelled name is kept because callers
            pass it by keyword.)
    """
    if set_defalut:
        loc_coordinates['default'] = (2600, 1444)
    with open(file, 'r') as data_file:
        for row in data_file:
            if not row.strip():
                continue
            loc_info = row.split('\n')[0].split(' ')
            loc_coordinates[loc_info[0]] = (loc_info[1], loc_info[2])

In [75]:
def process_files(in_dir, out_dir):
    """Convert every log under `in_dir` and write the results to `out_dir`.

    For each file found by os.walk whose extension is 'log' and whose name
    contains no further '.', the per-log state is reset, the log is parsed,
    the buffered events are collapsed into nodes, the nodes are written to
    'player_<PLAYER_ID>.json' and a config entry is recorded. After the
    walk all config entries are written to 'match_config.json'.

    Output paths are built with os.path.join, so `out_dir` works with or
    without a trailing separator (string concatenation previously wrote
    'match_config.json' next to the directory when the separator was
    missing).

    PLAYER_ID restarts at 1 and MATCH_CONFIG is emptied on every call;
    MATCH_ID is not reset and continues from its current value.

    Args:
        in_dir: directory tree searched for log files.
        out_dir: existing directory receiving the JSON files.
    """
    global PLAYER_ID, MATCH_ID, MATCH_CONFIG

    MATCH_CONFIG = []
    PLAYER_ID = 1

    loc_file = './data/ci_locations.txt'
    read_location_coordinates(loc_file, set_defalut=True)

    for root, dirs, files in os.walk(in_dir):
        for file in files:
            fname, ext = file_name_extension(file)
            if ext != 'log' or '.' in fname:
                continue

            INITIALIZE()
            print(fname, 'Player ID:', PLAYER_ID)
            with open(os.path.join(root, file), 'r') as data_file:
                process_single_data_file(data_file)

            # Turn each non-empty event buffer into merged nodes.
            if LABELING:
                collapse_labeling(LABELING)
            if TEST:
                collapse_test(TEST)
            if QUIZ:
                collapse_quiz(QUIZ)
            if PDA:
                collapse_pdause(PDA)
            if WORK:
                collapse_worksheet(WORK)
            if DIALOGUE:
                collapse_talk(DIALOGUE)

            player_path = os.path.join(out_dir, 'player_' + str(PLAYER_ID) + '.json')
            with open(player_path, 'w') as json_file:
                json.dump(MATCH_DATA, json_file)

            set_match_config(fname, MATCH_ID, PLAYER_ID, [TIMESTAMP_MIN, TIMESTAMP_MAX])

            PLAYER_ID += 1
            MATCH_ID += 1

    with open(os.path.join(out_dir, 'match_config.json'), 'w') as json_file:
        json.dump(MATCH_CONFIG, json_file)

In [80]:
# Convert every .log file found under in_dir; process_files writes one player_<n>.json
# per log plus a match_config.json into out_dir and prints each log name as it goes.
in_dir = './data/goal_recognition/selected_data/' #Interaction Logs/'
out_dir = './data/stratmapper_format/'
process_files(in_dir, out_dir)

Session2-StationK_1 Player ID: 1
Session4-StationJJ_1 Player ID: 2
Session1-StationS_1 Player ID: 3
Session2-StationRR_1 Player ID: 4
Session3-StationV_1 Player ID: 5
Session2-StationAA_1 Player ID: 6
Session3-StationHH_1 Player ID: 7
Session4-StationP_1 Player ID: 8
Session2-StationLL_1 Player ID: 9
Session2-StationCC_1 Player ID: 10
Session1-StationKK_1 Player ID: 11
Session3-StationK_1 Player ID: 12
Session4-StationTT_1 Player ID: 13
Session3-StationJJ_1 Player ID: 14
Session2-StationS_1 Player ID: 15
Session3-StationW_1 Player ID: 16
Session3-StationLL_1 Player ID: 17
Session4-StationRR_1 Player ID: 18
Session3-StationPP_1 Player ID: 19
Session2-StationQ_1 Player ID: 20
Session2-StationGG_1 Player ID: 21


In [None]:
# Start from an empty table and reload the location coordinates, including the
# built-in 'default' entry, for the distance check below.
loc_coordinates = {}
loc_file = './data/ci_locations.txt'
read_location_coordinates(loc_file, set_defalut=True)

In [None]:
def get_distance(c1, c2):
    """Return the Euclidean distance between two (x, y) points."""
    delta_x = c1[0] - c2[0]
    delta_y = c1[1] - c2[1]
    return (delta_x**2 + delta_y**2)**0.5

In [None]:
# Find the smallest distance between any two entries of loc_coordinates (the
# 'default' entry included) and print half of it.
# NOTE(review): presumably a check on how much random jitter a location can take
# before it overlaps a neighbour - confirm.
locs = [x for x in loc_coordinates]
min_distance = 9999999999  # sentinel start value, replaced by the first measured distance below it
for i in range(len(locs)):
    # j starts after i so each unordered pair is measured once
    for j in range(i+1, len(locs)):
        c1 = get_coordinate(locs[i])
        c2 = get_coordinate(locs[j])
        distance = get_distance(c1, c2)
        if distance < min_distance:
            min_distance = distance
        

print(min_distance/2)

In [63]:
# Display the distinct event types that create_node has emitted so far.
event_set

{'bookread',
 'dialog',
 'drop',
 'labeling',
 'pdause',
 'pickup',
 'quiz',
 'retrieveitem',
 'status_update',
 'stowitem',
 'test',
 'worksheet'}