In [1]:
import pandas as pd
import numpy as np
import codecs
import re
import os
from datetime import datetime

### Goal
- Transform the system log into a dataframe
- Each row records all activities that happened within one second
- User actions include watch (the default action), add waypoint, set goal, apply automation, and engage payload
    - Apply automation is a one-shot action: the user applies the system suggestion and the action finishes immediately.

- Description of coding
    - vehicle status
        - 1: moving
        - 2: arrived target
        - 3: engage payload
        

### Set source directory, target file and keys

In [2]:
# Input directory holding the raw log files and the CSV file the result is written to.
# NOTE(review): both are absolute, machine-specific paths.
root_dir = "/Users/chanhsu/Projects/HRI/uav_hsmm/HMM_Experiment/RAW_DATA/TW/HRB_HW/"
csv_file = "/Users/chanhsu/Projects/HRI/uav_hsmm/uav_hsmm/data/data_20221107/TW/TW_HRB_HW.csv"

# Column names of one output row, in output order:
#   - event bookkeeping and user action fields
#   - x/y of vehicles 1-5, then their targets, then their statuses
#   - x/y of targets a-g
#   - x/y of hazards 1-10
keys = tuple(
    ["file_name", "event_id", "time"]
    + ["action", "payload_act", "n_correct", "n_incorrect"]
    + [f"v{n}_{axis}" for n in range(1, 6) for axis in "xy"]
    + [f"v{n}_target" for n in range(1, 6)]
    + [f"v{n}_status" for n in range(1, 6)]
    + [f"t{letter}_{axis}" for letter in "abcdefg" for axis in "xy"]
    + [f"h{n}_{axis}" for n in range(1, 11) for axis in "xy"]
)

# Accumulates one dict per extracted event across all log files.
event_list = []

In [3]:
# Show the entries of the raw-log directory; the extraction loop iterates over
# this same listing, so any non-log entry shown here must be skipped there.
os.listdir(root_dir)

['HRB4_HW.txt_fixed',
 'HRB3_HW.txt_fixed',
 'HRB13_HW.txt_fixed',
 'HRB14_HW.txt_fixed',
 'HRB2_HW.txt_fixed',
 'HRB5_HW.txt_fixed',
 'HRB15_HW.txt_fixed',
 'HRB8_HW.txt_fixed',
 'HRB9_HW.txt_fixed',
 'HRB7_HW.txt_fixed',
 'desktop.ini',
 'HRB10_HW.txt_fixed',
 'HRB1_HW.txt_fixed',
 'HRB6_HW.txt_fixed',
 'HRB16_HW.txt_fixed',
 'HRB11_HW.txt_fixed']

### Extract contents from log files

In [4]:
# ---------------------------------------------------------------------------
# Parsing helpers
# ---------------------------------------------------------------------------
VEHICLE_IDS = (1, 2, 3, 4, 5)   # vehicles are matched on the text "id: <n>"
TARGET_NAMES = "ABCDEFG"        # targets are matched on the text "name: <letter>"
HAZARD_COUNT = 10               # a hazard line carries HAZARD_COUNT (x, y) token pairs


def _digits(token):
    """Return ``token`` with every non-digit character removed.

    NOTE(review): this also drops minus signs and decimal points
    (e.g. "-1" -> "1"); confirm that coordinates are never negative or
    fractional in the logs.
    """
    return re.sub(r"\D", "", token)


def _classify_action(line):
    """Map a log line to ``(action, skip_rest)``.

    ``action`` is the user action announced by the line, or ``None`` when the
    line announces none. ``skip_rest`` is True when the line carries nothing
    else to parse, so the caller should move on to the next line.
    The checks are ordered; the first matching marker wins.
    """
    if "WP add start" in line:
        return "addWP", True
    if any(marker in line for marker in ("WP add end", "WP add canceled", "Goal change end")):
        return "watch", True
    if "Payload Finished" in line:
        # Not skipped: the same line also updates the correct/incorrect counters.
        return "watch", False
    if "Payload Engaged" in line:
        return "engagePayload", True
    if "Goal set start" in line:
        return "setGoal", True
    if "User applies automation" in line:
        return "applyAutomation", True
    return None, False


def _parse_vehicle(line, vehicle_id):
    """Extract position, status and target of one vehicle from its log line.

    The line is split on single spaces. Tokens 3 and 4 hold x and y; the
    position of the status/target tokens depends on the number of tokens.
    Raises IndexError when the line has fewer tokens than expected.
    """
    parts = line.split(" ")
    x = _digits(parts[3])
    y = _digits(parts[4])

    if len(parts) <= 11:
        # For status 2 & status 3
        status_idx, target_idx = 8, 10
    elif len(parts) >= 13:
        status_idx, target_idx = 10, 12
    else:
        # For status 1
        status_idx, target_idx = 9, 11

    status = _digits(parts[status_idx])
    # NOTE(review): the target token is stored untouched; when it is the last
    # token of the line it still carries the line terminator. Consider
    # stripping it once downstream consumers are checked.
    target = parts[target_idx]

    return {
        f"v{vehicle_id}_x": x,
        f"v{vehicle_id}_y": y,
        f"v{vehicle_id}_status": status,
        f"v{vehicle_id}_target": target,
    }


def _parse_target(line, name):
    """Extract the x/y position (tokens 3 and 4) of target ``name`` from its log line."""
    parts = line.split(" ")
    prefix = "t" + name.lower()
    return {
        prefix + "_x": _digits(parts[3]),
        prefix + "_y": _digits(parts[4]),
    }


def _parse_hazards(line):
    """Extract the hazard positions from the line that follows a "Hazards:" line.

    Tokens are consumed pairwise: tokens 0/1 are hazard 1, tokens 2/3 are
    hazard 2, and so on. Raises IndexError when fewer than
    2 * HAZARD_COUNT tokens are present.
    """
    parts = line.split(" ")
    hazards = {}
    for k in range(1, HAZARD_COUNT + 1):
        hazards[f"h{k}_x"] = _digits(parts[2 * k - 2])
        hazards[f"h{k}_y"] = _digits(parts[2 * k - 1])
    return hazards


# ---------------------------------------------------------------------------
# Extraction
# ---------------------------------------------------------------------------
# Start from an empty list so that re-running this cell does not append the
# same events a second time.
event_list.clear()

for file_name in os.listdir(root_dir):

    if file_name == "desktop.ini":
        continue

    ## Initialize values
    event_num = 0
    num_correct = 0
    num_incorrect = 0

    ## Load file content
    # codecs.open is kept on purpose: unlike the built-in open() it performs no
    # newline translation, so the parsed tokens stay exactly as before.
    file_path = os.path.join(root_dir, file_name)
    with codecs.open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    ## remove first line from list (experiment start); safe on an empty file
    lines = lines[1:]

    new_event = dict.fromkeys(keys)

    ## Start to transform
    for i, line in enumerate(lines):

        ## Update action
        if event_num == 0:
            # Until the first event has been stored, the action is always "watch".
            new_event["action"] = "watch"
        else:
            action, skip_rest = _classify_action(line)
            if action is not None:
                new_event["action"] = action
            if skip_rest:
                continue

        ## Update number of correct and incorrect payload
        if "Payload Finished, INCORRECT" in line:
            print(line)
            num_incorrect += 1
        elif "Payload Finished, CORRECT" in line:
            print(line)
            num_correct += 1

        ## Update payload action: check / hit / safe
        if "User check payload" in line:
            new_event["payload_act"] = 0
        elif "User leaves payload by selecting HIT" in line:
            new_event["payload_act"] = 1
        elif "User leaves payload by selecting SAFE" in line:
            new_event["payload_act"] = 2

        ## Vehicle position, status and target (lowest matching id wins)
        vehicle_id = next((v for v in VEHICLE_IDS if f"id: {v}" in line), None)
        if vehicle_id is not None:
            new_event.update(_parse_vehicle(line, vehicle_id))
            continue

        ## Target position (first matching name wins)
        target_name = next((t for t in TARGET_NAMES if f"name: {t}" in line), None)
        if target_name is not None:
            new_event.update(_parse_target(line, target_name))
            continue

        ## Hazard position: the coordinates are on the line after "Hazards:".
        ## `i > 0` prevents lines[-1] (the last line) from being consulted for
        ## the first line of the file.
        if i > 0 and "Hazards:" in lines[i - 1]:
            new_event.update(_parse_hazards(line))

        ## if the line include "System Log", then create a new event
        if "System Log" in line:

            if new_event["event_id"] is not None:
                # The previous event is complete: store it and start a new one.
                event_list.append(new_event)
                new_event = dict.fromkeys(keys)
                event_num += 1

            parts = line.split(" ")
            event_time = parts[0] + " " + parts[1]

            new_event.update({
                "file_name": file_name,
                "event_id": event_num,
                "time": event_time,
                "n_correct": num_correct,
                "n_incorrect": num_incorrect})

    ## Store the last event of the file: no further "System Log" line follows
    ## it, so it would otherwise be dropped.
    if new_event["event_id"] is not None:
        event_list.append(new_event)


2015-05-04 13:56:07.706    Vehicle: 2 Payload Finished, CORRECT -1,-1

2015-05-04 13:56:18.047    Vehicle: 3 Payload Finished, CORRECT -1,-1

2015-05-04 13:56:27.883    Vehicle: 1 Payload Finished, CORRECT -1,-1

2015-05-04 13:56:41.983    Vehicle: 5 Payload Finished, INCORRECT -1,-1

2015-05-04 13:57:03.925    Vehicle: 2 Payload Finished, CORRECT -1,-1

2015-05-04 13:57:15.675    Vehicle: 4 Payload Finished, CORRECT -1,-1

2015-05-04 13:57:31.737    Vehicle: 3 Payload Finished, INCORRECT -1,-1

2015-05-04 13:57:48.907    Vehicle: 1 Payload Finished, CORRECT -1,-1

2015-05-04 13:58:07.552    Vehicle: 2 Payload Finished, CORRECT -1,-1

2015-05-04 13:58:24.406    Vehicle: 4 Payload Finished, CORRECT -1,-1

2015-05-04 13:58:42.706    Vehicle: 1 Payload Finished, CORRECT -1,-1

2015-05-04 13:58:50.372    Vehicle: 5 Payload Finished, CORRECT -1,-1

2015-05-04 13:59:18.502    Vehicle: 4 Payload Finished, INCORRECT -1,-1

2015-05-04 13:59:25.27    Vehicle: 3 Payload Finished, CORRECT -1,-1

2

### Transform the list into a pandas DataFrame

In [5]:
# Build the frame with an explicit column list so the CSV header and column
# order always follow `keys`, even when no event was extracted.
df = pd.DataFrame(event_list, columns=list(keys))

# The DataFrame index is written as the first, unnamed CSV column.
df.to_csv(csv_file, sep=",")