In [1]:
import datetime
import os  # OS operations (read/write files/folders)

import pandas as pd  # operate with dataframes
from tqdm.notebook import tqdm  # mother of progressbars for Python

In [2]:
# directory whose entries are listed as the event files
PATH = "../data/raw/EventData"

# template passed to tqdm as bar_format: a counter line followed by an
# indented stats line (the space before the line break and the 12-space
# indent of the second line are part of the template)
B_FORMAT = (
    "📄 {n_fmt} of {total_fmt} {desc} processed: {bar} \n"
    "            {percentage:3.0f}% ⏱️{elapsed} ⏳{remaining} ⚙️{rate_fmt}{postfix}"
)

In [3]:
# load the participants table, key it by participant id and expose the
# "created" column (when present) under the name "date"
part_raw = (
    pd.read_csv("../participants_raw.csv")
    .set_index("id")
    .rename(columns={"created": "date"})
)
part_raw

Unnamed: 0_level_0,date,expo,side
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
155e035d01544c8b82152d6831787980,01.06.2019 11:38,MSW,left
0382fc9d07994096ae4853e8b177f3ca,01.06.2019 12:02,MSW,left
9a7cf99094a7446eab3d27841d672fd5,01.06.2019 12:02,MSW,right
ddd3ef19c983441db648c17d46595a52,01.06.2019 12:10,MSW,right
c226385e176a49c8af9a854554c3b403,01.06.2019 12:11,MSW,left
...,...,...,...
6d04c2f469cb42a5bc3da098df4720eb,31.10.2019 16:26,BMBF,right
6a86150f0826424c9d22eedffeca2e27,31.10.2019 18:36,BMBF,left
62abc20d51b04f8abb93bc6823d59ee4,31.10.2019 18:36,BMBF,right
aa3e2f14e2f94012aceed69128b40d21,31.10.2019 18:42,BMBF,left


In [4]:
# get all event filenames; os.listdir returns entries in arbitrary order, so
# sort them to make every later pass over the list reproducible (the per-file
# loop further down overwrites a participant's row each time it matches, so
# the iteration order decides which file wins)
evs = sorted(os.listdir(PATH))
print(len(evs))

# NOTE(review): an earlier filter for hidden / non-".raw" names was disabled
# because it dropped about half of the files. It called evs.remove(f) inside
# `for f in evs`; removing from a list while iterating over it makes the loop
# skip the element after every removal. If filtering is reinstated, build a
# new list instead (e.g. a comprehension) and first confirm the real filename
# suffix - the amount of files it removed suggests the condition matched
# most of them.

26572


In [5]:
uids = part_raw.index.tolist()
# set copy for constant-time membership tests; the plain list would be
# scanned once for every event file
known_uids = set(uids)

# event files progress bar
files_pbar = tqdm(
    evs,
    total=len(evs),
    desc="🧾 participants",
    dynamic_ncols=True,
    bar_format=B_FORMAT,
)

# every participant starts with an empty condition; it is filled in below
# for the participants that have a matching event file
part_raw["condition"] = ""

for f in files_pbar:
    # the uid is the third "-"-separated field, up to its first "."
    fields = f.split("-")
    if len(fields) < 3:
        # entries that do not follow the event naming (hidden/config files,
        # folders) have no uid field; skip them instead of raising IndexError
        continue
    uid = fields[2].split(".")[0]
    if uid not in known_uids:
        continue

    # condition is the second-to-last "-" field of the name before ".raw"
    condition = f.split(".raw")[0].split("-")[-2]

    # getmtime is the file's last-modification time (not its creation time);
    # fromtimestamp renders it as local time of the machine running this
    modified = os.path.getmtime(os.path.join(PATH, f))
    date = datetime.datetime.fromtimestamp(modified).strftime("%Y-%m-%d %H:%M")

    # store it; participants without an event file keep their original date
    part_raw.loc[uid, ["date", "condition"]] = [date, condition]

part_raw

📄 0 of 26572 🧾 participants processed:                                                                        …

Unnamed: 0_level_0,date,expo,side,condition
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
155e035d01544c8b82152d6831787980,2019-06-01 11:38,MSW,left,RadioTalk
0382fc9d07994096ae4853e8b177f3ca,2019-06-01 12:02,MSW,left,RadioTalk
9a7cf99094a7446eab3d27841d672fd5,2019-06-01 12:02,MSW,right,TaxiDriver
ddd3ef19c983441db648c17d46595a52,2019-06-01 12:10,MSW,right,AVAS
c226385e176a49c8af9a854554c3b403,2019-06-01 12:11,MSW,left,RadioTalk
...,...,...,...,...
6d04c2f469cb42a5bc3da098df4720eb,2019-10-31 16:26,BMBF,right,AVAS
6a86150f0826424c9d22eedffeca2e27,2019-10-31 18:36,BMBF,left,RadioTalk
62abc20d51b04f8abb93bc6823d59ee4,2019-10-31 18:36,BMBF,right,RadioTalk
aa3e2f14e2f94012aceed69128b40d21,2019-10-31 18:42,BMBF,left,TaxiDriver


In [6]:
# set date column as standard datetime format
#
# The column can hold two textual layouts at this point: "YYYY-MM-DD HH:MM"
# (optionally with seconds) and the day-first "DD.MM.YYYY HH:MM" layout of
# the participants table. Letting pandas guess would read "01.06.2019" as
# month-first (6 January instead of 1 June) or fail on the mix, so each
# layout is parsed with its own explicit format.
if not pd.api.types.is_datetime64_any_dtype(part_raw["date"]):
    date_text = part_raw["date"].astype(str).str.strip()
    date_formats = ("%Y-%m-%d %H:%M", "%Y-%m-%d %H:%M:%S", "%d.%m.%Y %H:%M")

    parsed_dates = pd.to_datetime(date_text, format=date_formats[0], errors="coerce")
    for fmt in date_formats[1:]:
        # only fills the rows that no earlier format could parse
        parsed_dates = parsed_dates.fillna(
            pd.to_datetime(date_text, format=fmt, errors="coerce")
        )

    # fail loudly rather than silently storing NaT for an unknown layout;
    # values that were missing to begin with are allowed to stay NaT
    unparsed = parsed_dates.isna() & part_raw["date"].notna()
    if unparsed.any():
        examples = part_raw.loc[unparsed, "date"].unique().tolist()[:5]
        raise ValueError(f"unrecognised date format, e.g.: {examples}")

    part_raw["date"] = parsed_dates

# ensure participants ordering by date
part_raw = part_raw.sort_values(by="date")

part_raw

Unnamed: 0_level_0,date,expo,side,condition
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
d2ae34df3118440cb66b5c27ade904f1,2019-05-09 19:34:00,MSW,right,TaxiDriver
b8d1f4aa336e433891d26271ee3f2e8f,2019-05-09 19:56:00,MSW,right,TaxiDriver
3622dbec36ac48bd9ad1159270e8d01f,2019-05-09 19:59:00,MSW,left,TaxiDriver
10c30561d9fb4f40bd4b95da3264f421,2019-05-09 19:59:00,MSW,right,TaxiDriver
1f55858f8a7d4d8da568cc93ee196f43,2019-05-09 20:48:00,MSW,right,AVAS
...,...,...,...,...
611d35a7c3e940cc82495e53c2a8532d,2020-01-03 16:12:00,BMBF,right,TaxiDriver
3b6fda285d9e412eb081986b2f22a2e3,2020-01-03 16:13:00,BMBF,left,AVAS
18ffb0abdc8642098c479380bfa533d1,2020-01-03 16:15:00,BMBF,left,RadioTalk
c06f123b35b74bb489ec239b1cac9eb4,2020-01-03 16:16:00,BMBF,right,TaxiDriver


In [7]:
# persist the result (index included); note that this overwrites the same
# file the notebook loads at the top
part_raw.to_csv(path_or_buf="../participants_raw.csv")