In [39]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [40]:
import glob
import re
import pandas as pd
import os
import time

In [41]:
def extract_basic_info(string):
    if "timestamp" not in string:
        return "TIMESTAMP_MISSING"
    if "tagged_device_id" not in string:
        return "TAGGED_DEVICE_ID_MISSING"
    if string.count("device_id") != 2:
        return "DEVICE_ID_MISSING"
    
    values = re.findall(r"\((.*?)\)", string)
    return values

def extract_spo2_data(string):
    if "SampleCounter" in string:
        return None
    if "Footer" in string:
        return None
    raw_data = string.split(", ")[0].split(": ")[1]
    values = re.findall(r"\((.*?)\)", string.split(", ")[1])
    signalStrength = int(values[0])
    probe = int(values[1])
    finger = int(values[2])
    spo2 = int(values[3])
    pulse = int(values[4])
    return raw_data, signalStrength, probe, finger, spo2, pulse

In [42]:
spo2_data_folder = "ppg/"
output_folder = "ppg_tables/"

if not os.path.exists(output_folder):
    os.mkdir(output_folder)

In [43]:
files = glob.glob(spo2_data_folder + "*")
print("Files found", *files, sep='\n')

Files found
ppg/f69b61e2-8663-42a1-8b61-3cc99a30accf_00956.doz
ppg/f69b61e2-8663-42a1-8b61-3cc99a30accf_00955.doz


In [50]:
start_total = time.time()
print(f"Total files : {len(files)}")
file_num = 0
for file in files:
    start_file = time.time()
#     df = pd.DataFrame()
    data_dict = {"RAW" : [],
                "Signal" : [],
                "Probe" : [],
                "Finger" : [],
                "SPO2" : [],
                "Pulse" : []}
    file_num += 1
    with open(file, "rb") as fp:
        line = b''
        while True:
            next_char = fp.read(1)
            line += next_char
            if next_char == b'\n':
                print("Read a line")
                break
        cnt = 1
        while line:
#             print("Line {}: {}".format(cnt, line.strip()))
            if cnt == 1:
                start = time.time()
                basic_info = extract_basic_info(line.strip())
                end = time.time()
#                 print("Time taken to extract basic info =", end - start)
            else:
                start = time.time()
                spo2_data = extract_spo2_data(line.strip())
                end = time.time()
#                 print("Time taken to extract basic data =", end - start)
                if spo2_data is not None:
                    start = time.time()
                    data_dict["RAW"].append(spo2_data[0])
                    data_dict["Signal"].append(spo2_data[1])
                    data_dict["Probe"].append(spo2_data[2])
                    data_dict["Finger"].append(spo2_data[3])
                    data_dict["SPO2"].append(spo2_data[4])
                    data_dict["Pulse"].append(spo2_data[5])
                    end = time.time()
#                     print("Time taken to append data =", end - start)
            cnt += 1
            line = b''
            while True:
                next_char = fp.read(1)
                line += next_char
                if next_char == 10:
                    print("Read a line")
                    break
            break
    start = time.time()
    df = pd.DataFrame(data_dict)
    df.to_csv(output_folder + file.replace("ppg/", ""))
    end = time.time()
#     print("Time taken to prepare DataFrame and export csv =", end - start)
    print(f"Exported File number {file_num}, Filename {file}")
    end = time.time()
    print(f"Total time taken for file = {int((end - start) * 1000)} ms")
    
end_total = time.time()
print(f"Total time taken for all files = {int((end_total - start_total))} s")

Total files : 2


KeyboardInterrupt: 