In [1]:
import glob
import os
import pandas as pd
from tqdm import tqdm
import datetime
import pytz
import pathlib

basedir = '../data/raw'

In [4]:
ecgs = pd.DataFrame(columns=['patient_group', 'patient_id', 'timestamp', 'type', 'filepath'])
for patient_group in ['covid', 'postcovid', 'ctrl']:
    filepaths = list(glob.glob(os.path.join(basedir, f'ecgs_{patient_group}', '*.txt')))
    for file in filepaths:
        file_name_split = pathlib.Path(file).name.split('-')
        patient_id = int(file_name_split[0])
        ecg_type = file_name_split[-2]
        ecg_timestamp = datetime.datetime(
            year=int(file_name_split[1]), month=int(file_name_split[2]), day=int(file_name_split[3]), 
            hour=int(file_name_split[4]), minute=int(file_name_split[5]),
            tzinfo=pytz.timezone("Europe/Berlin"))
        
        ecgs = pd.concat([ecgs, pd.DataFrame({'patient_group': patient_group, 'filepath': file, 'patient_id': patient_id, 'timestamp': ecg_timestamp, 'type': ecg_type}, index=[0])], axis=0, ignore_index=True)

print(f"In total {ecgs.shape[0]} ECGs from {len(ecgs.patient_id.unique())} unique patients")
print()
print(ecgs.patient_group.value_counts())
print()
print(ecgs.type.value_counts())
print()
ecgs.to_csv('../data/interim/ecgs_list.csv', index=False, sep=';')
ecgs

In total 1603 ECGs from 270 unique patients

covid        941
postcovid    662
Name: patient_group, dtype: int64

Belastungs    1280
Ruhe           323
Name: type, dtype: int64



Unnamed: 0,patient_group,patient_id,timestamp,type,filepath
0,covid,26472,2021-10-13 12:55:00+02:00,Ruhe,../data/raw/ecgs_covid/26472-2021-10-13-11-48-...
1,covid,31430,2021-08-06 11:11:00+02:00,Ruhe,../data/raw/ecgs_covid/31430-2021-08-06-10-04-...
2,covid,32344,2021-11-25 15:39:00+01:00,Belastungs,../data/raw/ecgs_covid/32344-2021-11-25-15-32-...
3,covid,30918,2021-07-20 14:27:00+02:00,Belastungs,../data/raw/ecgs_covid/30918-2021-07-20-13-20-...
4,covid,30557,2021-10-18 17:19:00+02:00,Belastungs,../data/raw/ecgs_covid/30557-2021-10-18-16-12-...
...,...,...,...,...,...
1598,postcovid,17031,2021-01-21 14:52:00+01:00,Belastungs,../data/raw/ecgs_postcovid/17031-2021-01-21-14...
1599,postcovid,9403,2011-10-05 11:16:00+02:00,Belastungs,../data/raw/ecgs_postcovid/9403-2011-10-05-10-...
1600,postcovid,30826,2021-06-29 14:35:00+02:00,Belastungs,../data/raw/ecgs_postcovid/30826-2021-06-29-13...
1601,postcovid,30535,2021-06-23 14:24:00+02:00,Belastungs,../data/raw/ecgs_postcovid/30535-2021-06-23-13...
