In [2]:
import os
import re

import numpy as np
import pandas as pd
import scipy.io

In [89]:


# Define paths: directory holding the .hea/.dat files and the index file that
# lists one record name per line.
data_path = '.'
record_file = 'RECORDS'

# WFDB storage format handled by this loader (16-bit two's complement samples).
SUPPORTED_DFORMAT = 16


def _parse_signal_line(line):
    """Parse one signal-specification line of a WFDB-style header.

    The whitespace-separated fields used are, in order after the file name:
    storage format, ADC gain, ADC resolution, ADC zero, initial value and
    checksum.

    Args:
        line: Raw text of the signal line.

    Returns:
        dict with keys 'dformat', 'gain', 'bitres', 'zerovalue',
        'firstvalue' and 'checksum'. 'gain' is a float, the rest are ints.

    Raises:
        ValueError: if the line has too few fields or the gain field does not
            start with a number.
    """
    fields = line.split()
    if len(fields) < 7:
        raise ValueError(f'Malformed signal line in header: {line!r}')

    # The gain field can carry a baseline and a unit suffix (e.g. "100(0)/bpm");
    # only the leading number is the gain itself.
    gain_match = re.match(r'[0-9]+(?:\.[0-9]+)?', fields[2])
    if gain_match is None:
        raise ValueError(f'Cannot parse gain from signal line: {line!r}')

    return {
        'dformat': int(fields[1]),
        'gain': float(gain_match.group(0)),
        'bitres': int(fields[3]),
        'zerovalue': int(fields[4]),
        'firstvalue': int(fields[5]),
        'checksum': int(fields[6]),
    }


# Read the record names; blank lines are skipped so that a trailing empty line
# in the index file does not produce an empty record name.
with open(record_file, 'r') as f:
    data_names = [line.strip() for line in f if line.strip()]

# Initialize data storage list
data = []

# Process each record
for name in data_names:
    # Read the header file (.hea) in a single pass: record line, then one line
    # per signal, then the '#'-prefixed clinical parameters.
    with open(os.path.join(data_path, f'{name}.hea'), 'r') as f:
        # Record line: <name> <number of signals> <sampling rate> <sample count>
        first_line = f.readline().strip().split()
        n_nr_signals, n_fs, n_nr_samples = int(first_line[1]), int(first_line[2]), int(first_line[3])

        # Every signal line is parsed, so no entry can silently reuse the
        # values of a previous signal.
        c_header = [_parse_signal_line(f.readline()) for _ in range(n_nr_signals)]

        # Clinical parameters: lines of the form "#<name ...> <value>".
        # The value is kept as the raw text of the last token.
        params = {}
        for line in f:
            if re.match(r'#[a-zA-Z]', line):
                tokens = line.split()
                param_val = tokens[-1]
                param_name = "_".join(tokens[:-1]).replace('#', '').replace('.', '')
                params[param_name] = param_val

    for signal_header in c_header:
        if signal_header['dformat'] != SUPPORTED_DFORMAT:
            raise ValueError(
                f"Record {name}: unsupported storage format {signal_header['dformat']}"
            )

    # Read the data file (.dat). Format 16 stores signed little-endian 16-bit
    # samples, interleaved sample by sample across the signals.
    with open(os.path.join(data_path, f'{name}.dat'), 'rb') as f:
        raw_data = np.fromfile(f, dtype='<i2').reshape((n_nr_signals, n_nr_samples), order='F')

    # Convert ADC units to physical units. The cast to float happens before the
    # subtraction so the offset can never wrap around in integer arithmetic.
    # Row 0 is taken as FHR and row 1 as UC, i.e. the header is assumed to list
    # the signals in that order.
    fhr = (raw_data[0, :].astype(np.float64) - c_header[0]['zerovalue']) / c_header[0]['gain']
    uc = (raw_data[1, :].astype(np.float64) - c_header[1]['zerovalue']) / c_header[1]['gain']
    time = np.arange(n_nr_samples) / n_fs

    # Assemble the record and append it to the main data list
    record = {'cParams': params, 'FHR': fhr, 'UC': uc, 'Time': time}
    data.append(record)

# Save as a .mat file
scipy.io.savemat('Data.mat', {'Data': data})


In [81]:
# Split every record into its signals (collected in parallel lists) and its
# clinical parameters (left in `data`, one dict per record).
#
# NOTE: this cell rewrites `data` in place. Afterwards each element is the
# record's parameter dict with its first two entries (in insertion order)
# removed, so the cell should only be run once after the loader cell;
# running it again would look up 'FHR' on the parameter dicts.
FHRs, UCs, times = [], [], []
for idx, rec in enumerate(data):
    FHRs.append(rec['FHR'])
    UCs.append(rec['UC'])
    times.append(rec['Time'])

    clinical = rec['cParams']
    # Drop the first two parameters; the key list is snapshotted first so the
    # dict is not modified while being iterated.
    for key in list(clinical)[:2]:
        clinical.pop(key)
    data[idx] = clinical

In [82]:
# One row per record, one column per clinical parameter. `data` is expected to
# be the list of parameter dicts produced by the previous cell.
# pandas is imported in the notebook's top import cell.
df = pd.DataFrame(data)

In [83]:
# Number of distinct values in each column; a count of 1 marks a column that is
# constant across all records.
df.nunique()

pCO2             82
BE              138
Apgar1           10
Apgar5            7
NICU_days         1
Seizures          1
HIE               1
Intubation        1
Main_diag         1
Other_diag        1
Gest_weeks        7
Weight(g)       180
Sex               2
Age              27
Gravidity        10
Parity            7
Diabetes          2
Hypertension      2
Preeclampsia      2
Liq_praecox       2
Pyrexia           2
Meconium          2
Presentation      4
Induced           2
Istage          226
NoProgress        2
CK/KP             2
IIstage           8
Deliv_type        2
dbID            552
Rec_type          4
Pos_IIst         14
Sig2Birth         1
dtype: int64

In [86]:
# Distinct values stored in the Deliv_type column. The values are strings
# because the header parameters are kept as raw text when parsed.
df.Deliv_type.unique()

array(['1', '2'], dtype=object)