In [72]:
# Script for testing the annotations of Mit2ZIVE records

import pandas as pd
import numpy as np

import json
import sys
import wfdb
from pathlib import Path
from neurokit2 import signal_resample
from collections import Counter

def make_dict_array(atr_sample, atr_symbol):
    """
    Pair annotation positions with their annotation symbols.

    Parameters
    ------------
        atr_sample: iterable of sample positions
        atr_symbol: iterable of annotation symbols
    Return
    -----------
        list of dicts of the form {'sample': ..., 'symbol': ...}, one per
        (position, symbol) pair in input order; pairing stops at the end of
        the shorter input (zip semantics)
    """
    return [
        {'sample': position, 'symbol': label}
        for position, label in zip(atr_sample, atr_symbol)
    ]

def print_dict_array(dict_array):
    """
    Print every annotation entry whose symbol is not 'N'.

    Parameters
    ------------
        dict_array: iterable of dicts with 'sample' and 'symbol' keys
                    (the structure produced by make_dict_array)
    """
    for entry in dict_array:
        position, label = entry['sample'], entry['symbol']
        if label == 'N':
            # Entries marked 'N' are skipped; everything else is reported
            continue
        print(f"Sample: {position}, Symbol: {label}")

def zive_read_file_1ch(filename):
    """
    Read a single-channel Zive ECG recording from a raw binary file.

    The whole file is interpreted as a flat sequence of big-endian 32-bit
    signed integers ('>i4'). The raw values are rescaled with the ADC
    constants below and the mean of the rescaled signal is subtracted, so
    the returned signal is centred on zero.

    Parameters
    ------------
        filename: string or path-like, path to the raw recording
    Return
    -----------
        ecg_signal: numpy array, float
    """
    ADCmax = 0x800000
    Vref = 2.5
    # The data is binary, so open it in binary mode (text mode can corrupt
    # bytes on some platforms) and close the handle deterministically.
    with open(filename, "rb") as f:
        a = np.fromfile(f, dtype=np.dtype('>i4'))
    # NOTE(review): 3.5 looks like an amplifier gain and *1000 like a V -> mV
    # conversion - confirm against the device documentation.
    b = (a - ADCmax/2)*2*Vref/ADCmax/3.5*1000
    ecg_signal = b - np.mean(b)
    return ecg_signal

def zive_read_df_rpeaks(db_path, file_name):
    """
    Load the 'rpeaks' records of a recording's JSON annotation file.

    The annotation file is expected at <db_path>/<file_name>.json and is
    read as UTF-8.

    Parameters
    ------------
        db_path: string or path-like, folder holding the annotation files
        file_name: string, recording name without the '.json' suffix
    Return
    -----------
        df_rpeaks: pandas DataFrame with one row per entry of the
                   top-level 'rpeaks' list
    """
    annotation_file = Path(db_path, file_name + '.json')
    with open(annotation_file, 'r', encoding="utf8") as handle:
        content = json.load(handle)
    return pd.json_normalize(content, record_path=['rpeaks'])

def read_signl(rec_dir, filename):
    """
    Read an ECG recording stored in either the zive or the mit2zive layout.

    zive file names look like 1621694.321 (annotations: 1621694.321.json);
    mit2zive file names look like 100.000 (annotations: 100.000.json), where
    the part before the dot is no longer than 4 characters. That length is
    what selects the reader: short names are loaded with np.load, all other
    names are read with zive_read_file_1ch.

    Parameters
    ------------
        rec_dir: string
        filename: string
    Return
    -----------
        signl: numpy array, float
    """
    bare_name = Path(filename)
    name = bare_name.stem
    print("Name:", name)
    print("Extension:", bare_name.suffix)

    file_path = Path(rec_dir, filename)
    print('file_path:', file_path)

    if len(name) <= 4:
        # mit2zive: the signal was saved in NumPy's own binary format
        with open(file_path, "rb") as f:
            signl_loaded = np.load(f)
        shape_label = 'mit2zive signl_loaded '
    else:
        # zive: raw big-endian integer samples
        signl_loaded = zive_read_file_1ch(file_path)
        shape_label = 'zive signl_loaded'

    print(signl_loaded[:10])
    print(shape_label, signl_loaded.shape)
    return signl_loaded


print("\nSukuriamas Zive duomenų analogas iš MIT duomenų")

my_os = sys.platform
print("OS in my system : ", my_os)

# Every platform that does not report itself as 'linux' is treated as Windows
OS = 'Ubuntu' if my_os == 'linux' else 'Windows'

# //////////////// PARAMETERS /////////////////////////////////////////////////////

# Root data folder, selected per operating system
Duomenu_aplankas = (
    'D:\\DI\\DUOM_2022_RUDUO'                      # Windows variant
    if OS == 'Windows'
    else '/home/kesju/DI/DUOMENU_TVARKYMAS_2023'   # Ubuntu variant, without Docker
)

# Folder holding the MIT2ZIVE data
db_folder_mit2zive = 'records_npy_tst'

# Folder holding the MIT-BIH data set
db_folder_mit = 'mit-bih-arrhythmia-database-1.0.0'

# List of patient records to process.
# Alternative short list: records_nr = np.array([124,209,228])
records_nr = np.array([
    100, 101, 103, 105, 106, 108, 109, 111, 112, 113, 114, 115, 116, 117, 118, 119, 121, 122, 123, 124,
    200, 201, 202, 203, 205, 208, 209, 210, 212, 213, 214, 215, 219, 220, 221, 222, 223, 228, 230, 231, 232, 233, 234,
])

# Note: records 102, 104, 107 and 217 come from patients with pacemakers;
# record 207 contains segments with ventricular flutter or
# fibrillation (VF) - they are not included in the data.

# For testing: each assignment below overrides the previous one, so only the
# last list is actually used.
records_nr = np.array([100.000, 100.001, 100.002])
records_nr = np.array([232])

# Path to the folder with the MIT2ZIVE ECG records and annotations (.json)
rec_dir = Path(Duomenu_aplankas, db_folder_mit2zive)

# Path to the MIT-BIH data set
db_path_mit = Path(Duomenu_aplankas, db_folder_mit)

# Text labels for annotation subtype codes.
# NOTE(review): presumably 'c' = clean and 'n' = noisy, one letter per channel - confirm.
quality_dict = {0: 'cc', 1: 'nc', 2: 'cn', 3: 'nn'}

print("Bendras Zive duomenų aplankas: ", Duomenu_aplankas)
print("Pacientų įrašų sąrašas:\n", records_nr)
print("Aplankas, kur yra MIT2ZIVE duomenys:\n", db_folder_mit2zive)

# //////////////// Nurodomi MIT2ZIVE parametrai ////////////////////////////////




Sukuriamas Zive duomenų analogas iš MIT duomenų
OS in my system :  linux
Bendras Zive duomenų aplankas:  /home/kesju/DI/DUOMENU_TVARKYMAS_2023
Pacientų įrašų sąrašas:
 [232]
Aplankas, kur yra MIT2ZIVE duomenys:
 records_npy_tst


In [73]:
def find_f_l(sample, lst):
    """
    Pair each 'f' marker with the most recent unmatched 's' marker.

    The positions in `sample` are visited in the given order. An 's' at a
    position is remembered (a later 's' replaces an earlier unmatched one).
    The first 'f' found at a larger position closes the pair, after which a
    new 's' is needed before another pair can be formed. Any other marker
    value is ignored.

    Parameters
    ------------
        sample: iterable of int, positions into lst to inspect
        lst: indexable sequence of marker strings
    Return
    -----------
        list of (s_position, f_position) tuples in the order they were found
    """
    pairs = []
    pending_start = -1  # -1 means no 's' is currently waiting for its 'f'

    for pos in sample:
        marker = lst[pos]
        if marker == 's':
            pending_start = pos
            continue
        if marker != 'f' or pending_start == -1 or pos <= pending_start:
            continue
        pairs.append((pending_start, pos))
        pending_start = -1
    return pairs


for record_nr in records_nr:

    # ----------------------------------------------- Read the record
    subject_path = f'{db_path_mit}/{record_nr}'

    record = wfdb.rdrecord(subject_path, sampfrom=0, channels=[0], physical=True)
    signl_raw = record.p_signal[:, 0]
    # Sampling frequency comes from the record header rather than a
    # hard-coded 360, so sample positions convert to seconds correctly for
    # any record.
    fs = record.fs

    len_signl_raw = signl_raw.shape[0]
    print("\n")
    print(f"Pacientas iš MIT: {record_nr}  Reikšmių: {len_signl_raw}")

    # Read the annotations of the original record
    # https://wfdb.readthedocs.io/en/latest/wfdb.html
    ann = wfdb.rdann(subject_path, 'atr', sampfrom=0, sampto=None, shift_samps=False)
    atr_sample_org = ann.sample
    atr_symbol_org = np.array(ann.symbol)
    print('len(atr_sample_org):', len(atr_sample_org))

    ann_subtype = ann.subtype
    print('len(ann.subtype):', len(ann.subtype))

    # Indices (into the annotation arrays) of all '~' annotations
    occurrences = [i for i in range(len(atr_symbol_org)) if atr_symbol_org[i] == '~']
    print(occurrences)

    # Relabel every '~' annotation according to its subtype:
    # subtype 1 or 3 -> 's', subtype 0 or 2 -> 'f', anything else -> ''.
    # NOTE(review): presumably 's'/'f' mark the start/finish of a noisy
    # segment on the first channel (see quality_dict) - confirm.
    for idx in occurrences:
        subtype = ann_subtype[idx]
        if subtype == 1 or subtype == 3:
            atr_symbol_org[idx] = 's'
        elif subtype == 0 or subtype == 2:
            atr_symbol_org[idx] = 'f'
        else:
            atr_symbol_org[idx] = ''

    print()

    print("\nAnotacijų pasiskirstymas originale:")
    smb_lst = Counter(atr_symbol_org)
    print(dict(smb_lst))
    print()

    # Pair every 's' marker with the 'f' marker that follows it
    result = find_f_l(occurrences, atr_symbol_org)
    print(result)

    # Report each (s, f) pair as minutes / seconds from the start of the record
    for elem in result:
        lowerIndex, upperLevel = elem

        start = atr_sample_org[lowerIndex]/fs    # secs
        start_min = start//60
        start_sec = start % 60

        finish = atr_sample_org[upperLevel]/fs   # secs
        finish_min = finish//60
        finish_sec = finish % 60
        print(f"start: {start_min:.2f} min {start_sec:.1f} sec finish: {finish_min:.2f} min {finish_sec:.1f} sec")

# Quick self-check of find_f_l on a hand-made marker sequence
lst = list('sfffsfsffffsfs')
sample = list(range(len(lst)))

result = find_f_l(sample, lst)
print(result)




Pacientas iš MIT: 232  Reikšmių: 650000
len(atr_sample_org): 1816
len(ann.subtype): 1816
[7, 69, 83, 90, 224, 244, 273, 280, 282, 286, 346, 347, 359, 367, 382, 390, 393, 401, 402, 407, 425, 429, 703, 722, 737, 744, 750, 775, 789, 920, 924, 1352, 1354, 1365, 1370]


Anotacijų pasiskirstymas originale:
{'+': 1, 'R': 397, 'A': 1382, 's': 12, 'f': 23, 'j': 1}

[(7, 69), (83, 90), (282, 286), (347, 359), (401, 402), (703, 722), (750, 775), (1352, 1354), (1365, 1370)]
start: 0.00 min 7.1 sec finish: 1.00 min 11.3 sec
start: 1.00 min 23.8 sec finish: 1.00 min 30.0 sec
start: 4.00 min 36.7 sec finish: 4.00 min 41.0 sec
start: 5.00 min 40.0 sec finish: 5.00 min 49.6 sec
start: 6.00 min 28.5 sec finish: 6.00 min 29.6 sec
start: 11.00 min 17.5 sec finish: 11.00 min 31.4 sec
start: 11.00 min 51.5 sec finish: 12.00 min 13.1 sec
start: 22.00 min 12.6 sec finish: 22.00 min 14.5 sec
start: 22.00 min 23.6 sec finish: 22.00 min 27.3 sec
[(0, 1), (4, 5), (6, 7), (11, 12)]
