In [10]:
import numpy as np
import pandas as pd
import time
from scipy import io # python 에서 mat 파일 읽기 -> loadmat 함수 사용
import matplotlib.pyplot as plt
import random
import pickle

from ecgdetectors import Detectors
from hrvanalysis import get_time_domain_features
from hrvanalysis import get_frequency_domain_features
from hrvanalysis import remove_outliers, remove_ectopic_beats, interpolate_nan_values  #이상치제거, 잡음 제거
from scipy.signal import butter, filtfilt
from scipy.fft import fft, ifft

pd.options.display.float_format = '{:.4f}'.format # 소수점 넷째자리까지만 표현


class DataProcessor:
    """Build a class-balanced awake/drowsy window dataset from ``.mat`` recordings.

    Pipeline for one recording (see ``data_processing``):

    1. ``load_data``            - read the bio-signal matrix and the event table.
    2. ``removeoutliers``       - clean the per-event reaction times.
    3. ``frequency_filtering``  - label events awake/drowsy from reaction time
       and balance the two classes.
    4. ``process_data``         - band-pass the selected signal channel and cut
       one fixed-length window ending at every labelled event.
    """

    # Column of ``sync_data_bio`` that is windowed (treated as the ECG channel).
    ECG_CHANNEL = 8
    # Pass band of the FFT filter, in Hz: [LOWCUT_HZ, HIGHCUT_HZ).
    LOWCUT_HZ = 0.67
    HIGHCUT_HZ = 50

    def __init__(self, fs=1000, step=10, rtsm=0.9, rtkm=0.8, duration_observ=300):
        """Store the processing parameters.

        Args:
            fs: Sampling rate in Hz (samples per second).
            step: Keep every ``step``-th sample of each window (column decimation).
            rtsm: Reaction-time threshold above which an event counts as drowsy.
            rtkm: Reaction-time threshold below which an event counts as awake.
            duration_observ: Length in seconds of the window taken before each
                event (default 300 s = 5 minutes).
        """
        self.fs = fs
        self.step = step
        self.rtsm = rtsm
        self.rtkm = rtkm
        self.duration_observ = duration_observ

    def load_data(self, path):
        """Load one ``.mat`` recording and expose its signal and event tables.

        Args:
            path: Path of a MATLAB file containing the variables
                ``sync_data_bio`` and ``data_event_startcut``.
        """
        self.mat_file = io.loadmat(path)
        self.data_bio = self.mat_file['sync_data_bio']
        self.data_event_startcut_all = self.mat_file['data_event_startcut']
        # Column names (Korean, used as runtime keys): shake direction,
        # event time, reaction time, deviated area.
        self.data_event_startcut = pd.DataFrame(
            self.data_event_startcut_all,
            columns=['흔들린 방향', '사건 발생시간', '반응시간', '벗어난 면적'],
        )
        # Third column holds the reaction time of every event.
        self.data_event_startcut_rt = self.data_event_startcut_all[:, 2]

    def removeoutliers(self):
        """Clean the reaction times with ``hrvanalysis.remove_outliers``.

        Values outside the range 0..20 are treated as outliers by the helper
        (presumably replaced by NaN - confirm against the hrvanalysis docs).
        """
        self.data_event_startcut_rt_without_outliers = remove_outliers(
            rr_intervals=self.data_event_startcut_rt,
            low_rri=0,
            high_rri=20,
        )

    def frequency_filtering(self):
        """Label events as awake/drowsy by reaction time and balance the classes.

        Despite its name this method does no spectral filtering. For every
        event that has two neighbours on each side it computes a 5-point
        centred moving average of the reaction time. An event is *awake* when
        both its own value and the average are below ``rtkm``, and *drowsy*
        when both are above ``rtsm``. The larger class is randomly
        down-sampled (``random.sample``) to the size of the smaller one.

        Sets ``data_event_startcut_time_int_awake`` and
        ``data_event_startcut_time_int_drowsy``: event times converted to
        integer sample indices (``event time * fs``).
        """
        rt = np.asarray(self.data_event_startcut_rt_without_outliers, dtype=float)

        index_rt_awake_base = []
        index_rt_drowsy_base = []
        if len(rt) >= 5:
            centre = rt[2:-2]
            # Same summation order as the scalar formula so thresholds behave
            # identically; NaNs propagate and fail both comparisons.
            smooth = (rt[:-4] + rt[1:-3] + centre + rt[3:-1] + rt[4:]) / 5
            awake_mask = (centre < self.rtkm) & (smooth < self.rtkm)
            drowsy_mask = ~awake_mask & (centre > self.rtsm) & (smooth > self.rtsm)
            # +2 maps positions in the trimmed arrays back to event indices.
            index_rt_awake_base = (np.flatnonzero(awake_mask) + 2).tolist()
            index_rt_drowsy_base = (np.flatnonzero(drowsy_mask) + 2).tolist()

        # Balance the classes by down-sampling the larger one.
        n_keep = min(len(index_rt_awake_base), len(index_rt_drowsy_base))
        index_rt_awake = index_rt_awake_base
        index_rt_drowsy = index_rt_drowsy_base
        if len(index_rt_drowsy_base) > n_keep:
            index_rt_drowsy = random.sample(index_rt_drowsy_base, n_keep)
        elif len(index_rt_awake_base) > n_keep:
            index_rt_awake = random.sample(index_rt_awake_base, n_keep)

        event_time = self.data_event_startcut['사건 발생시간'].to_numpy()

        # Awake events: event time (s) -> integer sample index.
        awake_samples = event_time[np.asarray(index_rt_awake, dtype=int)] * self.fs
        self.data_event_startcut_time_int_awake = [int(t) for t in awake_samples]
        print("len(data_event_startcut_time_int_awake): ", len(self.data_event_startcut_time_int_awake))

        # Drowsy events: event time (s) -> integer sample index.
        drowsy_samples = event_time[np.asarray(index_rt_drowsy, dtype=int)] * self.fs
        self.data_event_startcut_time_int_drowsy = [int(t) for t in drowsy_samples]
        print("len(data_event_startcut_time_int_drowsy): ", len(self.data_event_startcut_time_int_drowsy))

    def _bandpass(self, signal):
        """Return ``signal`` band-passed to [LOWCUT_HZ, HIGHCUT_HZ) via FFT masking.

        Positive-frequency bins ``low <= k < high`` are kept together with
        their exact mirror bins ``n - k``, so the masked spectrum stays
        conjugate-symmetric and the inverse transform is real (a frequency is
        either fully kept or fully removed).
        """
        n = len(signal)
        low = int(self.LOWCUT_HZ / self.fs * n)
        high = int(self.HIGHCUT_HZ / self.fs * n)
        spectrum = fft(signal)
        bins = np.arange(n)
        # Distance of each bin from DC, folding negative frequencies onto
        # their positive counterparts.
        folded = np.minimum(bins, n - bins)
        keep = (folded >= low) & (folded < high)
        return np.real(ifft(np.where(keep, spectrum, 0)))

    def _extract_windows(self, signal, event_samples):
        """Return the ``fs * duration_observ`` samples preceding each event.

        Events without a full window of history, or located past the end of
        the signal, are skipped.
        """
        win_len = self.fs * self.duration_observ
        return [
            signal[end - win_len:end]
            for end in event_samples
            if end - win_len >= 0 and end <= len(signal)
        ]

    def process_data(self):
        """Cut filtered signal windows for the labelled events of one recording.

        Requires ``load_data`` and ``frequency_filtering`` to have run.

        Returns:
            A DataFrame with one row per usable event: every ``step``-th
            sample of the window (columns named by sample offset) followed by
            a ``label`` column (0 = awake, 1 = drowsy); awake rows come first.
            ``None`` when no event has a complete window.
        """
        signal = self._bandpass(self.data_bio[:, self.ECG_CHANNEL])

        awake = self._extract_windows(signal, self.data_event_startcut_time_int_awake)
        drowsy = self._extract_windows(signal, self.data_event_startcut_time_int_drowsy)
        windows = awake + drowsy
        if not windows:
            return None

        win_len = self.fs * self.duration_observ
        # Decimate before building the frame to avoid materialising all samples.
        df = pd.DataFrame(
            np.vstack(windows)[:, ::self.step],
            columns=range(0, win_len, self.step),
        )
        df["label"] = [0] * len(awake) + [1] * len(drowsy)
        return df

    def data_processing(self, len_file):
        """Process recordings ``p2`` .. ``p<len_file>`` and stack their windows.

        Args:
            len_file: Number of the last participant file to read (inclusive);
                numbering starts at 2.

        Returns:
            The combined DataFrame, also stored as ``self.ECG_df`` (empty when
            no recording produced a window). Row indices restart per file.
        """
        frames = []
        for i in range(2, len_file + 1):
            path = 'seatdr/sync_data_seatdr_p' + str(i) + 's2.mat'
            self.load_data(path)
            self.removeoutliers()
            self.frequency_filtering()
            df = self.process_data()
            if df is not None:
                frames.append(df)

        # Concatenate once instead of growing a frame inside the loop.
        self.ECG_df = pd.concat(frames, axis=0) if frames else pd.DataFrame()
        return self.ECG_df

    def data_save(self):
        """Pickle ``self.ECG_df`` under a name derived from the parameters.

        Returns:
            The file name written, ``data_final_s2_<duration>s_<fs>hz_RT.pkl``.

        Raises:
            RuntimeError: If ``data_processing`` has not produced ``ECG_df`` yet.
        """
        if not hasattr(self, 'ECG_df'):
            raise RuntimeError("data_processing() must be called before data_save()")
        file_name = 'data_final_s2_' + str(self.duration_observ) + 's_' + str(self.fs) + 'hz_RT' + '.pkl'
        self.ECG_df.to_pickle(file_name)
        return file_name
    
        

# Usage example
data_processor = DataProcessor()
data_processor.data_processing(20)
# Read the result from the instance attribute: data_processing() stores the
# combined frame on `ECG_df`, so this works regardless of its return value.
data_setup = data_processor.ECG_df
data_setup.to_pickle('rtsm09.pkl')

0 outlier(s) have been deleted.
len(data_event_startcut_time_int_awake):  145
len(data_event_startcut_time_int_drowsy):  145
0 outlier(s) have been deleted.
len(data_event_startcut_time_int_awake):  15
len(data_event_startcut_time_int_drowsy):  15
0 outlier(s) have been deleted.
len(data_event_startcut_time_int_awake):  83
len(data_event_startcut_time_int_drowsy):  83
0 outlier(s) have been deleted.
len(data_event_startcut_time_int_awake):  53
len(data_event_startcut_time_int_drowsy):  53
0 outlier(s) have been deleted.
len(data_event_startcut_time_int_awake):  50
len(data_event_startcut_time_int_drowsy):  50
0 outlier(s) have been deleted.
len(data_event_startcut_time_int_awake):  141
len(data_event_startcut_time_int_drowsy):  141
0 outlier(s) have been deleted.
len(data_event_startcut_time_int_awake):  195
len(data_event_startcut_time_int_drowsy):  195
0 outlier(s) have been deleted.
len(data_event_startcut_time_int_awake):  140
len(data_event_startcut_time_int_drowsy):  140
0 outlie

Unnamed: 0,0,10,20,30,40,50,60,70,80,90,...,299910,299920,299930,299940,299950,299960,299970,299980,299990,label
0,0.0359,0.0308,0.0200,0.0030,-0.0185,-0.0393,-0.0526,-0.0529,-0.0400,-0.0205,...,0.0066,0.0070,0.0062,0.0043,0.0018,-0.0004,-0.0015,-0.0006,0.0027,0
1,0.0622,0.0692,0.0759,0.0796,0.0765,0.0627,0.0363,-0.0008,-0.0418,-0.0779,...,-0.0055,-0.0011,0.0069,0.0177,0.0282,0.0345,0.0331,0.0218,0.0007,0
2,0.0609,0.0687,0.0759,0.0794,0.0755,0.0604,0.0327,-0.0049,-0.0452,-0.0785,...,-0.0089,-0.0092,-0.0062,-0.0014,0.0021,0.0026,0.0009,-0.0005,0.0012,0
3,-0.1414,-0.1232,-0.0789,-0.0354,-0.0086,0.0003,-0.0009,-0.0031,-0.0010,0.0059,...,0.0035,0.0014,0.0036,0.0113,0.0236,0.0371,0.0474,0.0504,0.0435,0
4,-0.0105,-0.0039,-0.0066,-0.0093,-0.0069,0.0007,0.0105,0.0191,0.0247,0.0272,...,-0.0594,-0.0463,-0.0351,-0.0245,-0.0152,-0.0085,-0.0053,-0.0049,-0.0055,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
184,0.0030,0.0007,0.0057,0.0146,0.0232,0.0293,0.0334,0.0375,0.0440,0.0538,...,0.1299,0.1325,0.1299,0.1190,0.0967,0.0610,0.0138,-0.0385,-0.0865,1
185,0.0880,0.0898,0.0808,0.0717,0.0675,0.0683,0.0714,0.0751,0.0787,0.0829,...,-0.0835,-0.0677,-0.0549,-0.0465,-0.0424,-0.0408,-0.0391,-0.0353,-0.0289,1
186,-0.0287,-0.0343,-0.0259,-0.0019,0.0335,0.0674,0.0820,0.0644,0.0176,-0.0373,...,0.0640,0.0678,0.0733,0.0816,0.0919,0.1027,0.1124,0.1198,0.1244,1
187,-0.0082,-0.0349,-0.0548,-0.0625,-0.0536,-0.0275,0.0081,0.0369,0.0403,0.0099,...,-0.0136,-0.0075,0.0015,0.0135,0.0278,0.0420,0.0533,0.0592,0.0578,1
