In [38]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import glob
import IPython.display as ipd
import warnings
warnings.filterwarnings("ignore")

import soundfile as sf

from utils import *

In [39]:
# Load the stage table from ./data/rml_df.csv; later cells read its patient_id,
# stage_start_time, stage_end_time, stage_duration and stage_type columns.
df = pd.read_csv('./data/rml_df.csv')

In [40]:
# Order the stage rows chronologically within each patient and renumber the
# index from 0 in the same call.

df = df.sort_values(by=['patient_id', 'stage_start_time'], ignore_index=True)

In [41]:
# Keep just the columns the rest of the notebook works with.

stage_columns = ['patient_id', 'stage_start_time', 'stage_end_time', 'stage_duration', 'stage_type']
sleep_df = df.loc[:, stage_columns]
sleep_df.head()

Unnamed: 0,patient_id,stage_start_time,stage_end_time,stage_duration,stage_type
0,995,0.0,210.0,210.0,Wake
1,995,210.0,300.0,90.0,NonREM1
2,995,300.0,870.0,570.0,NonREM2
3,995,870.0,930.0,60.0,Wake
4,995,930.0,960.0,30.0,NonREM1


In [86]:
# Patient ids that have a folder under ./data/edf, as integers, in the order
# os.listdir returns them (arbitrary, not sorted).
# Only purely numeric entries are converted: a stray '.DS_Store' or
# '.ipynb_checkpoints' in the directory would otherwise make int() raise
# ValueError and break the cell.
lists = [int(entry) for entry in os.listdir('./data/edf') if entry.isdecimal()]
lists

[1576, 1590, 1371, 1406, 1037]

In [94]:
# Restrict the stage table to the first patient id found in `lists`.

test_df = sleep_df.loc[sleep_df['patient_id'].eq(lists[0])]

test_df

Unnamed: 0,patient_id,stage_start_time,stage_end_time,stage_duration,stage_type
57668,1576,0.0,60.0,60.0,Wake
57669,1576,60.0,2280.0,2220.0,NonREM2
57670,1576,60.0,2280.0,2220.0,NonREM2
57671,1576,60.0,2280.0,2220.0,NonREM2
57672,1576,60.0,2280.0,2220.0,NonREM2
...,...,...,...,...,...
57760,1576,13710.0,13740.0,30.0,NonREM1
57761,1576,13740.0,13770.0,30.0,Wake
57762,1576,13770.0,13800.0,30.0,NonREM1
57763,1576,13800.0,13860.0,60.0,NonREM2


In [95]:
# Collapse repeated stage rows (first occurrence wins), then put the rows back
# in chronological order with a fresh 0..n-1 index.

test_df = (
    test_df
    .drop_duplicates(
        subset=['stage_start_time', 'stage_end_time', 'stage_duration', 'stage_type'],
        keep='first',
    )
    .sort_values(by=['stage_start_time'])
    .reset_index(drop=True)
)
test_df

Unnamed: 0,patient_id,stage_start_time,stage_end_time,stage_duration,stage_type
0,1576,0.0,60.0,60.0,Wake
1,1576,60.0,2280.0,2220.0,NonREM2
2,1576,2280.0,2520.0,240.0,NonREM1
3,1576,2520.0,2550.0,30.0,Wake
4,1576,2550.0,2610.0,60.0,NonREM1
5,1576,2610.0,3270.0,660.0,NonREM2
6,1576,3270.0,3510.0,240.0,Wake
7,1576,3510.0,3630.0,120.0,NonREM1
8,1576,3630.0,3720.0,90.0,Wake
9,1576,3720.0,3780.0,60.0,NonREM1


In [90]:
# Root directory of the EDF data; the trailing slash lets later cells append a
# sub-folder name by plain string concatenation.
data_folder = './data/edf/'

# Names of the entries directly under the data root, in the order os.listdir
# returns them (arbitrary, not sorted).
folder = os.listdir(data_folder)


['00001576', '00001590', '00001371', '00001406', '00001037']

In [91]:
# Path of the first recording folder (data root + first listed entry).
f'{data_folder}{folder[0]}'

'./data/edf/00001576'

In [43]:

# Sampling rate used below to convert between sample counts and seconds.
# NOTE(review): 48000 is hard-coded here; presumably the microphone channel's
# rate in the recordings — confirm.
sr = 48000

# get_mic_signals is not defined in this notebook; presumably it comes from the
# star-import of utils. It is given the data root, not a single recording folder.
# NOTE(review): confirm which recording(s) the returned signal covers.
signal = get_mic_signals(data_folder).extract_all_mic_signal()

In [44]:
# Length of the extracted signal in seconds (sample count / sampling rate).
recording_seconds = len(signal) / sr
recording_seconds

11867.0

In [81]:
# Rough recording length in hours.
# NOTE(review): 11850 is typed by hand; the duration computed in the previous
# cell is 11867.0 s — presumably a rounded figure, confirm.
11850 / 3600

3.2916666666666665

In [82]:
# segment signal by stage

def segment_signal(signal, sr, df):
    """Cut ``signal`` into one segment per row of a stage table.

    Args:
        signal: Sliceable sequence of samples (e.g. a list or 1-D array).
        sr: Sampling rate; each stage time is multiplied by it and truncated
            to an int to obtain a sample offset.
        df: DataFrame with ``stage_start_time`` and ``stage_end_time``
            columns. Its index may be anything; rows are taken in their
            current order.

    Returns:
        list: ``signal[start:end]`` for every row of ``df``, in row order.
        Slicing clamps to the length of ``signal``, so a stage that reaches
        past the end of the signal yields a shorter (possibly empty) segment.
    """
    # Walk the two columns positionally. The previous ``df[col][i]`` lookup was
    # label-based: it raised KeyError (or silently picked a different row)
    # whenever the index was not exactly 0..n-1, e.g. on a filtered frame that
    # had not been re-indexed.
    return [
        signal[int(start * sr):int(end * sr)]
        for start, end in zip(df['stage_start_time'], df['stage_end_time'])
    ]


# save signal as wav file
# the name of wav file is folder name + (stage_start_time // 3600) + index

def save_wav(signal_list, sr, df, data_folder):
    """Write every stage segment to ``./data/wav/<patient_id>/<wav_id>.wav``.

    The wav id is ``<patient_id>_<HH>_<NN>`` where ``HH`` is the 1-based hour
    in which the stage starts (``stage_start_time // 3600 + 1``) and ``NN`` is
    the row number of the stage, both zero-padded to two digits.

    Args:
        signal_list: One sample sequence per row of ``df``, in the same order.
        sr: Sampling rate passed to the wav writer.
        df: DataFrame with a ``stage_start_time`` column; rows are read in
            their current order, whatever the index labels are.
        data_folder: Path of the recording folder. Its last component, with
            or without a trailing slash, is used as the patient id.

    Returns:
        list[str]: The wav ids, one per row of ``df``, in row order.

    Raises:
        ValueError: If ``signal_list`` and ``df`` differ in length.
    """
    if len(signal_list) != len(df):
        raise ValueError(
            'signal_list has %d segments but df has %d rows'
            % (len(signal_list), len(df))
        )

    # normpath drops a trailing separator, so './x/0001/' and './x/0001' both
    # give '0001'. The earlier split('/')[-2] returned the parent folder name
    # when the trailing slash was missing.
    patient_id = os.path.basename(os.path.normpath(data_folder))
    wav_folder = './data/wav/' + patient_id + '/'
    os.makedirs(wav_folder, exist_ok=True)

    # Collect ids locally and return them. The previous version appended to a
    # notebook-level ``wav_id_list`` that may not exist yet (NameError on a
    # fresh kernel) and never wrote any audio.
    wav_ids = []
    for row, (segment, start) in enumerate(zip(signal_list, df['stage_start_time'])):
        sleep_hour = int(start // 3600) + 1
        wav_id = patient_id + '_' + str(sleep_hour).zfill(2) + '_' + str(row).zfill(2)
        # `sf` is the soundfile module imported at the top of the notebook.
        sf.write(wav_folder + wav_id + '.wav', segment, sr)
        wav_ids.append(wav_id)
    return wav_ids

In [None]:
# Display the stage table currently held in test_df.
test_df

In [100]:
patient_id = lists[0]

# One id per stage row: <patient_id>_<HH>_<NN>, where HH is the 1-based hour in
# which the stage starts and NN is the row number, both zero-padded to 2 digits.
# The column is iterated positionally (enumerate) rather than looked up by
# label, so the ids stay correct even if test_df's index is not 0..n-1, and no
# loop temporaries are left behind in the notebook namespace.
wav_id_list = [
    str(patient_id) + '_' + str(int(start // 3600) + 1).zfill(2) + '_' + str(row).zfill(2)
    for row, start in enumerate(test_df['stage_start_time'])
]

In [102]:
# Show the stage rows side by side with their generated ids in a 'wav_id' column.
wav_id_column = pd.Series(wav_id_list, name='wav_id')
pd.concat([test_df, wav_id_column], axis=1)

Unnamed: 0,patient_id,stage_start_time,stage_end_time,stage_duration,stage_type,wav_id
0,1576,0.0,60.0,60.0,Wake,1576_01_00
1,1576,60.0,2280.0,2220.0,NonREM2,1576_01_01
2,1576,2280.0,2520.0,240.0,NonREM1,1576_01_02
3,1576,2520.0,2550.0,30.0,Wake,1576_01_03
4,1576,2550.0,2610.0,60.0,NonREM1,1576_01_04
5,1576,2610.0,3270.0,660.0,NonREM2,1576_01_05
6,1576,3270.0,3510.0,240.0,Wake,1576_01_06
7,1576,3510.0,3630.0,120.0,NonREM1,1576_01_07
8,1576,3630.0,3720.0,90.0,Wake,1576_02_08
9,1576,3720.0,3780.0,60.0,NonREM1,1576_02_09


In [101]:
# Display the generated wav ids.
wav_id_list

['1576_01_00',
 '1576_01_01',
 '1576_01_02',
 '1576_01_03',
 '1576_01_04',
 '1576_01_05',
 '1576_01_06',
 '1576_01_07',
 '1576_02_08',
 '1576_02_09',
 '1576_02_10',
 '1576_02_11',
 '1576_02_12',
 '1576_02_13',
 '1576_02_14',
 '1576_02_15',
 '1576_02_16',
 '1576_02_17',
 '1576_02_18',
 '1576_02_19',
 '1576_02_20',
 '1576_02_21',
 '1576_02_22',
 '1576_03_23',
 '1576_03_24',
 '1576_04_25',
 '1576_04_26',
 '1576_04_27',
 '1576_04_28',
 '1576_04_29',
 '1576_04_30',
 '1576_04_31',
 '1576_04_32',
 '1576_04_33',
 '1576_04_34']

In [None]:
signal_list = segment_signal(signal, sr, test_df)

# Show each segment's length in seconds instead of dumping the raw sample
# arrays. A 0.0 entry marks a stage whose start lies beyond the end of
# `signal` (slicing past the end returns an empty segment), which is worth
# checking before any files are written.
[len(segment) / sr for segment in signal_list]

In [83]:
# Pass the recording's own folder, with a trailing slash. save_wav takes the
# patient id from the last folder name in this path, so the bare data root
# './data/edf/' would label every file 'edf' instead of the patient.
# NOTE(review): assumes folder[0] is the same recording as lists[0] (both come
# from listing ./data/edf) — confirm.
save_wav(signal_list, sr, test_df, data_folder + folder[0] + '/')