In [1]:
import os
from glob import glob
import numpy as np
from tqdm import tqdm
import random
import pandas as pd

#torch
import torch
import torch.nn as nn
from torch.nn.parameter import Parameter
from torch.utils.data import Dataset,DataLoader

#mne
from mne import Epochs, pick_types, find_events
from mne.io import concatenate_raws, read_raw_edf

#signal preprocess
import scipy.signal as ssig    

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Notebook-wide random seed.
seed = 42

In [2]:
# Seed-fixing helper: makes every random number generator used here reproducible.
def seed_everything(seed: int = 42):
    """Seed Python, NumPy and PyTorch RNGs and put cuDNN in deterministic mode.

    Args:
        seed: Value applied to ``random``, ``numpy.random``, ``PYTHONHASHSEED``
            and the PyTorch CPU/CUDA generators. Defaults to 42.
    """
    random.seed(seed)
    np.random.seed(seed)
    # Only affects subprocesses started after this point; the hash seed of the
    # already-running interpreter is fixed at start-up.
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes convolution algorithms per run, which can pick
    # different kernels each time and defeats the determinism requested above.
    torch.backends.cudnn.benchmark = False

# Apply the notebook-wide seed to every RNG before any data is split or processed.
seed_everything(seed)

In [3]:
# Input folders with the raw .edf recordings and the output root for converted .npy arrays.
# NOTE(review): these are absolute, machine-specific paths — adjust them when running elsewhere.
train_path = '/home/maic-player/dataset/1_Train+Val'
test_path = '/home/maic-player/dataset/2_Test'
save_path = '/home/maic-player/FINAL_SUBMISSION/npy_60000'

In [4]:
# Collect every EDF recording in the training folder in a stable (sorted) order.
dataset_path_list = sorted(glob(os.path.join(train_path, "*.edf")))

# Binary target per recording: 0 when the path contains 'Normal', 1 otherwise.
ground_truth = [0 if 'Normal' in dataset_path else 1 for dataset_path in dataset_path_list]

# Stratified, seeded 80/20 split of paths and labels into train and validation sets.
train_path_list, valid_path_list, train_label, valid_label = train_test_split(
    dataset_path_list,
    ground_truth,
    test_size=0.2,
    shuffle=True,
    stratify=ground_truth,
    random_state=seed,
)

In [5]:
# Convert an EDF recording to a fixed-length NumPy array.
def edf_to_npy(path, resample_rate):
    """Load an EDF file and resample every channel to a fixed number of samples.

    Parameters
    ----------
    path : str
        Path to the ``.edf`` file to read.
    resample_rate : int
        Number of samples each row has after resampling. Despite its name this
        is a target length, not a frequency in Hz: it is passed to
        ``scipy.signal.resample`` as ``num``.

    Returns
    -------
    numpy.ndarray
        ``float32`` array with one row per row of ``raw.get_data()`` (one per
        channel in MNE) and ``resample_rate`` columns.
    """
    raw = read_raw_edf(path, preload=True, stim_channel=False, verbose=False)
    data_np = raw.get_data()

    # Resample all rows in one call along the time axis instead of looping
    # over channels in Python and re-stacking the pieces.
    data_arr = ssig.resample(data_np, resample_rate, axis=1)

    return data_arr.astype(np.float32)

In [None]:
# Convert the train and validation recordings to fixed-length .npy arrays,
# one output sub-folder per split.
for split_name, edf_paths in (('train', train_path_list), ('valid', valid_path_list)):
    npy_save_path = os.path.join(save_path, split_name)
    # np.save does not create missing folders, so make sure the target exists.
    os.makedirs(npy_save_path, exist_ok=True)
    # The loop variable is deliberately not called `train_path`: that would
    # overwrite the dataset-folder setting of the same name and break a re-run
    # of the cell that globs it.
    for edf_path in tqdm(edf_paths):
        data_np = edf_to_npy(edf_path, 60000)
        # File name without directory or extension, e.g. '/a/b/x.edf' -> 'x'.
        file_name = os.path.splitext(os.path.basename(edf_path))[0]
        np.save(os.path.join(npy_save_path, file_name + '.npy'), data_np)
    

In [8]:
# Convert every test recording to a fixed-length .npy array in the 'test' sub-folder.
npy_save_path = os.path.join(save_path, 'test')
# np.save does not create missing folders, so make sure the target exists.
os.makedirs(npy_save_path, exist_ok=True)
# Sorted so the processing order is the same on every run (glob order is arbitrary).
for test_file in tqdm(sorted(glob(os.path.join(test_path, '*.edf')))):
    data_np = edf_to_npy(test_file, 60000)
    # File name without directory or extension, e.g. '/a/b/x.edf' -> 'x'.
    file_name = os.path.splitext(os.path.basename(test_file))[0]
    np.save(os.path.join(npy_save_path, file_name + '.npy'), data_np)

100%|██████████| 102/102 [11:58<00:00,  7.05s/it]
