In [1]:
# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2
%cd ...

D:\GitHub\eeg_analysis\notebook\nii_test


-----

## 환경 구성

In [2]:
# Load some packages
import os
import re
import copy
import glob
import json
import shutil
import pprint
import datetime
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from IPython import display

import nibabel as nib

In [2]:
# Other settings
%matplotlib inline
%config InlineBackend.figure_format = 'retina' # cleaner text

plt.style.use('default') 
# ['Solarize_Light2', '_classic_test_patch', 'bmh', 'classic', 'dark_background', 'fast', 
#  'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn', 'seaborn-bright', 'seaborn-colorblind', 
#  'seaborn-dark', 'seaborn-dark-palette', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 
#  'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 
#  'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'tableau-colorblind10']

plt.rcParams['image.interpolation'] = 'bicubic'
plt.rcParams["font.family"] = 'Arial' # 'NanumGothic' # for Hangul in Windows
plt.rcParams["animation.html"] = "jshtml"
plt.rcParams['animation.ffmpeg_path'] = 'c:/ffmpeg/bin/ffmpeg.exe'

-----

## 파일 구조 파악

In [None]:
folder_path1 = r'H:\Other_DB\CAU_EEG\230314_nii_file'
folder_path2 = r'H:\Other_DB\CAU_EEG\230315_MRI_3DT1'

file_dict = {}

for file_path in glob.glob(os.path.join(folder_path1, '*.nii')) + glob.glob(os.path.join(folder_path2, '*.nii')):
    file_name = os.path.basename(file_path)

    subject_id = None
    for chunk in re.split('[-_.() ]', file_name):
        if chunk.isnumeric() and len(chunk) >= 5:
            subject_id = chunk
            break
    
    if subject_id is not None:
        subject_id = subject_id[:8]
        subject_id = '0' * (8 - len(subject_id)) + subject_id
    
    if subject_id in file_dict:
        file_dict[subject_id].append(file_path)
    else:
        file_dict[subject_id] = [file_path]

for file_path in glob.glob(os.path.join(folder_path2, '*/*.nii')):
    subject_id = os.path.dirname(file_path)
    
    if subject_id in file_dict:
        file_dict[subject_id].append(file_path)
    else:
        file_dict[subject_id] = [file_path]
        
# pprint.pprint(file_dict)

### 파일 중복 검사

In [None]:
duplication = {subject_id: [] for subject_id in file_dict.keys()}

for subject_id, file_path_list in tqdm(file_dict.items()):
    checker = [False] * len(file_path_list)
    
    for i, file_path in enumerate(file_path_list):
        if checker[i]:
            continue
        current_set = set([file_path])
        checker[i] = True
        
        pivot = nib.load(file_path).get_fdata()
        for k in range(i + 1, len(file_path_list)):
            if checker[k]:
                continue
            latter = nib.load(file_path_list[k]).get_fdata()                
            if pivot.shape == latter.shape and (pivot == latter).all():
                current_set.add(file_path_list[k])
                checker[k] = True
        duplication[subject_id].append(current_set)

### 파일 통합 (가장 긴 이름으로)

In [None]:
# target_folder = r"H:\Other_DB\CAU_EEG\230403_MRI_Integration"
# for dup_list in tqdm(duplication.values()):
#     for dup_set in dup_list:
#         longest_name = ''
#         longest_path = ''
#         for file_path in dup_set:
#             file_name = os.path.basename(file_path)
#             if len(longest_name) < len(file_name):
#                 longest_name = file_name
#                 longest_path = file_path
#         shutil.copy2(longest_path, os.path.join(target_folder, longest_name))

### 파일 통합 (환자명으로, 기존 파일들 JSON으로 저장)

In [None]:
history_dict = {}
target_folder = r"H:\Other_DB\CAU_EEG\230403_MRI_Integration"
for subject_id, dup_list in tqdm(duplication.items()):
    for i, dup_set in enumerate(dup_list):
        new_name = f"{subject_id}_{i:02d}"
        shutil.copy2([*dup_set][0], os.path.join(target_folder, new_name + '.nii'))        
        history_dict[new_name] = [*dup_set]
        
# events
with open(os.path.join(target_folder, 'file_history') + '.json', 'w') as json_file:
    json.dump(history_dict, json_file, indent=4)

In [4]:
# mri data subjects
with open(r"H:\Other_DB\CAU_EEG\230413_MRI_Integration\file_history.json", 'r') as json_file:
    mri_dict = json.load(json_file)
    
mri_subject_set = set()
for subject_id, file_list in mri_dict.items():
    mri_subject_set.add(subject_id.split('_')[0])

# edf data subjects
with open(r"H:\Other_DB\CAU_EEG\201020\annotation_debug.json", 'r') as json_file:
    edf_dict = json.load(json_file)

edf_subject_set = set()
for data in edf_dict['data']:
    edf_subject_set.add(data['edfname'].split('_')[0])

In [10]:
print(len(mri_subject_set & edf_subject_set))
print(len(mri_subject_set))
print(len(edf_subject_set))
print(len(mri_dict))
print(len(edf_dict['data']))

598
1827
1155
3024
1379


In [6]:
print(mri_subject_set)

{'00600730', '00366640', '00877170', '00954281', '00951640', '00872467', '00895023', '01045256', '00980362', '01301659', '00766584', '01147101', '00908912', '01126091', '01279468', '00636410', '00912302', '01134557', '00950602', '00879526', '01142809', '00404884', '00401772', '01225682', '00675076', '00835568', '00202913', '01166014', '01150274', '01390990', '00689836', '00627477', '00740694', '01242984', '00286931', '00745209', '01124077', '00124359', '00683039', '00001233', '00635862', '83149700', '00993063', '00701929', '00391789', '00712738', '01240949', '00829441', '00661447', '01193017', '01130722', '00122017', '00640064', '00389852', '00074913', '00774088', '00917762', '00430181', '01066082', '00131027', '01136963', '00787716', '00789350', '01188626', '00489204', '00725588', '01180776', '00115457', '01300390', '01300391', '01183615', '38014000', '00285244', '01327430', '00707209', '00978066', '01279912', '01296533', '00635500', '00964059', '01303196', '00791663', '01322787', '01