In [2]:
!pip install pydicom

import os
import pydicom
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import ipywidgets as widgets
import nibabel as nib
import numpy as np
import pyarrow.parquet as pq

from IPython.display import HTML
from tqdm import tqdm

from google.colab import drive
import os

def mount_and_navigate_to_dir(directory_path="/content/drive/MyDrive/"):
    """
    Attach Google Drive to the Colab filesystem, then make a folder the working directory.

    Drive is always (re)mounted at ``/content/drive`` with ``force_remount=True``.
    The final working directory is printed as a confirmation.

    Args:
    - directory_path (str): Folder to switch into once Drive is mounted.
    """
    mount_point = "/content/drive"
    drive.mount(mount_point, force_remount=True)

    os.chdir(directory_path)
    landed_in = os.getcwd()
    print(f"Current directory: {landed_in}")

# Mount Drive and switch into the default folder; the later cells use paths
# relative to the working directory (e.g. "rsna_data/...").
mount_and_navigate_to_dir()

Mounted at /content/drive
Current directory: /content/drive/MyDrive


In [None]:
def get_dcm_info(file_path):
    """
    Display one DICOM slice and describe its pixel array.

    The slice is drawn with the ``bone`` colormap and hidden axes. Any failure
    (unreadable file, undecodable pixels, plotting error) is reported in the
    returned string rather than raised.

    Args:
    - file_path (str): Path of the DICOM file to inspect.

    Returns:
    - str: Shape, maximum and minimum of the pixel array, or an error message.
    """
    try:
        # Read the file and decode its pixel data into a NumPy array.
        pixels = pydicom.dcmread(file_path).pixel_array

        # Collect the summary values reported back to the caller.
        dims = pixels.shape
        brightest = np.max(pixels)
        darkest = np.min(pixels)

        # Render the slice without axis ticks or labels.
        plt.imshow(pixels, cmap=plt.cm.bone)
        plt.axis('off')
        plt.show()
    except Exception as err:
        return f"오류 발생: {str(err)}"
    else:
        return f"DCM 파일 shape: {dims}, 최대값: {brightest}, 최소값: {darkest}"

# Path (relative to the current working directory) of the single DICOM slice to inspect
dcm_file_path = "rsna_data/train_images/10127/1554/10.dcm"

# Display the slice and print the summary (or error) string returned by get_dcm_info
print(get_dcm_info(dcm_file_path))

In [None]:
directory = "rsna_data/train_images/10004/21057"
# os.listdir() returns names in arbitrary order, so sort before displaying to walk
# the series in a stable sequence. Ordering by (length, name) places un-padded
# numeric names such as "2.dcm" before "10.dcm" without assuming that every file
# name is numeric.
dcm_files = [
    os.path.join(directory, f)
    for f in sorted(os.listdir(directory), key=lambda name: (len(name), name))
    if f.endswith('.dcm')
]
# Show every slice of the series and print its summary line.
for dcm_file in dcm_files:
  dcm_info = get_dcm_info(dcm_file)
  print(dcm_info)

In [None]:
# Original -> show

def load_and_resize_dcm_images(directory, target_size):
    """
    Sample evenly strided DICOM slices from one series folder and stack them,
    resized, into a single uint8 volume (no intensity normalisation).

    Args:
    - directory (str): Folder containing the ``.dcm`` slices of one series.
    - target_size (tuple): ``(rows, cols, depth)`` of the returned volume; ``depth``
      is the number of slices sampled.

    Returns:
    - np.ndarray: uint8 array of shape ``(rows, cols, depth)``; slice ``i`` is at
      ``[:, :, i]``.

    Raises:
    - IndexError: if the folder contains no ``.dcm`` file (and ``depth`` > 0).
    """
    # os.listdir() returns names in arbitrary order; sort so the stride below walks
    # the series in sequence. (length, name) orders un-padded numeric names such as
    # "2.dcm" before "10.dcm" without requiring every name to be numeric.
    dcm_files = sorted(
        (f for f in os.listdir(directory) if f.endswith('.dcm')),
        key=lambda name: (len(name), name),
    )
    rows, cols, new_depth = target_size[0], target_size[1], target_size[2]

    # Step between sampled slices. If the series has fewer slices than new_depth the
    # step is 0 and the first slice is repeated, as before.
    stride = len(dcm_files) // new_depth if new_depth else 0

    # Empty volume that receives one resized slice per depth index.
    images = np.zeros((rows, cols, new_depth), dtype=np.uint8)

    for i in range(new_depth):
        dcm = pydicom.dcmread(os.path.join(directory, dcm_files[i * stride]))

        # NOTE(review): a plain uint8 cast keeps only the low 8 bits, so wider pixel
        # data wraps around. Kept because this cell is the un-normalised baseline.
        image = dcm.pixel_array.astype(np.uint8)

        # cv2.resize expects dsize as (width, height), i.e. (cols, rows); passing
        # (rows, cols) only worked for square targets.
        images[:, :, i] = cv2.resize(image, (cols, rows), interpolation=cv2.INTER_AREA)

    return images

# Series metadata table; the loop below reads its patient_id and series_id columns.
TRAIN_CSV = "rsna_data/train_series_meta.csv"
data = pd.read_csv(TRAIN_CSV)

# For every row: load a fixed number of resized slices and show them in a grid.
for index, row in data.iterrows():
  # Folder holding this series' DICOM files: train_images/<patient_id>/<series_id>.
  # The ids are passed through int() before formatting (presumably because iterrows
  # can hand them back as floats -- confirm against the CSV dtypes).
  dcm_directory = 'rsna_data/train_images/' + str(int(row['patient_id'])) + '/' + str(int(row['series_id']))
  print(str(int(row['series_id'])))

  # Load the series as an array of shape target_size = (rows, cols, number of slices)
  target_size = (256, 256, 10)
  loaded_images = load_and_resize_dcm_images(dcm_directory, target_size)

  # Grid layout: 4 columns, with the row count rounded up (ceiling division)
  num_columns = 4
  num_rows = (loaded_images.shape[2] + num_columns - 1) // num_columns

  # Create a figure with subplots
  fig, axs = plt.subplots(num_rows, num_columns, figsize=(20, 20))

  # Draw slice i in grid cell (i // num_columns, i % num_columns), i.e. row-major order
  for i in range(loaded_images.shape[2]):
      row_index = i // num_columns
      col_index = i % num_columns
      ax = axs[row_index, col_index]
      ax.imshow(loaded_images[:, :, i], cmap='gray')
      ax.axis('off')
      ax.set_title(f'Resized Image {i+1}')

  # Hide the axes of the grid cells left over after the last slice
  for i in range(loaded_images.shape[2], num_rows * num_columns):
      row_index = i // num_columns
      col_index = i % num_columns
      axs[row_index, col_index].axis('off')

  plt.tight_layout()
  plt.show()

In [None]:
# Normalization -> show

def load_and_resize_dcm_images(directory, target_size):
    """
    Sample evenly strided DICOM slices from one series folder, resize them and
    min-max normalise each slice to the full 0-255 range.

    Args:
    - directory (str): Folder containing the ``.dcm`` slices of one series.
    - target_size (tuple): ``(rows, cols, depth)`` of the returned volume; ``depth``
      is the number of slices sampled.

    Returns:
    - np.ndarray: uint8 array of shape ``(rows, cols, depth)``; slice ``i`` is at
      ``[:, :, i]``. A slice whose pixels are all equal is returned as zeros.

    Raises:
    - IndexError: if the folder contains no ``.dcm`` file (and ``depth`` > 0).
    """
    # os.listdir() returns names in arbitrary order; sort so the stride below walks
    # the series in sequence. (length, name) orders un-padded numeric names such as
    # "2.dcm" before "10.dcm" without requiring every name to be numeric.
    dcm_files = sorted(
        (f for f in os.listdir(directory) if f.endswith('.dcm')),
        key=lambda name: (len(name), name),
    )
    rows, cols, new_depth = target_size[0], target_size[1], target_size[2]

    # Step between sampled slices. If the series has fewer slices than new_depth the
    # step is 0 and the first slice is repeated, as before.
    stride = len(dcm_files) // new_depth if new_depth else 0

    # Empty volume that receives one normalised slice per depth index.
    images = np.zeros((rows, cols, new_depth), dtype=np.uint8)

    for i in range(new_depth):
        dcm = pydicom.dcmread(os.path.join(directory, dcm_files[i * stride]))

        # cv2.resize expects dsize as (width, height), i.e. (cols, rows). Convert to
        # float64 so the min/max arithmetic cannot overflow a narrow integer dtype.
        resized_image = cv2.resize(
            dcm.pixel_array, (cols, rows), interpolation=cv2.INTER_AREA
        ).astype(np.float64)

        lowest = resized_image.min()
        value_range = resized_image.max() - lowest
        if value_range > 0:
            # Scale to 0..255. Multiplying by 256 mapped the brightest pixel to 256,
            # which does not fit in uint8 and came out as 0.
            images[:, :, i] = ((resized_image - lowest) / value_range * 255).astype(np.uint8)
        # else: constant slice -- leave the zeros instead of dividing by zero.

    return images

# Series metadata table; the loop below reads its patient_id and series_id columns.
TRAIN_CSV = "rsna_data/train_series_meta.csv"
data = pd.read_csv(TRAIN_CSV)

# For every row: load a fixed number of normalised slices and show them in a grid.
for index, row in data.iterrows():
  # Folder holding this series' DICOM files: train_images/<patient_id>/<series_id>.
  # The ids are passed through int() before formatting (presumably because iterrows
  # can hand them back as floats -- confirm against the CSV dtypes).
  dcm_directory = 'rsna_data/train_images/' + str(int(row['patient_id'])) + '/' + str(int(row['series_id']))
  print(str(int(row['series_id'])))

  # Load the series as an array of shape target_size = (rows, cols, number of slices)
  target_size = (225, 225, 10)
  loaded_images = load_and_resize_dcm_images(dcm_directory, target_size)
  print(loaded_images.shape)

  # Grid layout: 4 columns, with the row count rounded up (ceiling division)
  num_columns = 4
  num_rows = (loaded_images.shape[2] + num_columns - 1) // num_columns

  # Create a figure with subplots
  fig, axs = plt.subplots(num_rows, num_columns, figsize=(20, 20))

  # Draw slice i in grid cell (i // num_columns, i % num_columns), i.e. row-major order
  for i in range(loaded_images.shape[2]):
      row_index = i // num_columns
      col_index = i % num_columns
      ax = axs[row_index, col_index]
      ax.imshow(loaded_images[:, :, i], cmap='gray')
      ax.axis('off')
      ax.set_title(f'Resized Image {i+1}')

  # Hide the axes of the grid cells left over after the last slice
  for i in range(loaded_images.shape[2], num_rows * num_columns):
      row_index = i // num_columns
      col_index = i % num_columns
      axs[row_index, col_index].axis('off')

  plt.tight_layout()
  plt.show()

In [None]:
# Original -> save as PNG

def load_resize_save_dcm_images(directory, target_size):
    """
    Build a 3-channel uint8 image from the three centre slices of a DICOM series
    (no intensity normalisation).

    Despite the name, nothing is written to disk here; the caller saves the result.

    Args:
    - directory (str): Folder containing the series' slices, named ``<number>.dcm``.
    - target_size (tuple): ``(rows, cols)`` of each resized slice.

    Returns:
    - np.ndarray: uint8 array of shape ``(rows, cols, 3)``; channels 0/1/2 hold the
      slice before the centre, the centre slice and the slice after it.

    Raises:
    - IndexError: if the folder contains no DICOM file.
    - ValueError: if a ``.dcm`` file name does not start with an integer.
    """
    # Keep DICOM files only (stray entries would break the numeric sort key) and
    # order them by the integer in the file name.
    dcm_files = sorted(
        (f for f in os.listdir(directory) if f.lower().endswith('.dcm')),
        key=lambda x: int(x.split('.')[0]),
    )
    rows, cols = target_size[0], target_size[1]

    # Centre slice and its two neighbours. Indices are clamped to the valid range so
    # a series with only one or two slices reuses an edge slice instead of raising.
    last = len(dcm_files) - 1
    middle = len(dcm_files) // 2
    picks = [min(max(middle + offset, 0), last) for offset in (-1, 0, 1)]

    images = np.zeros((rows, cols, 3), dtype=np.uint8)
    for channel, pick in enumerate(picks):
        dcm = pydicom.dcmread(os.path.join(directory, dcm_files[pick]))

        # cv2.resize expects dsize as (width, height), i.e. (cols, rows).
        resized = cv2.resize(dcm.pixel_array, (cols, rows), interpolation=cv2.INTER_AREA)

        # NOTE(review): a plain uint8 cast keeps only the low 8 bits, so wider pixel
        # data wraps around. Kept because this is the un-normalised baseline.
        images[..., channel] = resized.astype(np.uint8)

    return images

# Series metadata table; the loop below reads its patient_id and series_id columns.
TRAIN_CSV = "rsna_data/train_series_meta.csv"
data = pd.read_csv(TRAIN_CSV)

# For every row: build the 3-slice image and write each slice as its own PNG.
for index, row in data.iterrows():
  # Folder holding this series' DICOM files: train_images/<patient_id>/<series_id>
  dcm_directory = 'rsna_data/train_images/' + str(int(row['patient_id'])) + '/' + str(int(row['series_id']))
  print(str(int(row['series_id'])))

  # Load the selected slices as a (rows, cols, 3) array resized to target_size
  target_size = (256,256)
  loaded_images = load_resize_save_dcm_images(dcm_directory, target_size)

  output_directory = 'png_jjw/'  # Define the directory where you want to save PNG files

  for i in range(loaded_images.shape[2]):
    # Output folder: png_jjw/<patient_id>/<series_id>/img_256x256_d1_original
    output_subdirectory = os.path.join(output_directory, str(int(row['patient_id'])), str(int(row['series_id'])), 'img_256x256_d1_original')
    os.makedirs(output_subdirectory, exist_ok=True)  # Create subdirectories if they don't exist

    # Files are numbered from 1: image_001.png, image_002.png, ...
    filename = os.path.join(output_subdirectory, f"image_{i+1:03d}.png")

    # Save the slice as a PNG file
    # NOTE(review): the return value of cv2.imwrite is ignored, so a failed write goes unnoticed.
    cv2.imwrite(filename, loaded_images[:, :, i])

In [None]:
# Normalization -> save as PNG

def load_resize_save_dcm_images(directory, target_size):
    """
    Build a 3-channel uint8 image from the three centre slices of a DICOM series,
    min-max normalising each slice to the full 0-255 range.

    Despite the name, nothing is written to disk here; the caller saves the result.

    Args:
    - directory (str): Folder containing the series' slices, named ``<number>.dcm``.
    - target_size (tuple): ``(rows, cols)`` of each resized slice.

    Returns:
    - np.ndarray: uint8 array of shape ``(rows, cols, 3)``; channels 0/1/2 hold the
      slice before the centre, the centre slice and the slice after it. A slice
      whose pixels are all equal is returned as zeros.

    Raises:
    - IndexError: if the folder contains no DICOM file.
    - ValueError: if a ``.dcm`` file name does not start with an integer.
    """
    # Keep DICOM files only (stray entries would break the numeric sort key) and
    # order them by the integer in the file name.
    dcm_files = sorted(
        (f for f in os.listdir(directory) if f.lower().endswith('.dcm')),
        key=lambda x: int(x.split('.')[0]),
    )
    rows, cols = target_size[0], target_size[1]

    # Centre slice and its two neighbours. Indices are clamped to the valid range so
    # a series with only one or two slices reuses an edge slice instead of raising.
    last = len(dcm_files) - 1
    middle = len(dcm_files) // 2
    picks = [min(max(middle + offset, 0), last) for offset in (-1, 0, 1)]

    images = np.zeros((rows, cols, 3), dtype=np.uint8)
    for channel, pick in enumerate(picks):
        dcm = pydicom.dcmread(os.path.join(directory, dcm_files[pick]))

        # cv2.resize expects dsize as (width, height), i.e. (cols, rows). Convert to
        # float64 so the min/max arithmetic cannot overflow a narrow integer dtype.
        resized = cv2.resize(
            dcm.pixel_array, (cols, rows), interpolation=cv2.INTER_AREA
        ).astype(np.float64)

        lowest = resized.min()
        value_range = resized.max() - lowest
        if value_range > 0:
            # Scale to 0..255. Multiplying by 256 mapped the brightest pixel to 256,
            # which does not fit in uint8 and came out as 0.
            images[..., channel] = ((resized - lowest) / value_range * 255).astype(np.uint8)
        # else: constant slice -- leave the zeros instead of dividing by zero.

    return images

# Series metadata table; the loop below reads its patient_id and series_id columns.
TRAIN_CSV = "rsna_data/train_series_meta.csv"
data = pd.read_csv(TRAIN_CSV)

# For every row: build the normalised 3-slice image and write each slice as its own PNG.
for index, row in data.iterrows():
  # Folder holding this series' DICOM files: train_images/<patient_id>/<series_id>
  dcm_directory = 'rsna_data/train_images/' + str(int(row['patient_id'])) + '/' + str(int(row['series_id']))
  print(str(int(row['series_id'])))

  # Load the selected slices as a (rows, cols, 3) array resized to target_size
  target_size = (256, 256)
  loaded_images = load_resize_save_dcm_images(dcm_directory, target_size)

  output_directory = 'png_jjw/'  # Define the directory where you want to save PNG files

  for i in range(loaded_images.shape[2]):
    # Output folder: png_jjw/<patient_id>/<series_id>/img_256x256_d1_normalization
    output_subdirectory = os.path.join(output_directory, str(int(row['patient_id'])), str(int(row['series_id'])), 'img_256x256_d1_normalization')
    os.makedirs(output_subdirectory, exist_ok=True)  # Create subdirectories if they don't exist

    # Files are numbered from 1: image_001.png, image_002.png, ...
    filename = os.path.join(output_subdirectory, f"image_{i+1:03d}.png")

    # Save the slice as a PNG file
    # NOTE(review): the return value of cv2.imwrite is ignored, so a failed write goes unnoticed.
    cv2.imwrite(filename, loaded_images[:, :, i])

In [3]:
# Original_wide range -> save as PNG

def load_resize_save_dcm_images(directory, target_size):
    """
    Build a 3-channel uint8 image from three widely spaced slices of a DICOM series
    (no intensity normalisation).

    The slices are taken at one quarter, one half and three quarters of the way
    through the numerically sorted series. Despite the name, nothing is written to
    disk here; the caller saves the result.

    Args:
    - directory (str): Folder containing the series' slices, named ``<number>.dcm``.
    - target_size (tuple): ``(rows, cols)`` of each resized slice.

    Returns:
    - np.ndarray: uint8 array of shape ``(rows, cols, 3)``, one selected slice per
      channel in series order.

    Raises:
    - IndexError: if the folder contains no DICOM file.
    - ValueError: if a ``.dcm`` file name does not start with an integer.
    """
    # Keep DICOM files only (stray entries would break the numeric sort key) and
    # order them by the integer in the file name.
    dcm_files = sorted(
        (f for f in os.listdir(directory) if f.lower().endswith('.dcm')),
        key=lambda x: int(x.split('.')[0]),
    )
    rows, cols = target_size[0], target_size[1]

    # Quartile positions; all three are valid indices for any non-empty series.
    count = len(dcm_files)
    picks = [count // 4, count // 2, count * 3 // 4]

    images = np.zeros((rows, cols, 3), dtype=np.uint8)
    for channel, pick in enumerate(picks):
        dcm = pydicom.dcmread(os.path.join(directory, dcm_files[pick]))

        # cv2.resize expects dsize as (width, height), i.e. (cols, rows).
        resized = cv2.resize(dcm.pixel_array, (cols, rows), interpolation=cv2.INTER_AREA)

        # NOTE(review): a plain uint8 cast keeps only the low 8 bits, so wider pixel
        # data wraps around. Kept because this is the un-normalised baseline.
        images[..., channel] = resized.astype(np.uint8)

    return images

# Series metadata table; the loop below reads its patient_id and series_id columns.
TRAIN_CSV = "rsna_data/train_series_meta.csv"
data = pd.read_csv(TRAIN_CSV)

# NOTE(review): only rows from position 2700 onward are processed -- this looks like
# a manual resume after an interrupted run (confirm). A fresh "Run All" will not
# regenerate the PNGs for the first 2700 rows.
for index, row in data.iloc[2700:].iterrows():
  # Folder holding this series' DICOM files: train_images/<patient_id>/<series_id>
  dcm_directory = 'rsna_data/train_images/' + str(int(row['patient_id'])) + '/' + str(int(row['series_id']))
  print(str(int(row['series_id'])))

  # Load the selected slices as a (rows, cols, 3) array resized to target_size
  target_size = (256,256)
  loaded_images = load_resize_save_dcm_images(dcm_directory, target_size)

  output_directory = 'png_jjw/'  # Define the directory where you want to save PNG files

  for i in range(loaded_images.shape[2]):
    # Output folder: png_jjw/<patient_id>/<series_id>/img_256x256_d3_original
    output_subdirectory = os.path.join(output_directory, str(int(row['patient_id'])), str(int(row['series_id'])), 'img_256x256_d3_original')
    os.makedirs(output_subdirectory, exist_ok=True)  # Create subdirectories if they don't exist

    # Files are numbered from 1: image_001.png, image_002.png, ...
    filename = os.path.join(output_subdirectory, f"image_{i+1:03d}.png")

    # Save the slice as a PNG file
    # NOTE(review): the return value of cv2.imwrite is ignored, so a failed write goes unnoticed.
    cv2.imwrite(filename, loaded_images[:, :, i])

427
29040
33093
5868
58214
61772
58069
22566
33019
28792
64389
31928
58800
39654
43057
37872
61747
18095
63554
5479
14327
36110
62389
45406
38489
49108
13666
53345
26168
6301
59734
64933
25272
581
52692
14735
32040
7198
13231
55933
64536
35840
17250
8744
6423
13171
25158
60194
42949
62267
48709
40786
22203
5460
23488
62538
10426
42410
61449
31188
43972
28727
53083
23837
35661
33966
65292
28356
35095
55959
21519
26697
25962
16102
31553
37930
23291
9085
36727
31470
46115
8054
15783
52396
54524
10959
51017
21282
12236
2942
45369
60075
45286
7286
46107
5093
14815
53560
26843
34833
12816
39144
4563
50198
32387
46912
6344
52419
10244
9661
53320
17785
26376
33165
30213
62011
21136
12082
6483
30035
33777
42083
24492
42287
29596
55278
27024
908
31416
36062
11431
30279
54882
44607
59664
32734
45454
53605
32545
20063
65416
12125
37147
17077
45790
44615
46118
44417
8161
38290
4127
51976
3382
47756
32633
30064
4890
17272
50460
34286
45306
9588
12524
51518
46583
63663
53842
63596
36147
32760
33001
2

In [4]:
# Normalization_wide range -> save as PNG

def load_resize_save_dcm_images(directory, target_size):
    """
    Build a 3-channel uint8 image from three widely spaced slices of a DICOM series,
    min-max normalising each slice to the full 0-255 range.

    The slices are taken at one quarter, one half and three quarters of the way
    through the numerically sorted series. Despite the name, nothing is written to
    disk here; the caller saves the result.

    Args:
    - directory (str): Folder containing the series' slices, named ``<number>.dcm``.
    - target_size (tuple): ``(rows, cols)`` of each resized slice.

    Returns:
    - np.ndarray: uint8 array of shape ``(rows, cols, 3)``, one selected slice per
      channel in series order. A slice whose pixels are all equal is returned as
      zeros.

    Raises:
    - IndexError: if the folder contains no DICOM file.
    - ValueError: if a ``.dcm`` file name does not start with an integer.
    """
    # Keep DICOM files only (stray entries would break the numeric sort key) and
    # order them by the integer in the file name.
    dcm_files = sorted(
        (f for f in os.listdir(directory) if f.lower().endswith('.dcm')),
        key=lambda x: int(x.split('.')[0]),
    )
    rows, cols = target_size[0], target_size[1]

    # Quartile positions; all three are valid indices for any non-empty series.
    count = len(dcm_files)
    picks = [count // 4, count // 2, count * 3 // 4]

    images = np.zeros((rows, cols, 3), dtype=np.uint8)
    for channel, pick in enumerate(picks):
        dcm = pydicom.dcmread(os.path.join(directory, dcm_files[pick]))

        # cv2.resize expects dsize as (width, height), i.e. (cols, rows). Convert to
        # float64 so the min/max arithmetic cannot overflow a narrow integer dtype.
        resized = cv2.resize(
            dcm.pixel_array, (cols, rows), interpolation=cv2.INTER_AREA
        ).astype(np.float64)

        lowest = resized.min()
        value_range = resized.max() - lowest
        if value_range > 0:
            # Scale to 0..255. Multiplying by 256 mapped the brightest pixel to 256,
            # which does not fit in uint8 and came out as 0.
            images[..., channel] = ((resized - lowest) / value_range * 255).astype(np.uint8)
        # else: constant slice -- leave the zeros instead of dividing by zero.

    return images

# Series metadata table; the loop below reads its patient_id and series_id columns.
TRAIN_CSV = "rsna_data/train_series_meta.csv"
data = pd.read_csv(TRAIN_CSV)

# For every row: build the normalised 3-slice image and write each slice as its own PNG.
for index, row in data.iterrows():
  # Folder holding this series' DICOM files: train_images/<patient_id>/<series_id>
  dcm_directory = 'rsna_data/train_images/' + str(int(row['patient_id'])) + '/' + str(int(row['series_id']))
  print(str(int(row['series_id'])))

  # Load the selected slices as a (rows, cols, 3) array resized to target_size
  target_size = (256, 256)
  loaded_images = load_resize_save_dcm_images(dcm_directory, target_size)

  output_directory = 'png_jjw/'  # Define the directory where you want to save PNG files

  for i in range(loaded_images.shape[2]):
    # Output folder: png_jjw/<patient_id>/<series_id>/img_256x256_d3_normalization
    output_subdirectory = os.path.join(output_directory, str(int(row['patient_id'])), str(int(row['series_id'])), 'img_256x256_d3_normalization')
    os.makedirs(output_subdirectory, exist_ok=True)  # Create subdirectories if they don't exist

    # Files are numbered from 1: image_001.png, image_002.png, ...
    filename = os.path.join(output_subdirectory, f"image_{i+1:03d}.png")

    # Save the slice as a PNG file
    # NOTE(review): the return value of cv2.imwrite is ignored, so a failed write goes unnoticed.
    cv2.imwrite(filename, loaded_images[:, :, i])

21057
51033
18667
47578
29700
42932
17486
50172
37324
46839
8139
27573
42418
58500
1554
53380
4816
17557
18334
50253
65236
16066
30522
40471
14541
51162
2346
60095
24515
60982
14254
37142
14945
10410
31085
47009
52259
15679
4991
4405
48503
65369
4930
60534
21579
26790
27106
44149
17060
59654
16281
26870
29394
29529
14324
3716
62306
27005
59033
48437
63796
46573
50314
15721
65139
27788
38360
56132
37069
30843
47610
54600
34169
12039
53000
3609
58150
53877
5611
58443
18685
37630
19484
29137
8511
24201
31481
13041
29167
48987
54368
28997
6366
2703
57412
51300
48992
24276
39434
2204
16846
53429
1126
28676
16534
64815
37533
1383
64202
48184
4654
5547
6116
26353
56112
17580
32735
14609
51515
41167
39013
37803
37456
25954
20475
46686
52851
11416
55528
28720
57014
5605
12057
34925
5353
17000
57414
17037
54658
13209
55742
10773
39389
33599
60003
13991
51844
54660
42623
54095
28841
16498
47491
31965
39072
65382
19063
64739
45638
47364
37127
21217
8218
25323
39773
57371
5102
30378
5223
29733
4727

In [None]:
# PNG load -> show

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Path of the PNG to preview; the commented-out line is the un-normalised
# counterpart of the same slice.
#png_file_path = "png_jjw/10004/21057/img_256x256_d1_original/image_001.png"
png_file_path = "png_jjw/10004/21057/img_256x256_d1_normalization/image_001.png"

# Load the PNG into an array and report its shape.
img = mpimg.imread(png_file_path)
print(img.shape)

# Draw the image in grayscale.
plt.imshow(img, cmap='gray')
plt.axis('off')  # hide the x and y axes
plt.show()

In [None]:
# 폴더 삭제 방법

import os
import shutil

def delete_folder(folder_path):
    """
    Recursively remove a folder if it exists, printing what happened.

    Nothing is raised: a missing folder and any error during removal are both
    reported with a printed message.

    Args:
    - folder_path (str): Folder to remove together with everything inside it.
    """
    try:
        # Nothing to do when the path is absent; just say so.
        if not os.path.exists(folder_path):
            print(f"{folder_path} 폴더가 존재하지 않습니다.")
            return

        shutil.rmtree(folder_path)
        print(f"{folder_path} 폴더를 삭제했습니다.")
    except Exception as exc:
        print(f"폴더 삭제 중 오류 발생: {str(exc)}")

# Folder to remove, relative to the current working directory.
folder_to_delete = "png_test_jjw"

# Remove it; delete_folder prints the outcome.
delete_folder(folder_to_delete)

png_test_jjw 폴더가 존재하지 않습니다.
