In [None]:
from pathlib import Path

In [None]:
src_folder = Path("dcm_data")
# Collect every DICOM file below src_folder in a single pass.
# The extension is compared case-insensitively so that ".dcm", ".DCM" and
# mixed-case variants are all found, and so that no file is listed twice on
# platforms where glob patterns are matched case-insensitively.
# Sorting makes the processing order reproducible between runs.
files = sorted(
    path
    for path in src_folder.rglob("*")
    if path.is_file() and path.suffix.lower() == ".dcm"
)

In [None]:
# Sanity check: number of DICOM files collected in `files`.
len(files)

In [None]:
from pydicom import dcmread
from PIL import Image, ImageOps
import numpy as np

def center_pad_to_square(dcm_path: Path, fill_color=0) -> Image.Image:
    """
    Load a DICOM file and return it as a square, 8-bit PIL image.

    The pixel data is min-max scaled to the 0–255 range, an image that is
    taller than wide is rotated by 90 degrees so its long side is horizontal,
    and the shorter side is then padded symmetrically with ``fill_color``
    until the image is square.

    Parameters:
    - dcm_path: Path to the DICOM file (passed straight to ``dcmread``, so a
      ``str`` path works as well)
    - fill_color: Padding color (default is 0 for black)

    Returns:
    - PIL.Image.Image object (square, grayscale)

    NOTE(review): assumes ``pixel_array`` is a single 2-D frame; multi-frame
    or colour data is not handled specially — confirm against the dataset.
    """
    # Load DICOM and extract pixel array.
    # Work in float64: integer pixel data (e.g. int16) can overflow when the
    # minimum is subtracted in its native dtype.
    ds = dcmread(dcm_path)
    pixel_array = ds.pixel_array.astype(np.float64)

    # Normalize to 0–255
    lowest = pixel_array.min()
    value_range = pixel_array.max() - lowest
    if value_range > 0:
        img = (pixel_array - lowest) / value_range * 255
    else:
        # Constant image: there is no contrast to stretch, and dividing by a
        # zero range would produce NaNs, so map everything to 0.
        img = np.zeros_like(pixel_array)
    img = img.astype(np.uint8)

    # Convert to PIL image
    pil_img = Image.fromarray(img)

    # Put the long side horizontal so every output has the same orientation
    if pil_img.width < pil_img.height:
        pil_img = pil_img.transpose(Image.Transpose.ROTATE_90)
    width, height = pil_img.size
    max_side = max(width, height)

    # Calculate padding; any odd leftover pixel goes to the right/bottom
    pad_left = (max_side - width) // 2
    pad_right = max_side - width - pad_left
    pad_top = (max_side - height) // 2
    pad_bottom = max_side - height - pad_top

    # Apply center padding (border order is left, top, right, bottom)
    padded_img = ImageOps.expand(
        pil_img,
        border=(pad_left, pad_top, pad_right, pad_bottom),
        fill=fill_color,
    )
    return padded_img
 

In [5]:
# Export every DICOM as a 512x512 JPEG written next to its source file
# (same name, ".jpg" suffix).
for file in files:
    tgt_file = file.with_suffix(".jpg")
    img = center_pad_to_square(file).resize((512, 512))
    img.save(tgt_file)

In [6]:
# Maps the Chinese defect keyword searched for in each file path to the
# English class-folder name. Insertion order is significant: keywords are
# checked in this order when files are sorted into classes.
labels = dict(
    [
        ("圆形", "circle"),  # round defect
        ("未熔合", "lack_of_fusion"),  # lack of fusion
        ("未焊透", "lack_of_penetration"),  # incomplete penetration
        ("条形", "line"),  # elongated / linear defect
        ("内凹", "pit"),  # concavity
        ("咬边", "undercut"),  # undercut
        ("合格", "valid"),  # acceptable (no defect)
    ]
)

In [7]:
# Root folder of the re-organised dataset; created up front so later cells
# can add one sub-folder per class beneath it.
target_folder = Path("dataset-resized")
target_folder.mkdir(parents=True, exist_ok=True)

In [11]:
# Move each exported JPEG into target_folder/<class>/, where the class is
# chosen by the first keyword from `labels` (in insertion order) that occurs
# anywhere in the file's absolute path. Files matching no keyword stay put.
# The listing is materialised first because files are moved out of the tree
# while we iterate over it.
for file in list(src_folder.rglob("*.jpg")):
    abs_path = str(file.absolute())
    for k, label in labels.items():
        if k in abs_path:
            tgt_path = target_folder / label / file.name
            tgt_path.parent.mkdir(exist_ok=True, parents=True)
            file.rename(tgt_path)
            # A path can contain more than one keyword (a single file name
            # may list two defect types). Once moved, the file no longer
            # exists at `file`, so a second rename would raise
            # FileNotFoundError — stop at the first match.
            break

In [10]:
# Spot-check the preprocessing on a single, hand-picked DICOM file.
sample_dcm = Path("dcm_data/金陵/1701-060-9093-003-C399-66-1-2-未熔合-圆形缺陷.DCM")
img = center_pad_to_square(sample_dcm)

In [None]:
# Preview `img` at 512x512, the same size used for the exported JPEGs.
# The result is not assigned, so it is only shown as the cell output.
img.resize((512, 512))