In [1]:
import numpy as np
import cv2
import os
from scipy import fftpack
import glob
import shutil
import pydicom  

In [2]:
import os
import shutil
import re

def organize_and_clean_files(base_path, target_path):
    
    sub_folders = ["PREOP UZ", "PREOP AP", "POSTOP UZ", "POSTOP AP"]
    
    if not os.path.exists(target_path):
        os.makedirs(target_path)

    for folder in sub_folders:
        folder_path = os.path.join(base_path, folder)
        if not os.path.exists(folder_path):
            print(f"Klasör bulunamadı: {folder_path}")
            continue

        for file_name in os.listdir(folder_path):
            if not file_name.endswith(".dcm"):
                continue
            match = re.search(r"^(.*?)\s*(POSTOP|PREOP)", file_name)
            
            if match:
                patient_name_raw = match.group(1).strip()
                patient_folder_name = patient_name_raw.replace(" ", "_").lower()
                
                suffix = folder.replace(" ", "_").lower()
                
                patient_dir = os.path.join(target_path, patient_folder_name)
                os.makedirs(patient_dir, exist_ok=True)
                
                new_file_name = f"{patient_folder_name}_{suffix}.dcm"
                
                source_full_path = os.path.join(folder_path, file_name)
                target_full_path = os.path.join(patient_dir, new_file_name)
                
                shutil.copy2(source_full_path, target_full_path)
            else:
                print(f"Format eşleşmedi, atlanıyor: {file_name}")

    print(f"new dataset saved under {target_path}")

organize_and_clean_files("raw_data", "seperated_data")

new dataset saved under seperated_data


In [3]:
import os
import re

def create_patient_dict_with_missing(base_path):
    """
    { 'name_surname': ['path/to/preop_uz', '', 'path/to/postop_uz', ''] }
    """
    sub_folders = ["PREOP UZ", "PREOP AP", "POSTOP UZ", "POSTOP AP"]
    patient_dict = {}
    
    all_patients = set()
    
    for folder in sub_folders:
        folder_path = os.path.join(base_path, folder)
        if not os.path.exists(folder_path):
            continue
            
        for file_name in os.listdir(folder_path):
            if file_name.endswith(".dcm"):
                match = re.search(r"^(.*?)\s*(POSTOP|PREOP)", file_name)
                if match:
                    patient_name = match.group(1).strip()
                    all_patients.add(patient_name)

    for patient in all_patients:
        patient_paths = []
        
        for folder in sub_folders:
            folder_path = os.path.join(base_path, folder)
            found = False
            
            if os.path.exists(folder_path):
                for file_name in os.listdir(folder_path):
                    if file_name.startswith(patient) and file_name.endswith(".dcm"):
                        full_path = os.path.join(folder_path, file_name)
                        patient_paths.append(full_path)
                        found = True
                        break
            
            if not found:
                patient_paths.append("") 
        
        patient_dict[patient] = patient_paths
        
    return patient_dict

base_path = "./raw_data"
patients = create_patient_dict_with_missing(base_path)

In [4]:
len(patients)

232

In [5]:
def calculate_category_medians(patient_dict):
    """
    Kategorilere göre (0: PREOP UZ, 1: PREOP AP, 2: POSTOP UZ, 3: POSTOP AP)
    ayrı ayrı medyan boyutları hesaplar.
    """
    # 4 kategori için ayrı listeler tutalım
    categories_dims = {
        0: {"widths": [], "heights": []}, # PREOP UZ
        1: {"widths": [], "heights": []}, # PREOP AP
        2: {"widths": [], "heights": []}, # POSTOP UZ
        3: {"widths": [], "heights": []}  # POSTOP AP
    }
    
    print("Kategorik medyan boyutlar hesaplanıyor...")

    for patient, paths in patient_dict.items():
        for i in range(4):
            file_path = paths[i]
            
            # Eğer dosya yolu boş değilse ve dosya mevcutsa boyutunu oku
            if file_path != "" and os.path.exists(file_path):
                try:
                    # Görüntü verisini yüklemeden sadece metadata'dan boyut okumak daha hızlıdır
                    ds = pydicom.dcmread(file_path, stop_before_pixels=True)
                    h, w = ds.Rows, ds.Columns
                    
                    categories_dims[i]["widths"].append(w)
                    categories_dims[i]["heights"].append(h)
                except Exception as e:
                    print(f"Hata: {file_path} okunamadı. {e}")

    # Her kategori için medyanları hesapla
    final_medians = {}
    category_names = ["PREOP_UZ", "PREOP_AP", "POSTOP_UZ", "POSTOP_AP"]
    
    for i in range(4):
        if categories_dims[i]["widths"]: # Liste boş değilse
            m_w = int(np.median(categories_dims[i]["widths"]))
            m_h = int(np.median(categories_dims[i]["heights"]))
            final_medians[i] = (m_w, m_h)
            print(f"-> {category_names[i]} Medyan Boyut: {m_w}x{m_h}")
        else:
            final_medians[i] = (256, 256) # Fallback: Veri yoksa standart boyut
            print(f"-> {category_names[i]} için veri bulunamadı, varsayılan (256, 256) atandı.")

    return final_medians

medians = calculate_category_medians(patients)

Kategorik medyan boyutlar hesaplanıyor...
-> PREOP_UZ Medyan Boyut: 1760x2140
-> PREOP_AP Medyan Boyut: 1760x2140
-> POSTOP_UZ Medyan Boyut: 1760x2140
-> POSTOP_AP Medyan Boyut: 1760x2140


In [6]:
medians

{0: (1760, 2140), 1: (1760, 2140), 2: (1760, 2140), 3: (1760, 2140)}

In [7]:
def apply_butterworth_filter(img, order=4, cutoff_factor=0.5):
    rows, cols = img.shape
    f_transform = np.fft.fft2(img)
    f_shift = np.fft.fftshift(f_transform)

    crow, ccol = rows // 2, cols // 2
    y, x = np.ogrid[-crow:rows-crow, -ccol:cols-ccol]

    d0 = (min(rows, cols) / 2) * cutoff_factor
    dist = np.sqrt(x**2 + y**2)

    mask = 1 / (1 + (dist / d0)**(2 * order))

    f_shift_filtered = f_shift * mask
    f_ishift = np.fft.ifftshift(f_shift_filtered)
    img_back = np.fft.ifft2(f_ishift)

    return np.abs(img_back)

In [8]:
def resize_image(img, target_size):
    return cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)

In [9]:
def save_as_npy(data, filename):
    np.save(filename, data)
    print(f"file saved: {filename}")

In [10]:
patients["FATMA TURGUT"]

['./raw_data/PREOP UZ/FATMA TURGUT PREOP UZ.Seq1.Ser1009.Img1.dcm',
 './raw_data/PREOP AP/FATMA TURGUT PREOP AP.Seq1.Ser1.Img1.dcm',
 '',
 './raw_data/POSTOP AP/FATMA TURGUT POSTOP AP.Seq2.Ser1002.Img1.dcm']

In [39]:
selected_category = "PREOP AP"
category_index = 1 
base_path = "./raw_data"
target_size = medians[category_index] 
target_h, target_w = target_size[1], target_size[0]

category_path = os.path.join(base_path, selected_category)
file_list = sorted([f for f in os.listdir(category_path) if f.endswith('.dcm')])
num_images = len(file_list)

output_filename = f"{selected_category.replace(' ', '_').lower()}.npy"

data_shape = (num_images, target_h, target_w)
fp = np.memmap(output_filename, dtype='float32', mode='w+', shape=data_shape)

print(f"Starting: {selected_category} | Total: {num_images}")

for i, file_name in enumerate(file_list):
    try:
        file_path = os.path.join(category_path, file_name)
        
        ds = pydicom.dcmread(file_path)
        img = ds.pixel_array.astype(np.float32)
        
        img = (img - np.min(img)) / (np.max(img) - np.min(img) + 1e-8)
        
        filtered = apply_butterworth_filter(img, order=4)
        resized = resize_image(filtered, target_size)
        
        fp[i, :, :] = resized
        
        if i % 10 == 0:
            fp.flush()
            print(f"Progress: {i}/{num_images}")
            
    except Exception as e:
        print(f"Error at {file_name}: {e}")
        fp[i, :, :] = np.zeros((target_h, target_w), dtype=np.float32)

fp.flush()
del fp 
print(f"Finished: {output_filename}")

Starting: PREOP AP | Total: 205
Progress: 0/205
Progress: 10/205
Progress: 20/205
Progress: 30/205
Progress: 40/205
Progress: 50/205
Progress: 60/205
Progress: 70/205
Progress: 80/205
Progress: 90/205
Progress: 100/205
Progress: 110/205
Progress: 120/205
Progress: 130/205
Progress: 140/205
Progress: 150/205
Progress: 160/205
Progress: 170/205
Progress: 180/205
Progress: 190/205
Progress: 200/205
Finished: preop_ap.npy


In [34]:
# find number of images under a category
selected_category = "PREOP UZ"
category_index = 1 
base_path = "./raw_data"
target_size = medians[category_index] 
target_h, target_w = target_size[1], target_size[0]

category_path = os.path.join(base_path, selected_category)
file_list = sorted([f for f in os.listdir(category_path) if f.endswith('.dcm')])
num_images = len(file_list)
num_images

56

In [41]:
# memmap to actual .npy
import os
import numpy as np
file_path = "preop_ap.npy"
target_size = (1760, 2140)
bytes_per_pixel = 4

total_bytes = os.path.getsize(file_path)

frame_bytes = target_size[0] * target_size[1] * bytes_per_pixel

num_images = total_bytes // frame_bytes

final_shape = (num_images, target_size[1], target_size[0])

temp_data = np.memmap(file_path, dtype='float32', mode='r', shape=final_shape)
np.save(file_path, temp_data)

In [None]:
import os
import re

def get_perfect_patient_dict(base_path):
    categories = ["PREOP UZ", "PREOP AP", "POSTOP UZ", "POSTOP AP"]
    potential_names = set()
    
    for cat in categories:
        cat_path = os.path.join(base_path, cat)
        if os.path.exists(cat_path):
            for file_name in os.listdir(cat_path):
                if file_name.endswith(".dcm"):
                    match = re.search(r"^(.*?)\s*(POSTOP|PREOP)", file_name)
                    if match:
                        potential_names.add(match.group(1).strip())

    perfect_dict = {}

    for name in sorted(potential_names):
        temp_paths = []
        is_complete = True
        
        for cat in categories:
            cat_path = os.path.join(base_path, cat)
            found_path = ""
            
            if os.path.exists(cat_path):
                for file_name in os.listdir(cat_path):
                    if file_name.startswith(name) and file_name.endswith(".dcm"):
                        found_path = os.path.join(cat_path, file_name)
                        break
            
            # Eğer bir kategoride bile resim bulunamazsa is_complete False olur
            if found_path == "":
                is_complete = False
                break
            else:
                temp_paths.append(found_path)
        
        # Sadece 4 resmi de bulunan hastaları sözlüğe ekle
        if is_complete:
            perfect_dict[name] = temp_paths
            
    return perfect_dict

# --- Kullanım ---
base_path = "./raw_data"
final_perfect_dict = get_perfect_patient_dict(base_path)

print(f"Toplam tam verili hasta sayısı: {len(final_perfect_dict)}")

Toplam tam verili hasta sayısı: 12


In [28]:
import numpy as np
import os
import pydicom

# Ayarlar
target_size = (1760, 2140) # (Genişlik, Yükseklik)
target_w, target_h = target_size
num_patients = len(final_perfect_dict)
output_filename = "final_dataset_4d.npy"

# 4D Shape: (Hasta Sayısı, 4 Çekim, Yükseklik, Genişlik)
data_shape = (num_patients, 4, target_h, target_w)

# Diskte yer ayır (RAM harcamaz)
fp = np.memmap(output_filename, dtype='float32', mode='w+', shape=data_shape)

print(f"Dataset oluşturuluyor. Toplam Hasta: {num_patients} | Hedef Boyut: {data_shape}")

for i, (name, paths) in enumerate(final_perfect_dict.items()):
    print(f"İşleniyor ({i+1}/{num_patients}): {name}")
    
    for j, file_path in enumerate(paths):
        try:
            # 1. Import
            ds = pydicom.dcmread(file_path)
            img = ds.pixel_array.astype(np.float32)
            
            # Normalizasyon
            img = (img - np.min(img)) / (np.max(img) - np.min(img) + 1e-8)
            
            # 2. Butterworth 4. Derece Filtre
            filtered = apply_butterworth_filter(img, order=4)
            
            # 3. Resize
            resized = resize_image(filtered, target_size)
            
            # 4. 4D matrisin ilgili hücresine yaz (Hasta i, Çekim j)
            fp[i, j, :, :] = resized
            
        except Exception as e:
            print(f"Hata: {name}, Çekim {j} -> {e}")
            fp[i, j, :, :] = np.zeros((target_h, target_w), dtype=np.float32)
            
    # Her hasta bittiğinde diske yazmayı onayla
    fp.flush()

# Bağlantıları temizle
del fp
print(f"\nİşlem tamamlandı: {output_filename} hazır.")

Dataset oluşturuluyor. Toplam Hasta: 12 | Hedef Boyut: (12, 4, 2140, 1760)
İşleniyor (1/12): EMİNE EREN
İşleniyor (2/12): EVA LUGOVAIA
İşleniyor (3/12): FATMA EMEL YAŞA
İşleniyor (4/12): HATİCE SEZER
İşleniyor (5/12): HATİCE ÇALIŞKAN
İşleniyor (6/12): KAFİYE ÇAKMAK
İşleniyor (7/12): MİNE ERDEN
İşleniyor (8/12): NEBAHAT ZAZAOĞLU
İşleniyor (9/12): NURHAN IŞIK
İşleniyor (10/12): SOLMAZ ÜLKER
İşleniyor (11/12): SUZAN BOZKAYA
İşleniyor (12/12): TİMUR ÇAKMAK

İşlem tamamlandı: final_dataset_4d.npy hazır.


In [42]:
# validity check
def validate_dataset(file_path, n_patients, size):
    if not os.path.exists(file_path):
        return
    
    expected = n_patients * size[1] * size[0] * 4
    actual = os.path.getsize(file_path)
    
    print(f"Expected: {expected} bytes")
    print(f"Actual:   {actual} bytes")
    print("Match: " + ("YES" if expected == actual else f"NO (Diff: {actual - expected})"))

validate_dataset("preop_ap.npy", 205, (1760, 2140))

Expected: 3088448000 bytes
Actual:   3088448128 bytes
Match: NO (Diff: 128)
