In [2]:
import pydicom
import matplotlib.pyplot as plt
import os
import SimpleITK as sitk
import os
import shutil
import time

In [32]:
import pandas as pd

csv_path = "/Users/leniecka/python/id_final/SPIDER_cleaned/radiological_gradings.csv"
df = pd.read_csv(csv_path)

df['DiskMaskLabel'] = df['IVD label'] + 200

df = df[df['DiskMaskLabel'].isin([201,202,203,204,205])]

count_per_class = df['Pfirrman grade'].value_counts().sort_index()
print("Liczba przykładów w każdej klasie Pfirrmann (dla dysków L1/L2 → L5/S1):\n")
print(count_per_class)

pivot_table = df.pivot_table(index='DiskMaskLabel', columns='Pfirrman grade', aggfunc='size', fill_value=0)
print("\nLiczba przykładów dla każdego dysku i klasy Pfirrmann:")
print(pivot_table)

Liczba przykładów w każdej klasie Pfirrmann (dla dysków L1/L2 → L5/S1):

Pfirrman grade
1    164
2    211
3    322
4    225
5    168
Name: count, dtype: int64

Liczba przykładów dla każdego dysku i klasy Pfirrmann:
Pfirrman grade   1   2   3   4   5
DiskMaskLabel                     
201             23  28  59  49  59
202             22  26  72  57  41
203             31  43  71  47  26
204             39  50  69  35  25
205             49  64  51  37  17


In [9]:
import os
import random
import shutil
import SimpleITK as sitk
import numpy as np
import pandas as pd
import cv2

# ===== KONFIG =====
BASE_DIR = 'SPIDER_cleaned'
IMG_DIR = os.path.join(BASE_DIR, 'images')
MASK_DIR = os.path.join(BASE_DIR, 'masks')

OUTPUT_DIR = 'SPIDER_full_slices'
OUTPUT_IMG_DIR = os.path.join(OUTPUT_DIR, 'images')
OUTPUT_MASK_DIR = os.path.join(OUTPUT_DIR, 'masks')

for d in [OUTPUT_IMG_DIR, OUTPUT_MASK_DIR]:
    if os.path.exists(d):
        shutil.rmtree(d)
    os.makedirs(d)

CSV_PATH = "/Users/leniecka/python/id_final/SPIDER_cleaned/radiological_gradings.csv"
df = pd.read_csv(CSV_PATH)
df['DiskMaskLabel'] = df['IVD label'] + 200
df = df[df['DiskMaskLabel'].isin([201,202,203,204,205])]

rare_classes = [1,5]      # dla nich więcej slice'ów
common_classes = [2,3,4]  # dla nich 3 slice'y

# ===== Funkcja normalizacji do 0-255 =====
def normalize_to_png(img_slice):
    img_min = np.min(img_slice)
    img_max = np.max(img_slice)
    if img_max == img_min:
        return np.zeros_like(img_slice, dtype=np.uint8)
    img_norm = 255.0 * (img_slice - img_min) / (img_max - img_min)
    return img_norm.astype(np.uint8)

# ===== Przetwarzanie wszystkich plików =====
files = sorted([f for f in os.listdir(IMG_DIR) if f.endswith('.mha')])
random.seed(42)
random.shuffle(files)

for filename in files:
    img_path = os.path.join(IMG_DIR, filename)
    mask_path = os.path.join(MASK_DIR, filename)
    if not os.path.exists(mask_path):
        print(f"Mask missing: {filename}")
        continue

    try:
        # 1️⃣ Wczytaj i wymuś orientację sagittal RAS
        sitk_img = sitk.ReadImage(img_path)
        sitk_mask = sitk.ReadImage(mask_path)

        orient_filter = sitk.DICOMOrientImageFilter()
        orient_filter.SetDesiredCoordinateOrientation("LPS")
        sitk_img = orient_filter.Execute(sitk_img)
        sitk_mask = orient_filter.Execute(sitk_mask)

        arr_img = sitk.GetArrayFromImage(sitk_img)      # (Z,Y,X)
        arr_mask = sitk.GetArrayFromImage(sitk_mask)    # (Z,Y,X)
        mid = arr_img.shape[2] // 2  # środkowa oś X

        # Pacjent z nazwy pliku (zakładam format "PatientID_something.mha")
        patient_id = int(filename.split("_")[0])

        # Dyski 201-205
        for disk_label in range(201,206):
            df_row = df[(df['Patient']==patient_id) & (df['DiskMaskLabel']==disk_label)]
            if df_row.empty:
                continue
            pf_grade = int(df_row['Pfirrman grade'].values[0])

            # Określamy slice offsets
            offsets = [-1,0,1] if pf_grade in common_classes else [-2,-1,0,1,2]

            for offset in offsets:
                x_idx = mid + offset
                if x_idx < 0 or x_idx >= arr_img.shape[2]:
                    continue

                img_slice = arr_img[:, :, x_idx]
                mask_slice = arr_mask[:, :, x_idx]

                # Flip pionowy: głowa u góry
                img_slice = np.flipud(img_slice)
                mask_slice = np.flipud(mask_slice)

                # Normalizacja i zapis
                base_name = f"{filename.replace('.mha','')}_disk{disk_label}_slice{offset}_pf{pf_grade}"
                img_uint8 = normalize_to_png(img_slice)
                mask_uint8 = normalize_to_png(mask_slice)

                cv2.imwrite(os.path.join(OUTPUT_IMG_DIR, base_name + ".png"), img_uint8)
                cv2.imwrite(os.path.join(OUTPUT_MASK_DIR, base_name + ".png"), mask_uint8)

    except Exception as e:
        print(f"Error {filename}: {e}")

print("✅ Wszystkie slice zapisane w SPIDER_full_slices/images i masks w orientacji sagittal RAS.")

✅ Wszystkie slice zapisane w SPIDER_full_slices/images i masks w orientacji sagittal RAS.


In [None]:
import os
import random
import shutil
import SimpleITK as sitk
import numpy as np
import cv2
import pandas as pd

# ===== KONFIG =====
BASE_DIR = 'SPIDER_cleaned'
IMG_DIR = os.path.join(BASE_DIR, 'images')
MASK_DIR = os.path.join(BASE_DIR, 'masks')

OUTPUT_DIR = 'SPIDER_training'
GRADES = [1,2,3,4,5]

# Tworzenie folderów train/val -> grade1-grade5
for split in ['train', 'val']:
    split_dir = os.path.join(OUTPUT_DIR, split)
    if os.path.exists(split_dir):
        shutil.rmtree(split_dir)
    for grade in GRADES:
        os.makedirs(os.path.join(split_dir, f'grade{grade}'))

# Wczytaj CSV z Pfirrmann grade
CSV_PATH = "/Users/leniecka/python/id_final/SPIDER_cleaned/radiological_gradings.csv"
df = pd.read_csv(CSV_PATH)
df['DiskMaskLabel'] = df['IVD label'] + 200
df = df[df['DiskMaskLabel'].isin([201,202,203,204,205])]

RARE_CLASSES = [1,5]
COMMON_CLASSES = [2,3,4]

# ===== FUNKCJE =====
def normalize_to_png(img_slice):
    img_min = np.min(img_slice)
    img_max = np.max(img_slice)
    if img_max == img_min:
        return np.zeros_like(img_slice, dtype=np.uint8)
    img_norm = 255.0 * (img_slice - img_min) / (img_max - img_min)
    return img_norm.astype(np.uint8)

def tight_crop(img_slice, mask_slice, disc_label):
    """Wycinanie prostokąta wokół dysku"""
    y_idx, x_idx = np.where(mask_slice == disc_label)
    if len(y_idx)==0 or len(x_idx)==0:
        return None
    y_min, y_max = np.min(y_idx), np.max(y_idx)
    x_min, x_max = np.min(x_idx), np.max(x_idx)
    return img_slice[y_min:y_max+1, x_min:x_max+1]

# ===== Podział pacjentów na train/val =====
patients = df['Patient'].unique()
random.seed(42)
random.shuffle(patients)
split_idx = int(len(patients) * 0.8)
train_patients = patients[:split_idx]
val_patients = patients[split_idx:]
print(f"Train patients: {len(train_patients)}, Val patients: {len(val_patients)}")

# ===== PRZETWARZANIE =====
files = sorted([f for f in os.listdir(IMG_DIR) if f.endswith('.mha')])

for filename in files:
    img_path = os.path.join(IMG_DIR, filename)
    mask_path = os.path.join(MASK_DIR, filename)
    if not os.path.exists(mask_path):
        print(f"Mask missing: {filename}")
        continue

    try:
        # Wczytaj obraz i maskę
        sitk_img = sitk.ReadImage(img_path)
        sitk_mask = sitk.ReadImage(mask_path)

        # Wymuś orientację sagittal RAS
        orient_filter = sitk.DICOMOrientImageFilter()
        orient_filter.SetDesiredCoordinateOrientation("LPS")
        sitk_img = orient_filter.Execute(sitk_img)
        sitk_mask = orient_filter.Execute(sitk_mask)

        arr_img = sitk.GetArrayFromImage(sitk_img)      # (Z,Y,X)
        arr_mask = sitk.GetArrayFromImage(sitk_mask)    # (Z,Y,X)
        mid = arr_img.shape[2] // 2  # środkowa oś X

        # Id pacjenta
        patient_id = int(filename.split("_")[0])
        split_type = 'train' if patient_id in train_patients else 'val'

        # Dyski 201-205
        for disc_label in range(201,206):
            df_row = df[(df['Patient']==patient_id) & (df['DiskMaskLabel']==disc_label)]
            if df_row.empty:
                continue
            pf_grade = int(df_row['Pfirrman grade'].values[0])

            # Slice offsets
            if pf_grade in COMMON_CLASSES:
                offsets = [-1,0,1]
            else:  # RARE_CLASSES
                offsets = [-2,-1,0,1,2]

            # Filtruj istniejące slice'y
            valid_offsets = [o for o in offsets if 0 <= mid+o < arr_img.shape[2]]

            for o in valid_offsets:
                x_idx = mid + o
                img_slice = arr_img[:, :, x_idx]
                mask_slice = arr_mask[:, :, x_idx]

                # Flip pionowy: głowa u góry
                img_slice = np.flipud(img_slice)
                mask_slice = np.flipud(mask_slice)

                # Wytnij dysk z maski
                cropped = tight_crop(img_slice, mask_slice, disc_label)
                if cropped is None:
                    continue

                cropped = normalize_to_png(cropped)

                # Zapis do folderu train/val -> gradeX
                out_dir = os.path.join(OUTPUT_DIR, split_type, f'grade{pf_grade}')
                base_name = f"patient{patient_id}_disk{disc_label}_slice{o}_pf{pf_grade}.png"
                out_path = os.path.join(out_dir, base_name)
                cv2.imwrite(out_path, cropped)

    except Exception as e:
        print(f"Error {filename}: {e}")

print("Wszystkie dyski wycięte i zapisane w folderach train/val -> grade1-grade5.")

Train patients: 174, Val patients: 44
✅ Wszystkie dyski wycięte i zapisane w folderach train/val -> grade1-grade5.


In [None]:
import os
import re

OUTPUT_DIR = 'SPIDER_training'

def get_patient_ids(split_dir):
    patient_ids = set()
    for grade_folder in os.listdir(split_dir):
        grade_path = os.path.join(split_dir, grade_folder)
        if not os.path.isdir(grade_path):
            continue
        for fname in os.listdir(grade_path):
            match = re.match(r'patient(\d+)_', fname)
            if match:
                patient_ids.add(int(match.group(1)))
    return patient_ids

train_ids = get_patient_ids(os.path.join(OUTPUT_DIR, 'train'))
val_ids = get_patient_ids(os.path.join(OUTPUT_DIR, 'val'))

common_ids = train_ids & val_ids
print(f"Liczba pacjentów w train: {len(train_ids)}")
print(f"Liczba pacjentów w val: {len(val_ids)}")
if common_ids:
    print(f"Uwaga! Ci sami pacjenci w train i val: {common_ids}")
else:
    print("Brak wspólnych pacjentów w train i val. Podział poprawny.")

Liczba pacjentów w train: 168
Liczba pacjentów w val: 42
✅ Brak wspólnych pacjentów w train i val. Podział poprawny.


In [18]:
import os

OUTPUT_DIR = 'SPIDER_training'
splits = ['train', 'val']
grades = ['grade1', 'grade2', 'grade3', 'grade4', 'grade5']

for split in splits:
    print(f"\n===== {split.upper()} =====")
    for grade in grades:
        grade_dir = os.path.join(OUTPUT_DIR, split, grade)
        if os.path.exists(grade_dir):
            num_slices = len([f for f in os.listdir(grade_dir) if f.endswith('.png')])
            print(f"{grade}: {num_slices} slice(s)")
        else:
            print(f"{grade}: 0 slice(s)")


===== TRAIN =====
grade1: 530 slice(s)
grade2: 516 slice(s)
grade3: 736 slice(s)
grade4: 537 slice(s)
grade5: 668 slice(s)

===== VAL =====
grade1: 80 slice(s)
grade2: 116 slice(s)
grade3: 227 slice(s)
grade4: 135 slice(s)
grade5: 168 slice(s)
