In [9]:
import os
import shutil
import random
from tqdm import tqdm
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import glob
import pandas as pd
from utils.preprocess import denoise_with_sitk, denoise_with_ants, enhance_vessels_with_frangi

In [18]:
def create_vertebral_artery_dataset(pairs):
    """Build a table of image/mask path pairs whose files both exist on disk.

    Args:
        pairs: Iterable of ``(image_path, mask_path)`` tuples.

    Returns:
        pandas.DataFrame with the columns ``'image_path'`` and ``'mask_path'``,
        one row per pair for which both files exist. Pairs with a missing file
        are reported on stdout and left out. The two columns are present even
        when no pair survives, so downstream column access does not fail on an
        empty result.
    """
    columns = ['image_path', 'mask_path']

    data = []
    for img_path, mask_path in pairs:
        if not os.path.exists(img_path) or not os.path.exists(mask_path):
            print(f"Пропускаю отсутствующие файлы: {img_path} или {mask_path}")
            continue

        data.append({
            'image_path': img_path,
            'mask_path': mask_path
        })

    # Passing `columns` explicitly keeps the schema stable for an empty `data`
    # list, where pandas would otherwise produce a frame with no columns.
    return pd.DataFrame(data, columns=columns)

data_dir = './dataset'

# glob returns matches in arbitrary (filesystem-dependent) order; sorting fixes
# the row order so a seeded train/val split is reproducible across machines.
image_files = sorted(glob.glob(os.path.join(data_dir, '*.jpg')))

# Derive each mask path by swapping only the trailing extension. A plain
# str.replace('.jpg', ...) would also rewrite '.jpg' occurring earlier in the
# path and would leave the name unchanged for a differently-cased extension.
mask_files = [os.path.splitext(f)[0] + '-VA.png' for f in image_files]
pairs = list(zip(image_files, mask_files))

dataset = create_vertebral_artery_dataset(pairs)

In [19]:
_SUPPORTED_METHODS = ('sitk', 'ants', 'frangi')


def _preprocess_image(image, method):
    """Apply the selected preprocessing to an RGB PIL image and return a PIL image."""
    if method == 'sitk':
        # Denoise each of the three colour channels independently.
        image_array = np.array(image)
        denoised_image = np.zeros_like(image_array)
        for i in range(3):
            denoised_image[:, :, i] = denoise_with_sitk(image_array[:, :, i])
        return Image.fromarray(denoised_image.astype('uint8'))

    if method == 'ants':
        # Denoise a grayscale version, then replicate it into three channels.
        grayscale_array = np.array(image.convert('L'))
        denoised_array = denoise_with_ants(grayscale_array)
        rgb_denoised = np.stack([denoised_array] * 3, axis=2)
        return Image.fromarray(rgb_denoised.astype('uint8'))

    if method == 'frangi':
        # Enhance a grayscale version, then replicate it into three channels.
        grayscale_array = np.array(image.convert('L'))
        enhanced_vessels = enhance_vessels_with_frangi(grayscale_array)
        enhanced_rgb = np.stack([enhanced_vessels] * 3, axis=-1)
        # NOTE(review): the result is cast straight to uint8; if the helper
        # returns floats in [0, 1] this truncates to 0/1 -- confirm its range.
        return Image.fromarray(enhanced_rgb.astype('uint8'))

    raise ValueError(f"Unknown method: {method}")


def process_and_split_dataset(
    dataset,
    output_base_dir,
    method='sitk',  # 'sitk', 'ants', 'frangi'
    val_ratio=0.2,
    random_state=42
):
    """Split image/mask pairs into train/val and write preprocessed copies.

    Rows of ``dataset`` are split with ``train_test_split``. For every row the
    image is preprocessed with ``method`` and saved under
    ``<output_base_dir>/<split>/`` with its original file name; the mask is
    copied next to it unchanged.

    A pair whose image and mask outputs both already exist is skipped. The
    check looks only at file names, so outputs written by an earlier run with
    a different ``method`` are kept as they are.

    Failures while handling a single pair are printed and do not stop the run.

    Args:
        dataset: DataFrame with ``'image_path'`` and ``'mask_path'`` columns.
        output_base_dir: Directory that receives the ``train`` and ``val``
            sub-directories; created if missing.
        method: One of ``'sitk'``, ``'ants'``, ``'frangi'``.
        val_ratio: Passed to ``train_test_split`` as ``test_size``.
        random_state: Passed to ``train_test_split`` as ``random_state``.

    Raises:
        ValueError: If ``method`` is not a supported value. This is checked
            before any directory is created or file is touched.
        KeyError: If ``dataset`` lacks one of the required columns.
    """
    # Fail fast on a bad method: inside the per-file loop the error would be
    # swallowed once per file, or never surface when outputs already exist.
    if method not in _SUPPORTED_METHODS:
        raise ValueError(f"Unknown method: {method}")

    os.makedirs(output_base_dir, exist_ok=True)

    # Split into train and val.
    train_data, val_data = train_test_split(dataset, test_size=val_ratio, random_state=random_state)

    splits = {'train': train_data, 'val': val_data}

    for split_name, split_data in splits.items():
        split_dir = os.path.join(output_base_dir, split_name)
        os.makedirs(split_dir, exist_ok=True)

        # The paths are bound before the try block so the error report below
        # can always refer to them.
        path_pairs = zip(split_data['image_path'], split_data['mask_path'])
        for image_path, mask_path in tqdm(path_pairs, total=len(split_data), desc=f"Processing {split_name} data"):
            try:
                output_img_path = os.path.join(split_dir, os.path.basename(image_path))
                output_mask_path = os.path.join(split_dir, os.path.basename(mask_path))

                # Skip pairs that were already written.
                if os.path.exists(output_img_path) and os.path.exists(output_mask_path):
                    continue

                # Load the image; the context manager releases the file handle.
                with Image.open(image_path) as source_image:
                    image = source_image.convert('RGB')

                processed_image = _preprocess_image(image, method)

                # Save the processed image.
                processed_image.save(output_img_path)

                # Copy the mask unchanged.
                if os.path.exists(mask_path):
                    shutil.copy(mask_path, output_mask_path)
                else:
                    print(f"Warning: Mask file not found: {mask_path}")

            except Exception as e:
                print(f"Error processing files: Image: {image_path}, Mask: {mask_path}")
                print(f"Error details: {str(e)}")

    print(f"Processing complete. Dataset saved to {output_base_dir}")

In [20]:
# Build the Frangi-enhanced train/val copy of the dataset. Pairs whose outputs
# already exist in the target directory are skipped by the function.
process_and_split_dataset(
    dataset=dataset,
    output_base_dir='new_dataset/',
    method='frangi',
)

Processing train data: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 11999.44it/s]
Processing val data: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 5999.00it/s]

Processing complete. Dataset saved to new_dataset/



