In [2]:
import os
import numpy as np
import tifffile as tiff
import torch
import torch.nn as nn
import dask
from dask import delayed, compute
import io

# Define the Resize3D transformation
class Resize3D:
    """Callable transform that resamples a volume to a fixed spatial size.

    The transform adds a temporary batch axis, runs trilinear interpolation,
    and strips the batch axis again. Trilinear interpolation operates on
    5-D input, so the tensor passed in must be 4-D (one leading axis followed
    by the three axes that get resized).

    Attributes are set in ``__init__``:
        size: target size of the three resized axes, i.e. (D, H, W).
    """

    def __init__(self, size):
        # Target (D, H, W) forwarded verbatim to ``interpolate``.
        self.size = size

    def __call__(self, sample):
        """Return ``sample`` resampled so its last three axes equal ``self.size``."""
        batched = sample.unsqueeze(0)  # interpolate expects a leading batch axis
        resampled = nn.functional.interpolate(
            batched,
            size=self.size,
            mode='trilinear',
            align_corners=False,
        )
        return resampled.squeeze(0)  # drop the batch axis added above

# Function to load, resize, and save a single image as .npz
@delayed
def process_and_save_image(filepath, output_dir, resize_transform, normalize_mean=0.43216, normalize_std=0.22803):
    """Load one TIFF, rescale, resize, standardize, and save it as a compressed .npz.

    The function is wrapped with ``dask.delayed``: calling it only builds a
    lazy task, and the steps below run when that task is computed.

    Args:
        filepath: Path of the TIFF file to read.
        output_dir: Directory that receives ``<input file stem>.npz``. It is
            not created here, so it must already exist.
        resize_transform: Callable applied to the image tensor after a leading
            axis has been added; must return a tensor.
        normalize_mean: Value subtracted from the resized tensor.
        normalize_std: Value the mean-subtracted tensor is divided by.

    Returns:
        None. The array is written with ``np.savez_compressed`` as a
        positional argument, so it is stored under NumPy's default key
        ``arr_0``.
    """
    image = tiff.imread(filepath).astype(np.float32)

    # Min-max rescale to [0, 1]. A constant-intensity image has zero range;
    # dividing would turn every voxel into NaN (0 / 0), so map it to zeros.
    # NaN/inf inputs do not compare equal to 0 and still propagate unchanged.
    min_value = image.min()
    value_range = image.max() - min_value
    if value_range == 0:
        image = np.zeros_like(image)
    else:
        image = (image - min_value) / value_range

    # Add a leading axis before resizing; for a 3-D (D, H, W) stack this
    # gives (1, D, H, W) -- assumes the TIFF holds a single-channel volume.
    tensor = torch.from_numpy(image).unsqueeze(0)
    resized_image = resize_transform(tensor)
    resized_image = (resized_image - normalize_mean) / normalize_std

    # Drop the leading axis again and write the compressed archive.
    resized_image_np = resized_image.squeeze(0).numpy()
    filename = os.path.splitext(os.path.basename(filepath))[0] + '.npz'
    save_path = os.path.join(output_dir, filename)
    np.savez_compressed(save_path, resized_image_np)

    print(f"Processed and saved: {save_path}")

def preprocess_dataset(root_dir, output_dir, resize_size=(32, 448, 448)):
    """Convert the TIFF files of every class folder into resized .npz files.

    For each of the class folders ``dead`` and ``live`` under ``root_dir``,
    one delayed ``process_and_save_image`` task is created per TIFF file, and
    all tasks are then executed together with ``dask.compute``. Results land
    in the same-named class folder under ``output_dir``.

    Args:
        root_dir: Directory containing the ``dead`` and ``live`` input folders.
        output_dir: Directory for the output; it and its ``dead``/``live``
            subfolders are created if missing.
        resize_size: Target size handed to ``Resize3D``.

    Raises:
        FileNotFoundError: If a class folder is missing under ``root_dir``
            (raised by ``os.listdir``).
    """
    class_names = ('dead', 'live')
    # Match case-insensitively and accept both spellings, so files such as
    # "scan.TIF" or "scan.tiff" are not silently skipped.
    # NOTE: inputs that differ only by extension map to the same .npz name.
    tiff_suffixes = ('.tif', '.tiff')

    # Create all output directories up front, before any input is listed.
    os.makedirs(output_dir, exist_ok=True)
    for class_name in class_names:
        os.makedirs(os.path.join(output_dir, class_name), exist_ok=True)

    # A single transform instance is shared by every task.
    resize_transform = Resize3D(resize_size)

    tasks = []
    for class_name in class_names:
        class_dir = os.path.join(root_dir, class_name)
        output_class_dir = os.path.join(output_dir, class_name)
        # os.listdir order is arbitrary; sort for a reproducible task order.
        for filename in sorted(os.listdir(class_dir)):
            if filename.lower().endswith(tiff_suffixes):
                filepath = os.path.join(class_dir, filename)
                tasks.append(process_and_save_image(filepath, output_class_dir, resize_transform))

    # Nothing has run yet; this executes every delayed task.
    compute(*tasks)

# Define the paths. Both are machine-specific absolute Windows paths; edit them before running elsewhere.
root_dir = r"C:\rkka_Projects\cell_death_v1\data\before_preprocess\uniform_size_not_compressed\test"  # Input: must contain the 'dead' and 'live' folders with .tif files
output_dir = r"C:\rkka_Projects\cell_death_v1\data\test"  # Output: receives 'dead' and 'live' folders of resized, compressed .npz files

# Preprocess the dataset and save as .npz files.
# NOTE: this call runs as soon as the cell/module is executed (there is no __main__ guard).
preprocess_dataset(root_dir, output_dir)


Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\dead\48_-1_6.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\live\329_0.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\live\48_1_8.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\live\450_1_1.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\live\48_0_1.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\live\48_1_3.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\dead\48_-2_1.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\dead\48_-1_2.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\dead\48_-1_4.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\live\450_0_1.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\dead\351_-2.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\test\live\318_1.npz
Processed and saved: C:\rkka_Projects\cell_death_v1\data\tes