In [None]:
import os
import h5py
import torch
import torch.nn.functional as F
import numpy as np

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Directory containing the source HDF5 files to be read.
data_path = '../../data/PoolBoiling-SubCooled-FC72-2D/redimensionalized'
# Directory that receives one output HDF5 file per input file.
output_path = '../../data/PoolBoiling-SubCooled-FC72-2D/DSRD_128x128'

# Dataset names that are spatially resampled before being written out.
keys_to_downsample = ['temperature', 'velx', 'vely', 'dfun', 'pressure', 'x', 'y']

# Dataset names that are written to the output file unchanged.
keys_to_copy = ['real-runtime-params', 'int-runtime-params']

# Create the destination directory up front; an already existing one is fine.
os.makedirs(output_path, exist_ok=True)


def downsample(data, target_size):
    """
    Downsample a tensor's spatial dimensions to the given target size.

    The last two dimensions are treated as the spatial (Y, X) axes and are
    resampled with ``F.interpolate(..., mode='area')``. A 3D input is
    temporarily given a singleton channel axis, because a two-element
    ``size`` requires a 4D ``[N, C, H, W]`` tensor; that axis is removed
    again so the result has the same rank as the input.

    Args:
        data (torch.Tensor): 3D tensor [timesteps, Y, X] or 4D tensor
            [timesteps, channels, Y, X].
        target_size (tuple): Target size (height, width) for downsampling.
    Returns:
        torch.Tensor: Downsampled tensor with the same number of dimensions
        as ``data`` and spatial size ``target_size``.
    Raises:
        ValueError: If ``data`` is neither 3D nor 4D.
    """
    if data.dim() not in (3, 4):
        raise ValueError(
            f"Expected a 3D or 4D tensor, got {data.dim()}D with shape {tuple(data.shape)}."
        )
    new_height, new_width = target_size

    # Add a channel axis for 3D input so interpolate sees [N, C, H, W].
    needs_channel_axis = data.dim() == 3
    if needs_channel_axis:
        data = data.unsqueeze(1)

    # Use PyTorch's interpolate for downsampling
    downsampled = F.interpolate(data, size=(new_height, new_width), mode='area')

    # Restore the caller's original rank.
    if needs_channel_axis:
        downsampled = downsampled.squeeze(1)
    return downsampled


# Spatial resolution (height, width) applied to every downsampled dataset.
target_size = (128, 128)

# Process each file in the directory. os.listdir returns entries in arbitrary
# order, so sort them to make the run (and its log output) reproducible.
files = sorted(f for f in os.listdir(data_path) if f.endswith('.hdf5'))
print(files)


for file in files:
    input_file = os.path.join(data_path, file)
    output_file = os.path.join(output_path, file)

    # The input is opened first, so a missing or unreadable source file fails
    # before an (empty) output file is created.
    with h5py.File(input_file, 'r') as input_data, \
            h5py.File(output_file, 'w') as output_data:
        for key in input_data.keys():
            # Decide whether the key is wanted BEFORE reading it, so unused
            # entries are never loaded into memory.
            if key not in keys_to_downsample and key not in keys_to_copy:
                print(f"Skipping key '{key}' as it's not in keys_to_downsample or keys_to_copy.")
                continue

            dataset = input_data[key][:]
            print(f"Processing key '{key}' in file '{file}' with shape {dataset.shape}")

            if key not in keys_to_downsample:
                # Directly copy keys
                output_data.create_dataset(key, data=dataset)
                continue

            # Downsample spatial-temporal data (only 3D or 4D arrays qualify).
            if dataset.ndim < 3:
                raise ValueError(f"Key '{key}' expected to have spatial dimensions but doesn't.")

            # Remember the original rank: the channel axis must only be
            # stripped again if it was added here, not merely because a
            # genuinely 4D dataset happens to have a single channel.
            was_3d = dataset.ndim == 3
            tensor = torch.from_numpy(dataset)
            if was_3d:
                tensor = tensor.unsqueeze(1)  # Add channel dimension

            downsampled = downsample(tensor, target_size).numpy()

            if was_3d:
                downsampled = downsampled[:, 0]  # Remove the added channel dimension

            # Save downsampled
            if key == 'temperature':
                print('max temperature:', np.max(downsampled))
            output_data.create_dataset(key, data=downsampled)

    # Report success only after both files have been closed (and flushed).
    print(f"File '{file}' processed and saved to '{output_file}'.")

print("Downsampling completed for all files.")


['Twall-100.hdf5', 'Twall-103.hdf5', 'Twall-106.hdf5', 'Twall-110.hdf5', 'Twall-79.hdf5', 'Twall-81.hdf5', 'Twall-85.hdf5', 'Twall-90.hdf5', 'Twall-95.hdf5', 'Twall-98.hdf5']
Processing key 'dfun' in file 'Twall-100.hdf5' with shape (201, 384, 384)
Processing key 'int-runtime-params' in file 'Twall-100.hdf5' with shape (79,)
Processing key 'pressure' in file 'Twall-100.hdf5' with shape (201, 384, 384)
Processing key 'real-runtime-params' in file 'Twall-100.hdf5' with shape (109,)
Processing key 'temperature' in file 'Twall-100.hdf5' with shape (201, 384, 384)
max temperature: 96.59437655251776
Processing key 'velx' in file 'Twall-100.hdf5' with shape (201, 384, 384)
Processing key 'vely' in file 'Twall-100.hdf5' with shape (201, 384, 384)
Processing key 'x' in file 'Twall-100.hdf5' with shape (201, 384, 384)
Processing key 'y' in file 'Twall-100.hdf5' with shape (201, 384, 384)
File 'Twall-100.hdf5' processed and saved to '../../data/PoolBoiling-SubCooled-FC72-2D/DSRD_128x128\Twall-100