In [1]:
import os
import h5py
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset

In [2]:
# Names of the HDF5 datasets read from each simulation file.
TEMPERATURE = 'temperature'
VELX = 'velx'
VELY = 'vely'
PRESSURE = 'pressure'  # declared for completeness; the loader below does not read it


class SimpleLoaderBubbleML(Dataset):
    r"""
    Dataset over a single HDF5 simulation file yielding consecutive-timestep pairs.

    Sample ``idx`` is ``(state[idx], state[idx + 1])`` where a *state* stacks the
    temperature, x-velocity and y-velocity fields along a leading channel axis.

    The HDF5 file is opened in ``__init__`` and stays open until ``close()`` is
    called (the instance can also be used as a context manager).
    """

    def __init__(self, filename):
        r"""
        Open ``filename`` read-only and record how many timesteps it holds.
        """
        self.filename = filename
        self.data = h5py.File(self.filename, 'r')
        # Ask the dataset for its shape directly; slicing with [:] first would
        # read the entire temperature array into memory just to count rows.
        self.timesteps = self.data[TEMPERATURE].shape[0]

    def __len__(self):
        r"""
        Number of (input, label) pairs: one fewer than the number of timesteps.
        Clamped at zero so that an empty file yields ``len() == 0`` instead of
        making ``len()`` raise on a negative value.
        """
        return max(self.timesteps - 1, 0)

    def close(self):
        r"""
        Close the underlying HDF5 file handle.
        """
        self.data.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always release the file; returning None lets exceptions propagate.
        self.close()

    def _get_state(self, idx):
        r"""
        The state is the temperature, x-velocity, and y-velocity at time == idx
        """
        temp = torch.from_numpy(self.data[TEMPERATURE][idx])
        velx = torch.from_numpy(self.data[VELX][idx])
        vely = torch.from_numpy(self.data[VELY][idx])
        # returns a stack with shape [3 x Y x X]
        return torch.stack((temp, velx, vely), dim=0)

    def __getitem__(self, idx):
        r"""
        As input, get temperature and velocities at time == idx.
        As the output label, get the same three fields at time == idx + 1.

        Negative indices count from the end of the dataset (``-1`` is the last
        valid pair). Raises ``IndexError`` when ``idx`` is out of range, so a
        pair never wraps around from the final timestep back to the first.
        """
        num_pairs = len(self)
        if idx < 0:
            # Rebind instead of += so a tensor/array index is not mutated in place.
            idx = idx + num_pairs
        if not 0 <= idx < num_pairs:
            raise IndexError(
                f"index out of range for dataset with {num_pairs} samples"
            )
        state = self._get_state(idx)
        label = self._get_state(idx + 1)
        return state, label

    def get_full_stack(self):
        r"""
        Retrieve the full temporal domain stack for the entire simulation.
        The returned stack will have shape: [timesteps, channels, Y, X].
        Note that this reads all three fields completely into memory.
        """
        # Load temperature, velx, and vely data from the HDF5 file
        temp_data = torch.from_numpy(self.data[TEMPERATURE][:])  # Shape: [timesteps, Y, X]
        velx_data = torch.from_numpy(self.data[VELX][:])         # Shape: [timesteps, Y, X]
        vely_data = torch.from_numpy(self.data[VELY][:])         # Shape: [timesteps, Y, X]

        # Stack the data along the channel dimension
        full_stack = torch.stack((temp_data, velx_data, vely_data), dim=1)  # Shape: [timesteps, channels, Y, X]
        return full_stack

    

In [7]:
import os
import h5py
import torch
import torch.nn.functional as F

redim = True  # selects the 'redimensionalized' input/output folders below

# Parameters
data_path = '../../data'
downsample_factor = 4  # spatial reduction factor handed to the downsampling step

# The output folder is derived from the base data folder, so it must be
# computed before data_path is re-pointed at the input sub-folder.
output_path = os.path.join(
    data_path,
    'downsampled_redimensionalized/' if redim else 'downsampled/',
)

# From here on, data_path is the folder the input files are listed from.
if redim:
    data_path = data_path + '/redimensionalized'

os.makedirs(output_path, exist_ok=True)  # no-op when the folder already exists

# Keys that are spatially downsampled before being written out.
keys_to_downsample = ['temperature', 'velx', 'vely', 'dfun', 'pressure', 'x', 'y']

# Keys that are written to the output file unchanged.
keys_to_copy = ['real-runtime-params', 'int-runtime-params']


def downsample(data, factor):
    """
    Downsample a tensor's spatial (last two) dimensions by the given factor.

    Args:
        data (torch.Tensor): 3D tensor [timesteps, Y, X] or 4D tensor
            [timesteps, channels, Y, X]. Area interpolation averages values,
            so a floating-point dtype is presumably required -- TODO confirm
            for integer inputs.
        factor (int): Downsampling factor for spatial dimensions (>= 1).
    Returns:
        torch.Tensor: Tensor of the same rank as ``data`` whose spatial size is
        (Y // factor, X // factor). When ``factor == 1`` the input is returned
        unchanged (same object).
    Raises:
        ValueError: If ``factor`` is smaller than 1, if ``data`` is not 3D or
            4D, or if ``factor`` exceeds one of the spatial dimensions.
    """
    if factor == 1:  # No downsampling needed
        return data
    if factor < 1:
        raise ValueError(f"factor must be >= 1, got {factor}")
    if data.dim() not in (3, 4):
        raise ValueError(
            f"expected a 3D or 4D tensor, got {data.dim()}D with shape {tuple(data.shape)}"
        )

    # F.interpolate with a 2-element size needs [N, C, H, W]; add a temporary
    # channel axis for 3D input and strip it again before returning.
    added_channel = data.dim() == 3
    if added_channel:
        data = data.unsqueeze(1)

    height, width = data.shape[-2:]
    new_height, new_width = height // factor, width // factor
    if new_height == 0 or new_width == 0:
        raise ValueError(
            f"factor {factor} is larger than the spatial size ({height}, {width})"
        )

    # Use PyTorch's interpolate for downsampling
    downsampled = F.interpolate(data, size=(new_height, new_width), mode='area')
    return downsampled.squeeze(1) if added_channel else downsampled


# Process each file in the directory.
# Fail early with an actionable message when the input folder is missing,
# instead of surfacing the bare OS error raised by os.listdir.
if not os.path.isdir(data_path):
    raise FileNotFoundError(
        f"Input directory '{os.path.abspath(data_path)}' does not exist. "
        "Check 'data_path' / 'redim' and the notebook's working directory."
    )

# Sorted so files are processed (and logged) in a reproducible order.
files = sorted(f for f in os.listdir(data_path) if f.endswith('.hdf5'))

for file in files:
    input_file = os.path.join(data_path, file)
    output_file = os.path.join(output_path, file)

    # Both handles are closed on exit, including when a key raises below.
    with h5py.File(input_file, 'r') as input_data, h5py.File(output_file, 'w') as output_data:
        for key in input_data.keys():
            node = input_data[key]
            # Shape comes from metadata, so skipped keys are never read from disk.
            shape = getattr(node, 'shape', None)
            print(f"Processing key '{key}' in file '{file}' with shape {shape}")

            if key in keys_to_downsample:
                ndim = len(shape) if shape is not None else 0
                if ndim < 3:  # Process only 3D or 4D data
                    raise ValueError(f"Key '{key}' expected to have spatial dimensions but doesn't.")

                # Remember the input rank: the channel axis is removed afterwards
                # only if it was added here, so genuinely 4D single-channel data
                # keeps its shape.
                was_3d = ndim == 3
                tensor = torch.from_numpy(node[()])
                if was_3d:
                    tensor = tensor.unsqueeze(1)  # Add channel dimension

                downsampled = downsample(tensor, downsample_factor).numpy()
                if was_3d:
                    downsampled = downsampled[:, 0]

                # Save downsampled
                if key == 'temperature':
                    print('max termperature:', downsampled.max())
                output_data.create_dataset(key, data=downsampled)

            elif key in keys_to_copy:
                # Directly copy keys; [()] also works for scalar datasets.
                output_data.create_dataset(key, data=node[()])

            else:
                print(f"Skipping key '{key}' as it's not in keys_to_downsample or keys_to_copy.")

        print(f"File '{file}' processed and saved to '{output_file}'.")

print("Downsampling completed for all files.")


FileNotFoundError: [WinError 3] The system cannot find the path specified: '../../data/redimensionalized'