# Test loading data and basic preprocessing

In [1]:
import os

import h5py
import numpy as np
import torch

## Load Data from h5 file

In [2]:
# Location of the HDF5 file to load. Defaults to the original local path;
# set the RP_WEATHER_DATA_PATH environment variable to point the notebook at
# a copy of the file on another machine without editing this cell.
file_path = os.environ.get(
    'RP_WEATHER_DATA_PATH',
    '/home/squirt/Documents/data/rp_weather_data/all_data.h5',
)

In [3]:
def load_hdf5(filename):
    """Read every top-level member of an HDF5 file into a list of dicts.

    The file is opened read-only. For each top-level name, one dict is built
    with the keys 'day', 'region', 'time' (taken from the member's attributes)
    followed by 'landmass', 'x', 'y' (the member's datasets, read fully into
    memory).

    Args:
        filename: Path of the HDF5 file to open.

    Returns:
        A list with one dict per top-level member, in the order h5py yields
        the member names.

    NOTE(review): assumes every top-level member is a group carrying all six
    of the fields above, and that h5py's iteration order is the intended
    entry order -- confirm against the code that wrote the file.
    """
    attr_keys = ('day', 'region', 'time')          # metadata stored as attributes
    array_keys = ('landmass', 'x', 'y')            # datasets inside each group

    records = []
    with h5py.File(filename, 'r') as h5_file:
        for name in h5_file:
            grp = h5_file[name]
            record = {key: grp.attrs[key] for key in attr_keys}
            # Indexing with [...] reads the whole dataset while the file is
            # still open, so the returned values outlive the `with` block.
            record.update((key, grp[key][...]) for key in array_keys)
            records.append(record)
    return records

In [4]:
# Load every entry from the HDF5 file into memory as a list of dicts
# (one dict per top-level group, arrays read in full).
data_list = load_hdf5(file_path)

In [5]:
# Shape of the 'x' array of the first entry only.
data_list[0]['x'].shape

(3, 70, 2, 2)

In [6]:
def stack_data(data:list[dict], key:str) -> torch.Tensor:
    """Stack one field from every entry into a single tensor.

    Args:
        data: Entries to read from; each must be indexable by `key`.
        key: Name of the field to pull out of each entry.

    Returns:
        A tensor whose leading dimension indexes the entries of `data`, and
        whose remaining dimensions are those of each entry's `key` value.
        All values must convert to tensors of the same shape, as required
        by `torch.stack`.
    """
    per_entry = []
    for record in data:
        # torch.tensor copies the value and infers dtype from it.
        per_entry.append(torch.tensor(record[key]))
    return torch.stack(per_entry)

In [7]:
# Stack each entry's 'x' array along a new leading (entry) dimension.
X = stack_data(data_list, 'x')

In [8]:
# Leading dimension is the number of entries in data_list; the rest is the per-entry 'x' shape.
print(X.shape)

torch.Size([19200, 3, 70, 2, 2])


In [9]:
# Same stacking for the other two per-entry arrays.
LM = stack_data(data_list, 'landmass')
Y = stack_data(data_list, 'y')

In [11]:
# Shapes of the stacked 'landmass' and 'y' tensors (leading dimension = number of entries).
print(LM.shape)
print(Y.shape)

torch.Size([19200, 3, 2, 2])
torch.Size([19200, 2, 70, 2, 2])


In [17]:
# Spot-check that stacking preserved the values: the same slice taken from the
# stacked tensor (entry 0) and from the first raw entry should agree.
print(Y[0,0,0,:])
print(data_list[0]['y'][0,0,:])

tensor([[3.1161, 2.6633],
        [1.9113, 2.4286]], dtype=torch.float64)
[[3.11609554 2.66331124]
 [1.91130269 2.42855477]]
