In [3]:
import scipy.io
import numpy as np
import os

In [4]:
# --- 1. Configuration ---
# Directory that holds the raw training data; all inputs and the output live beneath it.
_train_root = "/home/zhuyekun/projects/repos/deeprte/data/raw/train"

# Input .mat files, in merge order. The first entry is the base file that seeds
# the merged result. Edit this list to point at your own files.
mat_file_paths = [
    _train_root + suffix
    for suffix in (
        "/merge/shuffled_500_samples.mat",
        "/S-g0.01/S.mat",
        "/S-g0.95/S-g0.95.mat",
    )
]

# Destination of the merged .mat file. Edit to write somewhere else.
output_path = _train_root + "/S_merge/merged_train.mat"

In [5]:
# Load the first .mat file and keep only its variables: entries whose names
# start with '__' (loadmat's metadata fields) are dropped.
merged_data = dict(
    (name, value)
    for name, value in scipy.io.loadmat(mat_file_paths[0]).items()
    if not name.startswith('__')
)

In [6]:
# Display the names of the variables loaded from the first .mat file.
merged_data.keys()

dict_keys(['config', 'ct', 'omega_prime', 'phi', 'psi_bc', 'psi_label', 'rand_params', 'rv_prime', 'scattering_kernel', 'sigma_a', 'sigma_t', 'st', 'w_angle', 'x', 'y'])

In [7]:
import tree


def _shape_or_value(leaf):
    """Return the shape of a NumPy array; pass any other leaf through unchanged."""
    if isinstance(leaf, np.ndarray):
        return leaf.shape
    return leaf


# Summarise merged_data by replacing every array with its shape.
tree.map_structure(_shape_or_value, merged_data)

{'config': (1, 1),
 'ct': (24, 1),
 'omega_prime': (164, 12),
 'phi': (500, 41, 41),
 'psi_bc': (500, 164, 12),
 'psi_label': (500, 41, 41, 24),
 'rand_params': (1, 1000),
 'rv_prime': (164, 12, 4),
 'scattering_kernel': (500, 24, 24),
 'sigma_a': (500, 41, 41),
 'sigma_t': (500, 41, 41),
 'st': (24, 1),
 'w_angle': (24, 1),
 'x': (1, 41),
 'y': (1, 41)}

In [8]:
def load_mat_variables(path):
    """Load a .mat file and return only its user variables.

    Args:
        path: Path of the .mat file, read with ``scipy.io.loadmat``.

    Returns:
        dict mapping variable name to the loaded value, without the entries
        whose names start with ``'__'`` (loadmat's metadata fields).
    """
    return {k: v for k, v in scipy.io.loadmat(path).items() if not k.startswith('__')}


def merge_mat_dicts(base, extra, keys, source="the file being merged"):
    """Return a copy of ``base`` with ``extra``'s arrays appended along axis 0.

    The merge is all-or-nothing: ``base`` is never modified, and a result is
    only returned when every requested key was handled. This prevents a
    half-merged dataset in which some arrays gained samples and others did not.

    Args:
        base: dict of variables accumulated so far.
        extra: dict of variables from the file being merged in.
        keys: names of the variables to concatenate along the first axis.
        source: label for ``extra`` used in messages (e.g. its file path).

    Returns:
        A new dict holding every entry of ``base``, where each key in ``keys``
        that exists in both inputs maps to ``np.concatenate((base[k], extra[k]), axis=0)``.
        Entries of ``extra`` that are not in ``keys`` are ignored.

    Raises:
        KeyError: a key is present in only one of ``base`` / ``extra``.
        ValueError: the arrays for a key cannot be concatenated along axis 0
            (raised by ``np.concatenate``).
    """
    merged = dict(base)
    for key in keys:
        in_base = key in base
        in_extra = key in extra
        if not in_base and not in_extra:
            # Absent everywhere: nothing to concatenate and nothing becomes inconsistent.
            print(f"Warning: Key '{key}' not found in both base file and {source}. Skipping concatenation for this key.")
            continue
        if in_base != in_extra:
            # Concatenating the other keys anyway would leave arrays with
            # different sample counts, so reject the whole file instead.
            missing_from = source if in_base else "the base data"
            raise KeyError(f"Key '{key}' is missing from {missing_from}")
        merged[key] = np.concatenate((base[key], extra[key]), axis=0)
    return merged


# In a notebook kernel __name__ is "__main__", so the steps below run exactly as
# before; the guard only keeps the helpers above importable without side effects.
if __name__ == "__main__":
    # Check if there are files to merge
    if not mat_file_paths or not all(isinstance(p, str) for p in mat_file_paths):
        print("Error: Please specify the paths to your .mat files in the 'mat_file_paths' list.")
    else:
        # The first file seeds the merged result with all of its variables.
        base_file = mat_file_paths[0]
        print(f"Loading base file: {base_file}")

        try:
            merged_data = load_mat_variables(base_file)

            # Only these variables are concatenated along axis 0.
            # NOTE(review): every other variable (e.g. 'rand_params', 'config')
            # is taken from the base file only; confirm none of them is per-sample.
            keys_to_concatenate = ['scattering_kernel', 'sigma_a', 'sigma_t', 'phi', 'psi_bc', 'psi_label']
            print(f"Will concatenate the following keys: {keys_to_concatenate}")

            # Merge the remaining files one at a time. A file that cannot be
            # merged completely is skipped as a whole and recorded.
            skipped_files = []
            for file_path in mat_file_paths[1:]:
                print(f"Merging file: {file_path}")
                try:
                    merged_data = merge_mat_dicts(
                        merged_data,
                        load_mat_variables(file_path),
                        keys_to_concatenate,
                        source=file_path,
                    )
                except FileNotFoundError:
                    print(f"Error: File not found at {file_path}")
                    skipped_files.append(file_path)
                except Exception as e:
                    print(f"An error occurred while processing {file_path}: {e}")
                    skipped_files.append(file_path)

            # --- 3. Save the merged file ---
            print(f"Saving merged data to: {output_path}")
            # Ensure the output directory exists
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            scipy.io.savemat(output_path, merged_data)
            if skipped_files:
                # Do not claim success when part of the input was left out.
                print(f"Warning: merged file is incomplete; {len(skipped_files)} file(s) were skipped: {skipped_files}")
            else:
                print("Merging complete!")

            # --- 4. Verification (Optional) ---
            print("\nVerifying merged file...")
            verification_data = scipy.io.loadmat(output_path)
            for key, value in verification_data.items():
                if not key.startswith('__'):
                    print(f"Key: '{key}', Shape: {value.shape}")

        except FileNotFoundError:
            print(f"Error: Base file not found at {base_file}")
        except Exception as e:
            print(f"An error occurred: {e}")


Loading base file: /home/zhuyekun/projects/repos/deeprte/data/raw/train/merge/shuffled_500_samples.mat
Will concatenate the following keys: ['scattering_kernel', 'sigma_a', 'sigma_t', 'phi', 'psi_bc', 'psi_label']
Merging file: /home/zhuyekun/projects/repos/deeprte/data/raw/train/S-g0.01/S.mat
Merging file: /home/zhuyekun/projects/repos/deeprte/data/raw/train/S-g0.95/S-g0.95.mat
Saving merged data to: /home/zhuyekun/projects/repos/deeprte/data/raw/train/S_merge/merged_train.mat
Merging complete!

Verifying merged file...
Key: 'config', Shape: (1, 1)
Key: 'ct', Shape: (24, 1)
Key: 'omega_prime', Shape: (164, 12)
Key: 'phi', Shape: (1000, 41, 41)
Key: 'psi_bc', Shape: (1000, 164, 12)
Key: 'psi_label', Shape: (1000, 41, 41, 24)
Key: 'rand_params', Shape: (1, 1000)
Key: 'rv_prime', Shape: (164, 12, 4)
Key: 'scattering_kernel', Shape: (1000, 24, 24)
Key: 'sigma_a', Shape: (1000, 41, 41)
Key: 'sigma_t', Shape: (1000, 41, 41)
Key: 'st', Shape: (24, 1)
Key: 'w_angle', Shape: (24, 1)
Key: 'x', 