In [1]:
import h5py 
import numpy as np
import _mgard as mgard
import time

from tqdm import tqdm

In [2]:
import numpy as np

def psnr(original_data, decompressed_data):
    """
    Calculate Peak Signal-to-Noise Ratio (PSNR) between two 3D arrays with dynamic maximum value determination.

    Args:
        original_data (numpy.ndarray): The original data as a 3D numpy array (float).
        decompressed_data (numpy.ndarray): The decompressed data as a 3D numpy array (float).

    Returns:
        float: The PSNR value.
    """
    # Ensure the input arrays have the same shape
    if original_data.shape != decompressed_data.shape:
        raise ValueError("Input arrays must have the same shape")

    # Calculate the mean squared error (MSE) between the two arrays
    mse = np.mean((original_data - decompressed_data) ** 2)

    # Calculate the dynamic maximum possible value based on the maximum value in the input arrays
    max_possible_value = np.max([np.max(original_data), np.max(decompressed_data)])

    # Calculate the PSNR using the formula: PSNR = 20 * log10(max_possible_value / sqrt(MSE))
    psnr_value = 20 * np.log10(max_possible_value / np.sqrt(mse))

    return psnr_value


In [3]:

class Slicer:

    # default 引数
    def __init__(self,filename="/scratch/aoyagir/step1_500_test.h5") -> None:
        self.filename = filename
        self.file = h5py.File(filename, 'r')
        self.dataset = self.file['data']
        print(self.dataset.shape)

    # Access specific elements in the concatenated array
    def access_array_element(self,timestep, x, y, z):
        element = self.dataset[timestep, x, y, z]
        return element

    # Access a subset of the concatenated array
    def slice_multiple_step(self, file, timestep_start, timestep_end, x_start, x_end, y_start, y_end, z_start, z_end):
        subset = self.dataset[timestep_start:timestep_end, x_start:x_end, y_start:y_end, z_start:z_end]
        return subset
    
    # slice siingle step
    def slice_single_step(self, timestep,  x_start, x_end, y_start, y_end, z_start, z_end):
        subset = self.dataset[timestep,  x_start:x_end, y_start:y_end, z_start:z_end]
        retsubset = np.squeeze(subset)
        return retsubset

    # slice sigle step by size
    def get_xyz_offset_by_size(self, size):
        # 100MB -> 「100/(sizeof(float))」個のデータ
        sizeFloat = 4 # byte
        return int((size/sizeFloat)**(1/3))



In [4]:
slicer = Slicer("/scratch/aoyagir/step1_256_test_0902.h5")

(257, 1024, 1024, 1024)


In [5]:
# create a file to write the results
import csv
from datetime import datetime

# Get the current date and time
current_time = datetime.now()

# Format the current date and time as a string
timestamp = current_time.strftime("%Y%m%d_%H%M%S")

# timestep to conduct the benchmark
timestep = 128

# Create the file name based on the timestamp
csv_file = f'bench_{timestamp}_timestep_{timestep}_psrn_by_tol.txt'

import csv

header = ['tol', 'oriSize','compSize','psnr']

with open(csv_file, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(header)



In [6]:
# tol vs original size
# Define the number of repetitions and initialize a list for results

results = []
tols = [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.02, 0.03, 0.05, 0.1, 0.5, 1]

configGPU = mgard.Config()
configGPU.dev_type = mgard.DeviceType.CUDA

# Wrap your outer loop with tqdm to create a progress bar

for tol in tols: # 100 iteration
    if tol == 0:
        print("passed")
        continue
    print(tol)

    OriginalSize = 4 * 1024 * 1024 * 1024  # 4GiB in bytes

    # get the offset size
    offset = slicer.get_xyz_offset_by_size(OriginalSize)
    original = slicer.slice_single_step(timestep, 0, offset, 0, offset, 0, offset)
    compressed = mgard.compress(original, tol, 0, mgard.ErrorBoundType.REL, configGPU)
    compressedSize = compressed.nbytes
    decompressed = mgard.decompress(compressed, configGPU)
    p = psnr(original,decompressed)
    
    row_data = [tol, OriginalSize,compressedSize, p]

    # Write the data row
    with open(csv_file, "a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(row_data)

    # Double the size
    OriginalSize *= 2
    print(row_data)

# Convert the results list to a NumPy array for easier manipulation
results_array = np.array(results)

1e-05
[1e-05, 4294967296, 2766447413, 124.4773847052398]
5e-05
[5e-05, 4294967296, 1603978301, 110.49793216245654]
0.0001
[0.0001, 4294967296, 1342334629, 104.47754364940104]
0.0005
[0.0005, 4294967296, 914715461, 90.49814793327542]
0.001
[0.001, 4294967296, 765424741, 84.48348384904678]
0.005
[0.005, 4294967296, 452057581, 70.94916005969932]
0.01
[0.01, 4294967296, 332661157, 65.79997359253977]
0.02
[0.02, 4294967296, 250654469, 61.36111691813233]
0.03
[0.03, 4294967296, 217611885, 59.12882761758198]
0.05
[0.05, 4294967296, 188387525, 56.59077153773158]
0.1
[0.1, 4294967296, 164155469, 53.59503341229156]
0.5
[0.5, 4294967296, 141688309, 46.9418292993016]
1
[1, 4294967296, 138357557, 44.18206494653654]
