In [1]:
import h5py 
import numpy as np
import _mgard as mgard
import time

from tqdm import tqdm

In [15]:
import numpy as np

def psnr(original_data, decompressed_data):
    """
    Calculate Peak Signal-to-Noise Ratio (PSNR) between two 3D arrays with dynamic maximum value determination.

    Args:
        original_data (numpy.ndarray): The original data as a 3D numpy array (float).
        decompressed_data (numpy.ndarray): The decompressed data as a 3D numpy array (float).

    Returns:
        float: The PSNR value.
    """
    # Ensure the input arrays have the same shape
    if original_data.shape != decompressed_data.shape:
        raise ValueError("Input arrays must have the same shape")

    # Calculate the mean squared error (MSE) between the two arrays
    mse = np.mean((original_data - decompressed_data) ** 2)

    # Calculate the dynamic maximum possible value based on the maximum value in the input arrays
    max_possible_value = np.max([np.max(original_data), np.max(decompressed_data)])

    # Calculate the PSNR using the formula: PSNR = 20 * log10(max_possible_value / sqrt(MSE))
    psnr_value = 20 * np.log10(max_possible_value / np.sqrt(mse))

    return psnr_value


In [10]:

class Slicer:

    # default 引数
    def __init__(self,filename="/scratch/aoyagir/step1_500_test.h5") -> None:
        self.filename = filename
        self.file = h5py.File(filename, 'r')
        self.dataset = self.file['data']
        print(self.dataset.shape)

    # Access specific elements in the concatenated array
    def access_array_element(self,timestep, x, y, z):
        element = self.dataset[timestep, x, y, z]
        return element

    # Access a subset of the concatenated array
    def slice_multiple_step(self, file, timestep_start, timestep_end, x_start, x_end, y_start, y_end, z_start, z_end):
        subset = self.dataset[timestep_start:timestep_end, x_start:x_end, y_start:y_end, z_start:z_end]
        return subset
    
    # slice siingle step
    def slice_single_step(self, timestep,  x_start, x_end, y_start, y_end, z_start, z_end):
        subset = self.dataset[timestep,  x_start:x_end, y_start:y_end, z_start:z_end]
        retsubset = np.squeeze(subset)
        return retsubset

    # slice sigle step by size
    def get_xyz_offset_by_size(self, size):
        # 100MB -> 「100/(sizeof(float))」個のデータ
        sizeFloat = 4 # byte
        return int((size/sizeFloat)**(1/3))



In [11]:
slicer = Slicer("/scratch/aoyagir/step1_256_test_0902.h5")

(257, 1024, 1024, 1024)


In [12]:
# create a file to write the results
import csv
from datetime import datetime

# Get the current date and time
current_time = datetime.now()

# Format the current date and time as a string
timestamp = current_time.strftime("%Y%m%d_%H%M%S")

# timestep to conduct the benchmark
timestep = 128

# Create the file name based on the timestamp
csv_file = f'bench_{timestamp}_timestep_{timestep}_psrn_by_tol.txt'

import csv

header = ['tol', 'oriSize','psnr']

with open(csv_file, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(header)



In [16]:
# Define the number of repetitions and initialize a list for results

results = []
tols = [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.02, 0.03, 0.05, 0.1, 0.5, 1]

configGPU = mgard.Config()
configGPU.dev_type = mgard.DeviceType.CUDA

# Wrap your outer loop with tqdm to create a progress bar

for tol in tols: # 100 iteration
    if tol == 0:
        print("passed")
        continue
    print(tol)
    OriginalSize = 1 * 1024 * 1024  # 1MB in bytes

    while OriginalSize <= 8000 * 1024 * 1024:  # 4000MB in bytes. 10 iterations
        # Your code here
        load_exe_times = []  # List to store execution times for each repetition
        comp_exe_times = []
        decomp_exe_times = []
        comp_data_size = None

        # get the offset size
        offset = slicer.get_xyz_offset_by_size(OriginalSize)
        
        original = slicer.slice_single_step(timestep, 0, offset, 0, offset, 0, offset)
        compressed = mgard.compress(original, tol, 0, mgard.ErrorBoundType.REL, configGPU)
        compressedSize = compressed.nbytes
        decompressed = mgard.decompress(compressed, configGPU)

        p = psnr(original,decompressed)


        row_data = [tol, OriginalSize,compressedSize, p]

        # Write the data row
        with open(csv_file, "a", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(row_data)

        # Double the size
        OriginalSize *= 2
        print(row_data)

# Convert the results list to a NumPy array for easier manipulation
results_array = np.array(results)

1e-05
63
[1e-05, 1048576, 115.46810642547464]
80
[1e-05, 2097152, 113.11041589705331]
101
[1e-05, 4194304, 109.75245921398282]
127
[1e-05, 8388608, 106.88988568687407]
161
[1e-05, 16777216, 108.22153989527126]
203
[1e-05, 33554432, 110.48372697594488]
255
[1e-05, 67108864, 115.98918799870472]
322
[1e-05, 134217728, 115.62137553307383]
406
[1e-05, 268435456, 115.26235857635648]
511
[1e-05, 536870912, 116.23810404199055]
645
[1e-05, 1073741824, 117.39339870783965]
812
[1e-05, 2147483648, 118.17591492218904]
1023
[1e-05, 4294967296, 124.4773847052398]
5e-05
63
[5e-05, 1048576, 101.48069218217043]
80
[5e-05, 2097152, 99.14260137572671]
101
[5e-05, 4194304, 95.77030324881507]
127
[5e-05, 8388608, 92.91079474727738]
161
[5e-05, 16777216, 94.24268135463316]
203
[5e-05, 33554432, 96.50234789752118]
255
[5e-05, 67108864, 102.01277251546053]
322
[5e-05, 134217728, 101.63978107311465]
406
[5e-05, 268435456, 101.28386826913923]
511
[5e-05, 536870912, 102.25826628324681]
645
[5e-05, 1073741824, 103

In [None]:
# tol vs original size
# Define the number of repetitions and initialize a list for results

results = []
tols = [0.00001, 0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.02, 0.03, 0.05, 0.1, 0.5, 1]

configGPU = mgard.Config()
configGPU.dev_type = mgard.DeviceType.CUDA

# Wrap your outer loop with tqdm to create a progress bar

for tol in tols: # 100 iteration
    if tol == 0:
        print("passed")
        continue
    print(tol)

    OriginalSize = 4 * 1024 * 1024 * 1024  # 4GiB in bytes

    # get the offset size
    offset = slicer.get_xyz_offset_by_size(OriginalSize)
    original = slicer.slice_single_step(timestep, 0, offset, 0, offset, 0, offset)
    compressed = mgard.compress(original, tol, 0, mgard.ErrorBoundType.REL, configGPU)
    compressedSize = compressed.nbytes
    decompressed = mgard.decompress(compressed, configGPU)
    p = psnr(original,decompressed)
    
    row_data = [tol, OriginalSize,compressedSize, p]

    # Write the data row
    with open(csv_file, "a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(row_data)

    # Double the size
    OriginalSize *= 2
    print(row_data)

# Convert the results list to a NumPy array for easier manipulation
results_array = np.array(results)