In [6]:
import h5py 
import numpy as np
import _mgard as mgard
import time

from tqdm import tqdm

In [7]:

class Slicer:

    # default 引数
    def __init__(self,filename="/scratch/aoyagir/step1_500_test.h5") -> None:
        self.filename = filename
        self.file = h5py.File(filename, 'r')
        self.dataset = self.file['data']
        print(self.dataset.shape)

    # Access specific elements in the concatenated array
    def access_array_element(self,timestep, x, y, z):
        element = self.dataset[timestep, x, y, z]
        return element

    # Access a subset of the concatenated array
    def slice_multiple_step(self, file, timestep_start, timestep_end, x_start, x_end, y_start, y_end, z_start, z_end):
        subset = self.dataset[timestep_start:timestep_end, x_start:x_end, y_start:y_end, z_start:z_end]
        return subset
    
    # slice siingle step
    def slice_single_step(self, timestep,  x_start, x_end, y_start, y_end, z_start, z_end):
        subset = self.dataset[timestep,  x_start:x_end, y_start:y_end, z_start:z_end]
        retsubset = np.squeeze(subset)
        return retsubset

    # slice sigle step by size
    def get_xyz_offset_by_size(self, size):
        # 100MB -> 「100/(sizeof(float))」個のデータ
        sizeFloat = 4 # byte
        return int((size/sizeFloat)**(1/3))



In [8]:
slicer = Slicer("/scratch/aoyagir/step1_256_test_0902.h5")

(257, 1024, 1024, 1024)


In [9]:
# config for hardware

configGPU = mgard.Config()
configGPU.dev_type = mgard.DeviceType.CUDA

configSingle = mgard.Config()
configSingle.dev_type = mgard.DeviceType.SERIAL

configOMP = mgard.Config()
configOMP.dev_type = mgard.DeviceType.OPENMP

deviceConfig = [(configSingle,"single"),(configOMP,"omp"),(configGPU,"gpu")]

In [10]:
# create a file to write the results
import csv
from datetime import datetime

# Get the current date and time
current_time = datetime.now()

# Format the current date and time as a string
timestamp = current_time.strftime("%Y%m%d_%H%M%S")

# Create the file name based on the timestamp
csv_file = f'bench_{timestamp}.txt'

import csv

header = ['tol', 'OriginalSizeInByte','CompressedSizeInByte','CompRatio','avg_load_time', 'std_dev_load_time','load_throughput'
            ,'avg_comp_time', 'std_dev_comp_time','comp_throughput', 'avg_decomp_time',
            'std_dev_decomp_time', 'decomp_throughput', 'devName']

with open(csv_file, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(header)



In [11]:
# Define the number of repetitions and initialize a list for results
num_repetitions = 3
results = []

# Wrap your outer loop with tqdm to create a progress bar
for devtuple in tqdm(deviceConfig): # 3 iteration
    dev = devtuple[0]
    devName = devtuple[1]
    for tol in np.arange(0, 1, 0.01): # 100 iteration
        if tol == 0:
            print("passed")
            continue
        print(tol)
        OriginalSize = 1 * 1024 * 1024  # 1MB in bytes

        while OriginalSize <= 4000 * 1024 * 1024:  # 4000MB in bytes. 10 iterations
            # Your code here
            load_exe_times = []  # List to store execution times for each repetition
            comp_exe_times = []
            decomp_exe_times = []
            comp_data_size = None

            # get the offset size
            offset = slicer.get_xyz_offset_by_size(OriginalSize)
            print(offset)
            
            # Measure the execution time of the loading
            start_time = time.time()
            original = slicer.slice_single_step(0, 0, offset, 0, offset, 0, offset)
            end_time = time.time()
            load_time = end_time - start_time
            load_exe_times.append(load_time)

            for _ in range(num_repetitions):
                # Measure the execution time of compressing
                comp_start_time = time.time()
                compressed = mgard.compress(original, tol, 0, mgard.ErrorBoundType.REL, dev)
                comp_end_time = time.time()
                comp_exe_times.append(comp_end_time - comp_start_time)
                CompressedSize = compressed.nbytes

                # Measure the execution time of decompressing
                decomp_start_time = time.time()
                decompressed = mgard.decompress(compressed, dev)
                decomp_end_time = time.time()
                decomp_exe_times.append(decomp_end_time - decomp_start_time)

            # Calculate average and standard deviation of execution times
            avg_load_time = np.mean(load_exe_times)
            std_dev_load_time = np.std(load_exe_times)

            avg_comp_time = np.mean(comp_exe_times)
            std_dev_comp_time = np.std(comp_exe_times)

            avg_decomp_time = np.mean(decomp_exe_times)
            std_dev_decomp_time = np.std(decomp_exe_times)

            row_data = [tol, OriginalSize, CompressedSize,OriginalSize/CompressedSize,
                        avg_load_time, std_dev_load_time,OriginalSize/avg_load_time,
                        avg_comp_time, std_dev_comp_time,OriginalSize/avg_comp_time,
                        avg_decomp_time, std_dev_decomp_time, OriginalSize/avg_decomp_time, 
                        devName]
            # Write the data row
            with open(csv_file, "a", newline="") as f:
                writer = csv.writer(f)
                writer.writerow(row_data)

            # Double the size
            OriginalSize *= 2
            print(row_data)

# Convert the results list to a NumPy array for easier manipulation
results_array = np.array(results)

  0%|          | 0/3 [00:00<?, ?it/s]

passed
0.01
63


  0%|          | 0/3 [00:01<?, ?it/s]


NameError: name 'CompressSize' is not defined