In [8]:
import h5py 
import numpy as np
import _mgard as mgard
import time

import lz4.frame as lz4frame



In [9]:

class Slicer:

    # default 引数
    def __init__(self,filename="/scratch/aoyagir/step1_500_test.h5") -> None:
        self.filename = filename
        self.file = h5py.File(filename, 'r')
        self.dataset = self.file['data']
        print(self.dataset.shape)

    # Access specific elements in the concatenated array
    def access_array_element(self,timestep, x, y, z):
        element = self.dataset[timestep, x, y, z]
        return element

    # Access a subset of the concatenated array
    def slice_multiple_step(self, file, timestep_start, timestep_end, x_start, x_end, y_start, y_end, z_start, z_end):
        subset = self.dataset[timestep_start:timestep_end, x_start:x_end, y_start:y_end, z_start:z_end]
        return subset
    
    # slice siingle step
    def slice_single_step(self, timestep,  x_start, x_end, y_start, y_end, z_start, z_end):
        subset = self.dataset[timestep,  x_start:x_end, y_start:y_end, z_start:z_end]
        retsubset = np.squeeze(subset)
        return retsubset

    # slice sigle step by size
    def get_xyz_offset_by_size(self, size):
        # 100MB -> 「100/(sizeof(float))」個のデータ
        sizeFloat = 4 # byte
        return int((size/sizeFloat)**(1/3))



In [14]:
slicer = Slicer("/scratch/aoyagir/step1_256_test_0902.h5")

(257, 1024, 1024, 1024)


In [16]:
# create a file to write the results
import csv
from datetime import datetime

# Get the current date and time
current_time = datetime.now()

# Format the current date and time as a string
timestamp = current_time.strftime("%Y%m%d_%H%M%S")

# timestep to conduct the benchmark
timestep = 128

# Create the file name based on the timestamp
csv_file = f'bench_{timestamp}_timestep_{timestep}.txt'

import csv

header = ['OriginalSizeInByte','CompressedSizeInByte','CompRatio','avg_load_time', 'std_dev_load_time','load_throughput'
            ,'avg_comp_time', 'std_dev_comp_time','comp_throughput', 'avg_decomp_time',
            'std_dev_decomp_time', 'decomp_throughput']

In [17]:
# Define the number of repetitions and initialize a list for results
num_repetitions = 3
results = []
timestep = 128
OriginalSize = 1 * 1024 * 1024  # 1MB in bytes

while OriginalSize <= 8000 * 1024 * 1024:  # 4000MB in bytes. 10 iterations
    # Your code here
    load_exe_times = []  # List to store execution times for each repetition
    comp_exe_times = []
    decomp_exe_times = []
    comp_data_size = None

    # get the offset size
    offset = slicer.get_xyz_offset_by_size(OriginalSize)
    print(offset)
    
    # Measure the execution time of the loading
    start_time = time.time()
    original = slicer.slice_single_step(timestep, 0, offset, 0, offset, 0, offset)
    end_time = time.time()
    load_time = end_time - start_time
    load_exe_times.append(load_time)

    for _ in range(num_repetitions):
        # Measure the execution time of compressing
        comp_start_time = time.time()
        compressed = lz4frame.compress(original)
        # compressed = mgard.compress(original, tol, 0, mgard.ErrorBoundType.REL, dev)
        comp_end_time = time.time()
        comp_exe_times.append(comp_end_time - comp_start_time)
        CompressedSize = len(compressed)

        # Measure the execution time of decompressing
        decomp_start_time = time.time()
        decompressed = lz4frame.decompress(compressed)
        decomp_end_time = time.time()
        decomp_exe_times.append(decomp_end_time - decomp_start_time)

    # Calculate average and standard deviation of execution times
    avg_load_time = np.mean(load_exe_times)
    std_dev_load_time = np.std(load_exe_times)

    avg_comp_time = np.mean(comp_exe_times)
    std_dev_comp_time = np.std(comp_exe_times)

    avg_decomp_time = np.mean(decomp_exe_times)
    std_dev_decomp_time = np.std(decomp_exe_times)

    row_data = [OriginalSize, CompressedSize,OriginalSize/CompressedSize,
                avg_load_time, std_dev_load_time,OriginalSize/avg_load_time,
                avg_comp_time, std_dev_comp_time,OriginalSize/avg_comp_time,
                avg_decomp_time, std_dev_decomp_time, OriginalSize/avg_decomp_time, 
                ]
    # Write the data row
    with open(csv_file, "a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(row_data)

    # Double the size
    OriginalSize *= 2
    print(row_data)

# Convert the results list to a NumPy array for easier manipulation
results_array = np.array(results)

63
[1048576, 1000271, 1.0482919128916064, 0.006097555160522461, 0.0, 171966628.0001564, 0.001126686731974284, 0.00015864498775680768, 930672182.6417437, 0.0005818208058675131, 0.0001030049816285773, 1802231871.7814505]
80
[2097152, 2048147, 1.0239265052752562, 0.007956981658935547, 0.0, 263561245.94618565, 0.004338264465332031, 0.0007285808856823922, 483408057.93624973, 0.0010494391123453777, 5.436094646325979e-05, 1998355097.8132524]
101
[4194304, 4121475, 1.017670615495666, 0.013971805572509766, 0.0, 300197707.32084227, 0.006493409474690755, 0.0020632448710528124, 645932466.8108585, 0.005886554718017578, 0.0010675552829048094, 712522723.5486432]
127
[8388608, 8194055, 1.0237431894220872, 0.018749713897705078, 0.0, 447399253.4375016, 0.012086073557535807, 0.004714271126402844, 694072227.846868, 0.006667613983154297, 0.0025550561497504633, 1258112425.4034185]
161
[16777216, 16694163, 1.0049749723900503, 0.03353452682495117, 0.0, 500296786.28168416, 0.022109429041544598, 0.0099285912209

In [18]:
# Define the number of repetitions and initialize a list for results
import bz2

num_repetitions = 3
results = []
timestep = 128
OriginalSize = 1 * 1024 * 1024  # 1MB in bytes

while OriginalSize <= 8000 * 1024 * 1024:  # 4000MB in bytes. 10 iterations
    # Your code here
    load_exe_times = []  # List to store execution times for each repetition
    comp_exe_times = []
    decomp_exe_times = []
    comp_data_size = None

    # get the offset size
    offset = slicer.get_xyz_offset_by_size(OriginalSize)
    print(offset)
    
    # Measure the execution time of the loading
    start_time = time.time()
    original = slicer.slice_single_step(timestep, 0, offset, 0, offset, 0, offset)
    end_time = time.time()
    load_time = end_time - start_time
    load_exe_times.append(load_time)

    for _ in range(num_repetitions):
        # Measure the execution time of compressing
        comp_start_time = time.time()
        compressed = bz2.compress(original)
        # compressed = mgard.compress(original, tol, 0, mgard.ErrorBoundType.REL, dev)
        comp_end_time = time.time()
        comp_exe_times.append(comp_end_time - comp_start_time)
        CompressedSize = len(compressed)

        # Measure the execution time of decompressing
        decomp_start_time = time.time()
        decompressed = bz2.decompress(compressed)
        decomp_end_time = time.time()
        decomp_exe_times.append(decomp_end_time - decomp_start_time)

    # Calculate average and standard deviation of execution times
    avg_load_time = np.mean(load_exe_times)
    std_dev_load_time = np.std(load_exe_times)

    avg_comp_time = np.mean(comp_exe_times)
    std_dev_comp_time = np.std(comp_exe_times)

    avg_decomp_time = np.mean(decomp_exe_times)
    std_dev_decomp_time = np.std(decomp_exe_times)

    row_data = [OriginalSize, CompressedSize,OriginalSize/CompressedSize,
                avg_load_time, std_dev_load_time,OriginalSize/avg_load_time,
                avg_comp_time, std_dev_comp_time,OriginalSize/avg_comp_time,
                avg_decomp_time, std_dev_decomp_time, OriginalSize/avg_decomp_time, 
                ]
    # Write the data row
    with open(csv_file, "a", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(row_data)

    # Double the size
    OriginalSize *= 2
    print(row_data)

# Convert the results list to a NumPy array for easier manipulation
results_array = np.array(results)

63
[1048576, 924246, 1.1345204631667327, 0.32573723793029785, 0.0, 3219085.4403461763, 0.2729654312133789, 0.13290995404500866, 3841424.151545113, 0.1978297233581543, 0.13203697942280254, 5300396.635047809]
80
[2097152, 1902450, 1.1023427685353098, 0.006132364273071289, 0.0, 341980989.1609191, 0.37320995330810547, 0.0035520544155651286, 5619228.483621617, 0.21511014302571616, 0.0004049784250989548, 9749200.900067683]
101
[4194304, 3847625, 1.0901020759559468, 0.010094642639160156, 0.0, 415498017.10949457, 0.7455356121063232, 0.0012428835915500836, 5625893.561475957, 0.4456464449564616, 0.014045393967712769, 9411729.96546572]
127
[8388608, 7645332, 1.097219584447085, 0.017945528030395508, 0.0, 467448379.6627031, 1.5174321333567302, 0.0246381082772366, 5528160.248882734, 0.8841447830200195, 0.04337842256968647, 9487821.634084174]
161
[16777216, 15548795, 1.0790042572430854, 0.02635049819946289, 0.0, 636694451.5812598, 3.110009034474691, 0.03969509561735575, 5394587.544287899, 1.810675064