In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
import numpy as np
import pyarrow as pa
import nsys_constants
import nsys_reader

# Directory that holds one profiling result file per
# (compressor, input file, thread count) combination.
input_dir = "../../../data/decompression/profiling_result_all/"

# Compression standards and input files that were profiled.
compressors = ["snappy", "zstd"]
compression_files = [
    "8gb_dickens",
    "8gb_mozilla",
    "8gb_mr",
    "8gb_nci",
    "8gb_samba",
    "8gb_sao",
    "8gb_silesia.zip",
    "8gb_xml",
    "8gb_lineitem.parquet",
]

# Thread counts used in the result file names: 8 * 4**i for i = 0..10,
# i.e. 8, 32, 128, ..., 8388608.
approx_number_of_threads = [8 * 4 ** exponent for exponent in range(11)]

# 8 GiB in bytes (8589934592); divided by the thread count to get the chunk size.
FILE_SIZE = 8 * 1024 ** 3

In [2]:
# Generate dataframe with utilization data
#
# For every (file, compressor, thread count) combination, open the matching
# profiling result file and record the compression and decompression
# utilization it reports. Combinations whose file cannot be read are reported
# on stdout and skipped, so result_df only contains the runs that succeeded.
result_columns = ['standard', 'chunk_size', 'file', 'compression_utilization', 'decompression_utilization']
result_rows = []

for compression_file in compression_files:
    for compressor in compressors:
        for threads_num in approx_number_of_threads:
            input_file = input_dir + 'output_' + str(compressor) + '_' + str(compression_file) + '_' + str(threads_num) + 'threads.h5'
            # Integer division keeps the chunk size exact (no float round trip).
            chunk_size = FILE_SIZE // threads_num
            print(input_file)
            try:
                reader = nsys_reader.NsysReader(input_file)
                # NOTE(review): assumes the reader returns
                # (compression, decompression) in that order, as the original
                # unpacking names imply - confirm against nsys_reader.
                compression_utilization, decompression_utilization = reader.get_compressions_utilizations()
            except Exception as e:
                # Best effort: a missing or unreadable profile must not abort the sweep.
                print('Failed getting utilization of ' + str(compressor) + ' ' + str(compression_file) + ' ' + str(threads_num) + ' threads: ' +str(e))
                continue
            # Each value goes into the column of the same name; the previous
            # version stored the two measurements in each other's column.
            result_rows.append({
                'standard': str(compressor),
                'chunk_size': int(chunk_size),
                'file': str(compression_file),
                'compression_utilization': float(compression_utilization),
                'decompression_utilization': float(decompression_utilization),
            })

# Build the frame once from the collected records instead of enlarging it row
# by row; passing the columns keeps the schema even when every run failed.
result_df = pd.DataFrame(result_rows, columns=result_columns)

print(result_df)


../../../data/decompression/profiling_result_all/output_snappy_8gb_dickens_8threads.h5
../../../data/decompression/profiling_result_all/output_snappy_8gb_dickens_32threads.h5
../../../data/decompression/profiling_result_all/output_snappy_8gb_dickens_128threads.h5
../../../data/decompression/profiling_result_all/output_snappy_8gb_dickens_512threads.h5
../../../data/decompression/profiling_result_all/output_snappy_8gb_dickens_2048threads.h5
../../../data/decompression/profiling_result_all/output_snappy_8gb_dickens_8192threads.h5
../../../data/decompression/profiling_result_all/output_snappy_8gb_dickens_32768threads.h5
../../../data/decompression/profiling_result_all/output_snappy_8gb_dickens_131072threads.h5
../../../data/decompression/profiling_result_all/output_snappy_8gb_dickens_524288threads.h5
Failed getting utilization of snappy 8gb_dickens 524288 threads: [Errno 2] Unable to synchronously open file (unable to open file: name = '../../../data/decompression/profiling_result_all/outp

In [3]:
# Decompression utilization against chunk size (log-scaled x axis):
# one panel per compression standard, one line per input file.
fig_utilization_standard = px.line(
    data_frame=result_df,
    x="chunk_size",
    y="decompression_utilization",
    color="file",
    facet_col="standard",
    facet_col_spacing=0.05,
    log_x=True,
    markers=True,
    title="GPU utilization at different chunk sizes",
    width=1200,
)

fig_utilization_standard.show()

In [4]:
# Utilization per compressor

In [9]:
# Decompression utilization against chunk size (log-scaled x axis):
# one row per input file, one line per compression standard.
fig_utilization_file = px.line(
    data_frame=result_df,
    x="chunk_size",
    y="decompression_utilization",
    color="standard",
    facet_row="file",
    facet_row_spacing=0.05,
    log_x=True,
    markers=True,
    title="GPU utilization at different chunk sizes",
    height=1200,
    width=500,
)

fig_utilization_file.show()