In [7]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
import numpy as np
import pyarrow as pa
from pynsys.nsys_scanner import NsysScanner

# --- Configuration -----------------------------------------------------------
# Directory that is handed to NsysScanner as the location of the profiles.
input_dir = '../data/compare_gpu_exact/a100/'

# Compression standards to look up.
compressors = [
    'snappy',
    'zstd',
    'gdeflate',
    'lz4',
    'cascaded',
    'bitcomp',
]

# Input data sets to look up.
compression_files = [
    '1gb_dickens',
    '1gb_mozilla',
    '1gb_mr',
    '1gb_nci',
    '1gb_samba',
    '1gb_sao',
    '1gb_silesia.zip',
    '1gb_xml',
    '1gb_lineitem.parquet',
]

# Thread counts to look up; each entry is 4x the previous one (8 ... 8_388_608).
approx_number_of_threads = [
    8,
    32,
    128,
    512,
    2_048,
    8_192,
    32_768,
    131_072,
    524_288,
    2_097_152,
    8_388_608,
]

# 500 * 1024 * 1024 == 524288000 (presumably bytes -- confirm against NsysScanner).
# NOTE(review): the input files are named '1gb_*' while this value is 500 * 1024**2;
# confirm that this is the intended size.
FILE_SIZE = 500 * 1024 * 1024


In [8]:
# Generate the dataframe with utilization data.
#
# NsysScanner is pointed at `input_dir` and asked for every combination of
# input file, compressor and thread count from the configuration cell.
# Per the recorded cell output, combinations whose profile cannot be opened are
# reported as "Failed getting utilization of ..." messages; whether such
# combinations are left out of `result_df` is not visible from this notebook.
result_df = NsysScanner(input_dir, FILE_SIZE).get_utilisation_df(
    compression_files,
    compressors,
    approx_number_of_threads,
)

# Show a bounded, rich (HTML) preview as the cell result instead of print()-ing
# the whole frame as plain text.
result_df.head(10)


Failed getting utilization of snappy 1gb_dickens 8 threads: "Unable to synchronously open object (object 'TARGET_INFO_GPU_METRICS' doesn't exist)"
1gb_dickens  snappy  32  1.0  1.0
1gb_dickens  snappy  128  3.7944029247549396  3.8545732499714047
1gb_dickens  snappy  512  14.488014950759863  14.315356417182802
1gb_dickens  snappy  2048  35.1622545180542  51.721908475121545
1gb_dickens  snappy  8192  48.09527640270693  84.65191530144759
1gb_dickens  snappy  32768  51.68407097433246  95.61622077132225
Failed getting utilization of snappy 1gb_dickens 131072 threads: "Unable to synchronously open object (object 'TARGET_INFO_GPU_METRICS' doesn't exist)"
1gb_dickens  snappy  524288  52.877961535873794  96.91268910267198
Failed getting utilization of snappy 1gb_dickens 2097152 threads: [Errno 2] Unable to synchronously open file (unable to open file: name = '../data/compare_gpu_exact/a100/output_snappy_1gb_dickens_2097152threads.h5', errno = 2, error message = 'No such file or directory', flag

In [9]:
# Compression utilization against chunk size (logarithmic x-axis):
# one facet row per value of "standard", one coloured line per "file".
fig_utilization_standard = px.line(
    data_frame=result_df,
    x="chunk_size",
    y="compression_utilization",
    color="file",
    facet_row="standard",
    markers=True,
    log_x=True,
    height=1000,
    title="GPU utilization per compressor at different chunk sizes",
)

fig_utilization_standard.show()

In [10]:
# Decompression utilization against chunk size (logarithmic x-axis):
# one facet row per value of "standard", one coloured line per "file".
# The figure variable name is shared with the compression plot of the
# preceding cell and is rebound here.
fig_utilization_standard = px.line(
    data_frame=result_df,
    x="chunk_size",
    y="decompression_utilization",
    color="file",
    facet_row="standard",
    markers=True,
    log_x=True,
    height=1000,
    width=1000,
    title="GPU utilization per decompressor at different chunk sizes",
)

fig_utilization_standard.show()

In [11]:
# Compression utilization against chunk size (logarithmic x-axis):
# one facet row per value of "file", one coloured line per "standard".
fig_utilization_file = px.line(
    data_frame=result_df,
    x="chunk_size",
    y="compression_utilization",
    color="standard",
    facet_row="file",
    markers=True,
    log_x=True,
    height=1200,
    width=1000,
    title="GPU utilization during compression per file at different chunk sizes",
)

fig_utilization_file.show()

In [12]:
# Decompression utilization against chunk size (logarithmic x-axis):
# one facet row per value of "file", one coloured line per "standard".
# The figure variable name is shared with the compression plot of the
# preceding cell and is rebound here.
fig_utilization_file = px.line(
    result_df,
    # Plain string: the title has no placeholders, and the doubled space
    # between "file" and "at" has been removed.
    title="GPU utilization during decompression per file at different chunk sizes",
    x="chunk_size",
    y="decompression_utilization",
    color="standard",
    facet_row="file",
    log_x=True,
    markers=True,
    height=1200,
    width=1000,
)

fig_utilization_file.show()