In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.graph_objs.layout import YAxis,XAxis
import pathlib
import numpy as np
import pyarrow as pa
import h5py
import sqlite3
import glob
import os
import re


CUPTI_ACTIVITY_KIND_MEMCPY="CUPTI_ACTIVITY_KIND_MEMCPY"
CUPTI_ACTIVITY_KIND_MEMSET="CUPTI_ACTIVITY_KIND_MEMSET"
CUPTI_ACTIVITY_KIND_KERNEL="CUPTI_ACTIVITY_KIND_KERNEL"
# CUPTI_ACTIVITY_KIND_KERNEL (
#      start                       INTEGER   NOT NULL,                    -- Event start timestamp (ns).
#      end                         INTEGER   NOT NULL,                    -- Event end timestamp (ns).
#      deviceId                    INTEGER   NOT NULL,                    -- Device ID.
#      contextId                   INTEGER   NOT NULL,                    -- Context ID.
#      streamId                    INTEGER   NOT NULL,                    -- Stream ID.
#      correlationId               INTEGER,                               -- REFERENCES CUPTI_ACTIVITY_KIND_RUNTIME(correlationId)
#      globalPid                   INTEGER,                               -- Serialized GlobalId.
#      demangledName               INTEGER   NOT NULL,                    -- REFERENCES StringIds(id) -- Kernel function name w/ templates
#      shortName                   INTEGER   NOT NULL,                    -- REFERENCES StringIds(id) -- Base kernel function name
#      mangledName                 INTEGER,                               -- REFERENCES StringIds(id) -- Raw C++ mangled kernel function name
#      launchType                  INTEGER,                               -- REFERENCES ENUM_CUDA_KERNEL_LAUNCH_TYPE(id)
#      cacheConfig                 INTEGER,                               -- REFERENCES ENUM_CUDA_FUNC_CACHE_CONFIG(id)
#      registersPerThread          INTEGER   NOT NULL,                    -- Number of registers required for each thread executing the kernel.
#      gridX                       INTEGER   NOT NULL,                    -- X-dimension grid size.
#      gridY                       INTEGER   NOT NULL,                    -- Y-dimension grid size.
#      gridZ                       INTEGER   NOT NULL,                    -- Z-dimension grid size.
#      blockX                      INTEGER   NOT NULL,                    -- X-dimension block size.
#      blockY                      INTEGER   NOT NULL,                    -- Y-dimension block size.
#      blockZ                      INTEGER   NOT NULL,                    -- Z-dimension block size.
#      staticSharedMemory          INTEGER   NOT NULL,                    -- Static shared memory allocated for the kernel (B).
#      dynamicSharedMemory         INTEGER   NOT NULL,                    -- Dynamic shared memory reserved for the kernel (B).
#      localMemoryPerThread        INTEGER   NOT NULL,                    -- Amount of local memory reserved for each thread (B).
#      localMemoryTotal            INTEGER   NOT NULL,                    -- Total amount of local memory reserved for the kernel (B).
#      gridId                      INTEGER   NOT NULL,                    -- Unique grid ID of the kernel assigned at runtime.
#      sharedMemoryExecuted        INTEGER,                               -- Shared memory size set by the driver.
#      graphNodeId                 INTEGER,                               -- REFERENCES CUDA_GRAPH_NODE_EVENTS(graphNodeId)
#      sharedMemoryLimitConfig     INTEGER                                -- REFERENCES ENUM_CUDA_SHARED_MEM_LIMIT_CONFIG(id)
#  );
CUPTI_ACTIVITY_KIND_SYNCHRONIZATION="CUPTI_ACTIVITY_KIND_SYNCHRONIZATION"
CUPTI_ACTIVITY_KIND_CUDA_EVENT="CUPTI_ACTIVITY_KIND_CUDA_EVENT"
CUPTI_ACTIVITY_KIND_GRAPH_TRACE="CUPTI_ACTIVITY_KIND_GRAPH_TRACE"
CUPTI_ACTIVITY_KIND_RUNTIME="CUPTI_ACTIVITY_KIND_RUNTIME"
TARGET_INFO_GPU_METRICS="TARGET_INFO_GPU_METRICS"
GPU_METRICS="GPU_METRICS"
# GPU_METRICS (
#      -- GPU Metrics, events and values.
#      timestamp                   INTEGER,                               -- Event timestamp (ns).
#      typeId                      INTEGER   NOT NULL,                    -- REFERENCES TARGET_INFO_GPU_METRICS(typeId) and GENERIC_EVENT_TYPES(typeId)
#      metricId                    INTEGER   NOT NULL,                    -- REFERENCES TARGET_INFO_GPU_METRICS(metricId)
#      value                       INTEGER   NOT NULL                     -- Counter data value
#  );

STRING_IDS="StringIds"
# StringIds (
#      -- Consolidation of repetitive string values.
#      id                          INTEGER   NOT NULL   PRIMARY KEY,      -- ID reference value.
#      value                       TEXT      NOT NULL                     -- String value.
#  );


METRIC_ID_PCIE_TX=0
METRIC_ID_PCIE_RX=1
METRIC_GPC_CLOCK_FREQUENCY=9
METRIC_SYS_CLOCK_FREQUENCY=10
METRIC_GR_ACTIVE=11
METRIC_SM_ACTIVE=12
METRIC_SM_ISSUE=13
METRIC_COMPUTE_WARPS=17
METRIC_UNALLOCATED_WARPS=18
METRIC_DRAM_READ=19
METRIC_DRAM_WRITE=20




In [2]:
# Make df from sqlite
input_file_sqlite = "../data/gohan/lzbench/profile/nvcomp_lz4_nsys_results_2024-03-19-121204.sqlite"

conn = sqlite3.connect(input_file_sqlite)

# gpu_metric_df = pd.read_sql("SELECT * FROM " + GPU_METRICS, conn)


In [3]:
input_file_arrow = "../data/gohan/lzbench/profile/nvcomp_cascaded_nsys_results_2024-03-19-121204.arrows" 


In [4]:
# Make df from hdf/h5
input_file_hdf = "../../../data/decompression/nvcomp_results_2024-03-21-123552/output_zstd_8gb_dickens_131072threads.h5"

gpu_metric_df = pd.read_hdf(input_file_hdf, GPU_METRICS)
kernel_event_df = pd.read_hdf(input_file_hdf, CUPTI_ACTIVITY_KIND_KERNEL)
strings_df = pd.DataFrame(np.array(h5py.File(input_file_hdf)[STRING_IDS]))
                                             
print(strings_df)



        id                                              value
0        0                                       b'[Unknown]'
1        1                               b'[kernel.kallsyms]'
2        2                                     b'[Max depth]'
3        3                             b'[Broken backtraces]'
4        4                              b'[Called from Java]'
...    ...                                                ...
6463  6463                          b'Profiling has stopped.'
6464  6464  b'Number of GPU Metrics events collected: \t52...
6465  6465  b'No NVTX events collected. Does the process u...
6466  6466     b'Number of CUDA events collected: \t2095817.'
6467  6467  b'Number of OS runtime libraries events collec...

[6468 rows x 2 columns]


In [5]:
def get_string(id) -> str:
    return str(strings_df.loc[strings_df['id'] == id, 'value'].values[0], encoding='utf-8')

def get_utilizations_of_span(begin, end):
    compute_metrics = gpu_metric_df.loc[gpu_metric_df['metricId'] == METRIC_COMPUTE_WARPS]
    return compute_metrics[compute_metrics['timestamp'].between(begin, end)]

def get_avg_utilization_of_span(begin, end) -> float:
    utilizations = get_utilizations_of_span(begin, end)
    if len(utilizations) < 1:
        # print('Waning: No utilization data found in provided range')
        return -1
    if len(utilizations) == 1:
        return utilizations['value'].iloc[0]
    
    utilizations['time_diff'] = utilizations['timestamp'].diff()
    utilizations = utilizations.dropna(subset=['time_diff', 'value'])
    time_diff_sum = utilizations['time_diff'].sum()
    if time_diff_sum == 0:
        # print("No time difference between metrics")
        return -1
    utilization = (utilizations['time_diff'] * utilizations['value']).sum() / time_diff_sum
    
    return utilization

# print(str(get_utilizations_of_span(2097867769.0, 2097917433.0)))
# print(str(get_avg_utilization_of_span(2098191128.0, 2105510856.0)))

In [18]:
compute_metrics = gpu_metric_df.loc[gpu_metric_df['metricId'] == METRIC_COMPUTE_WARPS]


fig = px.scatter(compute_metrics.sample(frac=0.1), x="timestamp", y="value")

fig.update_traces(marker=dict(size=2))
fig.show()


In [31]:
# Get timestamps of start and end of compression
kernel_events = kernel_event_df.head(100000)


kernel_events['utilization'] = kernel_events.apply(lambda row: get_avg_utilization_of_span(row['start'], row['end']), axis=1)
# print(kernel_events[['name', 'utilization', 'gridX', 'gridY', 'gridZ']])
kernel_events['name'] = kernel_events['shortName'].apply(get_string)
# kernel_events = kernel_events[kernel_events['name'] == 'lz4CompressBatchKernel']



# print(lz4_kernel[["utilization", "gridX"]])

fig = go.Figure()

# Iterate over each group and add a line plot for each name
for name, group in kernel_events.groupby(['name']):
    print(group)
    fig.add_trace(go.Scatter(x=group['gridX'], y=group['utilization'], mode='lines+markers', name=str(group['name'])))

# Customize layout
fig.update_layout(title='Utilization per Name',
                  xaxis_title='gridX',  # You can replace 'Index' with any other suitable label
                  yaxis_title='Utilization')

fig.update_xaxes(type="log")

# Show the plot
fig.show()




          start          end  deviceId  contextId  streamId  correlationId  \
0   10490394028  10505704648         3          1         7            940   
1   13655577733  13668024233         3          1         7         131921   
8   15524133684  15536556346         3          1         7         133287   
14  36056233782  36071561495         3          1         7         657146   
15  39259047656  39271470416         3          1         7         788127   
22  41086937481  41099358865         3          1         7         788179   

          globalPid  demangledName  shortName  mangledName  ...  \
0   343301802688512           2809       2810         2780  ...   
1   343301802688512           2809       2810         2780  ...   
8   343301802688512           2809       2810         2780  ...   
14  343301802688512           2809       2810         2780  ...   
15  343301802688512           2809       2810         2780  ...   
22  343301802688512           2809       2810      



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

