In [None]:
import json
import os
import warnings

import altair as alt
import numpy as np
import pandas as pd

from pybench.chart import grouped_bar_chart
import pybench.themes
from pybench.utils import (
    benchmark_json_to_pandas,
    compute_speedup,
    filter_by_string_in_column,
    significant_round,
    split_params_list,
)

In [None]:
# Show the `__file__` entry of the globals of the module that defines
# `benchmark_json_to_pandas`, i.e. which file the helper was loaded from.
benchmark_json_to_pandas.__globals__['__file__']


In [None]:
# Directory holding the pytest-benchmark JSON results. This is a placeholder:
# edit it to point at your data before running the notebook. Defining it here
# (instead of leaving it commented out) keeps the notebook runnable from a
# fresh kernel without relying on a variable set elsewhere.
json_dir = '/location/of/data'

# Read each pytest-benchmark json into a pandas DataFrame. The paths are built
# with os.path.join so `json_dir` works with or without a trailing separator.
df_h100 = benchmark_json_to_pandas(
    os.path.join(json_dir, 'benchmark_results_h100.json'))
df_a100 = benchmark_json_to_pandas(
    os.path.join(json_dir, 'benchmark_results_a100.json'))

In [None]:
# Display the DataFrame loaded from the H100 benchmark results.
df_h100

In [None]:
# Expand the list-valued 'params.shape' column into one column per entry
# (in this example: 'params.shape.0' and 'params.shape.1'), for both GPUs.
df_h100, df_a100 = (
    split_params_list(benchmark_df, 'params.shape')
    for benchmark_df in (df_h100, df_a100)
)

In [None]:
# Select the "fast" operations: filter each DataFrame on the string 'cupy'
# in its 'name' column.
cupy_df_h100, cupy_df_a100 = (
    filter_by_string_in_column(benchmark_df, 'name', 'cupy')
    for benchmark_df in (df_h100, df_a100)
)

In [None]:
# Select the "slow" operations: filter each DataFrame on the string 'numpy'
# in its 'name' column.
numpy_df_h100, numpy_df_a100 = (
    filter_by_string_in_column(benchmark_df, 'name', 'numpy')
    for benchmark_df in (df_h100, df_a100)
)

In [None]:
# Operations to include in the plot, in the order listed here.
operation_list = [
    'FFT', 'Sum', 'Standard_Deviation', 'Elementwise',
    'Matrix_Multiplication', 'Array_Slicing', 'SVD', 'Stencil',
]

In [None]:
# Parameter columns used to pair up entries of the "fast" and "slow"
# DataFrames when computing the speedup.
param_list = [
    'params.shape.0',
    'params.shape.1',
]

In [None]:
# Define the metric we want to use to compute the speedup.
# This column name is passed as the last argument to `compute_speedup`.
stats_param = 'stats.median'

In [None]:
# Build one speedup DataFrame per GPU by pairing that GPU's "slow" (numpy)
# results with its "fast" (cupy) results.
speedups_df_h100, speedups_df_a100 = (
    compute_speedup(slow_df, fast_df, operation_list, param_list, stats_param)
    for slow_df, fast_df in (
        (numpy_df_h100, cupy_df_h100),
        (numpy_df_a100, cupy_df_a100),
    )
)

In [None]:
# Display the speedup DataFrame computed for the H100 results.
speedups_df_h100

In [None]:
def _prepare_speedups_for_plot(speedups_df):
    """Return a copy of a speedup DataFrame that is ready for plotting.

    The input frame is not modified, so re-running this cell is safe.

    Steps:
      1. Rename 'params.shape.0' / 'params.shape.1' to 'shape0' / 'shape1'
         (dots in column names don't seem to work with altair).
      2. Drop the rows whose 'shape0' equals 20000.
      3. Add a human readable 'size' column: '800MB' where 'shape0' is 10000,
         '8MB' for every other remaining row.
      4. Replace underscores with spaces in 'name' for better printing.

    Parameters
    ----------
    speedups_df : pandas.DataFrame
        Frame with 'params.shape.0', 'params.shape.1' and 'name' columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the transformations above applied.
    """
    renamed = speedups_df.rename(
        columns={'params.shape.0': 'shape0', 'params.shape.1': 'shape1'})

    # Filter with a boolean mask rather than dropping by index label, so that
    # only the matching rows are removed even if index labels are not unique.
    kept = renamed[renamed['shape0'] != 20000]

    return kept.assign(
        size=np.where(kept['shape0'] == 10000, '800MB', '8MB'),
        # Vectorised, literal (non-regex) replacement; missing names stay
        # missing instead of raising.
        name=kept['name'].str.replace('_', ' ', regex=False),
    )


# Apply the same preparation to the results of both GPUs.
speedups_df_h100 = _prepare_speedups_for_plot(speedups_df_h100)
speedups_df_a100 = _prepare_speedups_for_plot(speedups_df_a100)

In [None]:
# Display the A100 speedup DataFrame after the plotting preparation above.
speedups_df_a100

In [None]:
# Activate the "RAPIDS" altair theme (presumably registered by importing
# `pybench.themes` -- TODO confirm).
alt.themes.enable("RAPIDS")

# Build the grouped bar chart from the A100 speedups: the positional arguments
# name the 'size', 'speedup' and 'name' columns and list the two size labels.
chart = grouped_bar_chart(
    speedups_df_a100, 'size', 'speedup', 'name', ['800MB', '8MB'],
    # Axis / legend titles.
    y_title='GPU Speedup Over CPU',
    group_title='Operation on A100 GPU',
    legend_title='Array Size (MB)',
    # Y axis scaling.
    y_scale_type='symlog',
    y_tick_count=5,
    # Layout of each group of bars.
    bar_title_angle=-20,
    group_height=500,
    group_width=80,
)

In [None]:
# Render the chart built in the previous cell.
chart

In [None]:
# Save chart to HTML file
# chart.save('plot_array_example.html')