In [13]:
from __future__ import print_function

Load in the Python modules necessary to load the data (gzip and yaml), process the data (numpy and pandas), and plot the data (toyplot).

In [14]:
import gzip
import yaml
import numpy
import pandas
import toyplot.pdf

print("yaml version:    ", yaml.__version__)
print("numpy version:   ", numpy.__version__)
print("pandas version:  ", pandas.__version__)
print("toyplot version: ", toyplot.__version__)

yaml version:     3.12
numpy version:    1.13.3
pandas version:   0.20.3
toyplot version:  0.16.0


## Ingest Data

Load the data, which is output into a YAML file. (Actually, there are several runs that have been concatinated into a YAML file that has a sequence in its top level.)

In [15]:
filename = 'miniGraphics-skybridge-vn-scaling.yaml.gz'
yaml_data = yaml.load(gzip.open(filename))
data = pandas.DataFrame(yaml_data)

Some of the original runs gave suspect results where the runs took much longer than expected. These appear to be transient issues in the system since a repeat of the run is closer to what is expected. To get more realistic results, I re-ran these conditions. So jettison these runs from the original data and load up the re-runs.

In [16]:
# Remove incorrect readings
data = data.loc[
    ((data['composite-algorithm'] != '2-3-SwapBase') |
     (data['num-processes'] != 4096) |
     (data['image-height'] != 1080)) &
    ((data['composite-algorithm'] != 'BinarySwapFold') |
     (data['num-processes'] != 2896) |
     (data['image-height'] != 1080)) &
    ((data['composite-algorithm'] != 'BinarySwapRemainder') |
     (data['num-processes'] != 2048) |
     (data['image-height'] != 1080)) &
    ((data['composite-algorithm'] != 'BinarySwapTelescoping') |
     (data['num-processes'] != 1616) |
     (data['image-height'] != 1080))
]
# Load correct readings and add them to the data
filename = 'miniGraphics-skybridge-vn-corrections.yaml.gz'
yaml_data = yaml.load(gzip.open(filename))
data = pandas.concat([data, pandas.DataFrame(yaml_data)], ignore_index=True)

The YAML data is hierarchical. The basic yaml reader to DataFrame just embeds dictionaries and lists in DataFrame columns. Fix that by expanding the data of these columns into new columns.

In [17]:
def expand_single_column(original_data, column_to_expand):

    expanded_data = pandas.DataFrame()
    for index in original_data.index:
        sub_table = pandas.DataFrame(original_data[column_to_expand][index])
        for column in original_data.columns:
            if column != column_to_expand:
                sub_table[column] = numpy.full(sub_table.index.shape,
                                               original_data[column][index],
                                               dtype=original_data[column].dtype)
        expanded_data = expanded_data.append(sub_table, ignore_index=True)
    return expanded_data

def flatten_table(original_data):
    flat_data = original_data
    for column_name in original_data.columns:
        if isinstance(flat_data[column_name][0], list):
            flat_data = expand_single_column(flat_data, column_name)
    return flat_data

In [18]:
data = flatten_table(data)

Add a column that gives a human-readable name to each image resolution.

In [19]:
image_height_names = {
    500: 'Desktop Window',
    1080: 'HDTV',
    4320: '8K UHD',
}

data['image-size'] = data['image-height'].map(image_height_names)

Rename the algorithms from the identifiers the program wrote out to the strings used in the paper. Note that there are some extras in the data that we are ignoring.

In [20]:
algorithm_names = {
    '2-3-SwapBase': '2-3 Swap',
    'BinarySwapFold': 'Naive',
    'BinarySwapTelescoping': 'Telescoping',
    'BinarySwapRemainder': 'Remainder',
    'IceTBase': 'IceT'
}

data['composite-algorithm'] = data['composite-algorithm'].map(algorithm_names)

Print a summary of the table data. There are multiple ways that Jupyter and pandas will report a summary of a table, but I find this method the most effective. It prints out every column. Then for all columns with a "small" number of unique values, it gives those values. This latter information really helps identify the proper way to group values.

In [21]:
import IPython.display

data_description = ''

for column_name in data.columns:
    data_description = data_description + '**' + column_name + '** '
    unique_values = data[column_name].unique()
    if (len(unique_values) < 10):
        for value in unique_values:
            data_description = data_description + str(value) + ' '
    elif (numpy.issubdtype(unique_values.dtype, numpy.number)):
        data_description = (
            data_description +
            str(numpy.nanmin(unique_values)) + ' &ndash; ' +
            str(numpy.nanmax(unique_values)) + ' '
        )
    elif not pandas.isnull(unique_values).any():
        data_description = (
            data_description +
            str(numpy.min(unique_values)) + ' &ndash; ' +
            str(numpy.max(unique_values)) + ' '
        )
    data_description = data_description + ' \n'
    
IPython.display.display(IPython.display.Markdown(data_description))

**color-buffer-format** byte  
**composite-algorithm** Naive Remainder Telescoping 2-3 Swap IceT nan  
**composite-seconds** 0.00119348 &ndash; 3.49925  
**compress-seconds**  
**construct-tree-seconds** 0.000182052 &ndash; 0.0610641  
**depth-buffer-format** float  
**gather-seconds** 0.00071677 &ndash; 3.45673  
**geometry** box  
**geometry-distribution** duplicate  
**geometry-overlap** -0.05  
**icet-copy-result-seconds**  
**image-compression** True False  
**image-height** 288 1080 4320  
**image-width** 512 1920 7680  
**num-processes** 128 &ndash; 8192  
**num-triangles** 1536 &ndash; 98304  
**paint-seconds** 0.00011823 &ndash; 0.147793  
**painter** simple  
**partial-composite-seconds** 0.000293153 &ndash; 0.86969  
**phi-rotation** -178.406 &ndash; 136.756  
**random-seed** 17627  
**rendering-order-dependent** False  
**start-time** 2018-05-30T02:23:10.000000000 &ndash; 2018-06-08T12:10:45.000000000  
**theta-rotation** -177.945 &ndash; 179.418  
**total-seconds** 0.00146716 &ndash; 3.59942  
**trial-num** 0 &ndash; 19  
**uncompress-seconds**  
**zoom** 1.5  
**image-size** nan HDTV 8K UHD  


## Plot Data

We are plotting for the 2-3 swap algorithm the time it takes to the "partial composite" breaking it up into the time for building the compositing tree and for everything else.

In [22]:
data['transfer-blend-seconds'] = data['partial-composite-seconds'] - data['construct-tree-seconds']

In [23]:
averages = data.pivot_table(
    values=['transfer-blend-seconds', 'construct-tree-seconds'],
    index='num-processes',
    columns=[
        'image-size',
        'composite-algorithm',
    ],
    aggfunc='mean',
)

averages

Unnamed: 0_level_0,construct-tree-seconds,construct-tree-seconds,transfer-blend-seconds,transfer-blend-seconds
image-size,8K UHD,HDTV,8K UHD,HDTV
composite-algorithm,2-3 Swap,2-3 Swap,2-3 Swap,2-3 Swap
num-processes,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
128,0.000217,0.000217,0.424664,0.023374
160,0.000246,0.000255,0.440288,0.024236
176,0.000257,0.000273,0.443721,0.023623
192,0.000284,0.000275,0.437828,0.023621
224,0.000336,0.000336,0.437326,0.024135
256,0.000436,0.000421,0.413616,0.022869
272,0.000448,0.000448,0.428039,0.023452
320,0.000505,0.000499,0.42589,0.023074
352,0.00055,0.000545,0.426634,0.022911
400,0.000613,0.000611,0.425138,0.023225


Create a filled chart showing the times to construct the composite tree and the rest of the composite.

In [24]:
image_size = 'HDTV'
algorithm = '2-3 Swap'

canvas = toyplot.Canvas('4.5in', '3in')

axes = canvas.cartesian(
    xlabel='Number of Processes',
    ylabel='Partial Composite Time (seconds)',
    xscale='log',
    bounds=(45,-15,15,-50),
)

#axes.x.ticks.locator = toyplot.locator.Log(base=2, format='{:.0f}')
axes.x.ticks.locator = toyplot.locator.Explicit(
    locations=[128, 256, 512, 1024, 2048, 4096, 8192],
)
axes.y.domain.min = 0

x = averages.index
y = numpy.column_stack((
    numpy.zeros(numpy.shape(x)),
    numpy.array(averages['transfer-blend-seconds', image_size, algorithm]),
    numpy.array(averages['construct-tree-seconds', image_size, algorithm] +
                averages['transfer-blend-seconds', image_size, algorithm]),
))

axes.fill(
    x,
    y,
    color=toyplot.color.Palette()[0],
    opacity=[1.0, 0.6],
)

axes.text(
    1300,
    0.04,
    'Constructing Composite Tree',
    color=toyplot.color.Palette()[0],
    #opacity=0.6,
)

axes.text(
    1300,
    0.01,
    'Remaining Compositing (Transfer/Blending)',
    color='white',
)

<toyplot.mark.Text at 0x12b9e3850>

In [25]:
toyplot.pdf.render(canvas, '2-3-swap-overhead.pdf')