In [1]:
from __future__ import print_function

Load in the Python modules necessary to load the data (gzip and yaml), process the data (numpy and pandas), and plot the data (toyplot).

In [2]:
import gzip
import yaml
import numpy
import pandas
import toyplot.pdf

# Record the library versions this analysis was run with.
for label, module in (
    ("yaml version:    ", yaml),
    ("numpy version:   ", numpy),
    ("pandas version:  ", pandas),
    ("toyplot version: ", toyplot),
):
    print(label, module.__version__)

yaml version:     3.12
numpy version:    1.14.0
pandas version:   0.22.0
toyplot version:  0.16.0


## Ingest Data

Load the data, which is stored in gzip-compressed YAML files.

In [3]:
# Read each gzip-compressed YAML file into a DataFrame. The `with` blocks make
# sure the compressed file handles are closed as soon as parsing is done.
#
# NOTE(review): yaml.load without an explicit Loader can construct arbitrary
# Python objects. These files are presumably trusted benchmark output; if they
# could ever come from elsewhere, switch to yaml.safe_load.
filename = 'miniGraphics-skybridge-vn-scaling.yaml.gz'
with gzip.open(filename) as yaml_file:
    yaml_data = yaml.load(yaml_file)
data_vn = pandas.DataFrame(yaml_data)

filename = 'miniGraphics-skybridge-smp-scaling.yaml.gz'
with gzip.open(filename) as yaml_file:
    yaml_data = yaml.load(yaml_file)
data_smp = pandas.DataFrame(yaml_data)

The YAML data is hierarchical. Loading it directly into a DataFrame just embeds the nested dictionaries and lists in DataFrame columns. Fix that by expanding the data in these columns into new rows and columns.

In [4]:
def expand_single_column(original_data, column_to_expand):
    """Expand one column of nested records into rows of a flat table.

    Each entry of ``original_data[column_to_expand]`` is handed to
    ``pandas.DataFrame`` to build a sub-table. The values of every other
    column in that row are then copied onto each row of the sub-table, and
    all sub-tables are stacked under a fresh 0..n-1 index.

    Parameters
    ----------
    original_data : pandas.DataFrame
        Table holding the nested column.
    column_to_expand : str
        Name of the column whose entries are expanded. The column itself does
        not appear in the result; the columns of its sub-tables do.

    Returns
    -------
    pandas.DataFrame
        The stacked sub-tables, or an empty DataFrame when ``original_data``
        has no rows.
    """
    carried_columns = [column for column in original_data.columns
                       if column != column_to_expand]

    # Collect the pieces and concatenate once at the end. Appending to a
    # DataFrame inside the loop copies the accumulated table on every
    # iteration, and DataFrame.append no longer exists in pandas >= 2.0.
    sub_tables = []
    for index in original_data.index:
        sub_table = pandas.DataFrame(original_data[column_to_expand][index])
        for column in carried_columns:
            # Repeat the parent row's value once per sub-table row, keeping
            # the dtype of the source column.
            sub_table[column] = numpy.full(sub_table.index.shape,
                                           original_data[column][index],
                                           dtype=original_data[column].dtype)
        sub_tables.append(sub_table)

    if not sub_tables:
        # pandas.concat rejects an empty list; return an empty table instead.
        return pandas.DataFrame()
    return pandas.concat(sub_tables, ignore_index=True)

def flatten_table(original_data):
    """Expand every list-valued column of a table into flat rows and columns.

    A column is treated as list-valued when its first entry is a ``list``;
    each such column is expanded with ``expand_single_column``. Only the
    columns present in ``original_data`` are examined, so list columns that
    first appear during an expansion are left as they are.

    Parameters
    ----------
    original_data : pandas.DataFrame
        Table that may contain list-valued columns.

    Returns
    -------
    pandas.DataFrame
        The flattened table, or ``original_data`` itself when nothing needed
        expanding (including when the table has no rows).
    """
    flat_data = original_data
    for column_name in original_data.columns:
        column = flat_data[column_name]
        # Inspect the first row by position: a label lookup of 0 raises
        # KeyError on an empty table or on an index that has no label 0.
        if len(column) > 0 and isinstance(column.iloc[0], list):
            flat_data = expand_single_column(flat_data, column_name)
    return flat_data

In [5]:
# Replace each raw table with its flattened form (list-valued columns
# expanded by flatten_table).
data_vn = flatten_table(data_vn)
data_smp = flatten_table(data_smp)

Add a column that gives a human-readable name to each image resolution.

In [6]:
# Image height in pixels -> display name used for that resolution.
image_height_names = dict([
    (500, 'Desktop Window'),
    (1080, 'HDTV'),
    (4320, '8K UHD'),
])

# Heights missing from the lookup become NaN in the new column.
for table in (data_vn, data_smp):
    table['image-size'] = table['image-height'].map(image_height_names)

Rename the algorithms from the identifiers the program wrote out to the strings used in the paper. Note that there are some extras in the data that we are ignoring.

In [7]:
# Identifier written by the program -> algorithm name used in the paper.
algorithm_names = {
    '2-3-SwapBase': '2-3 Swap',
    'BinarySwapFold': 'Naive',
    'BinarySwapTelescoping': 'Telescoping',
    'BinarySwapRemainder': 'Remainder',
    'IceTBase': 'IceT'
}

# This cell overwrites the column it reads from, so a second run would see
# the paper names and map all of them to NaN. Mapping each paper name to
# itself as well makes re-running the cell a no-op.
algorithm_lookup = dict(algorithm_names)
algorithm_lookup.update(
    (paper_name, paper_name) for paper_name in algorithm_names.values()
)

# Identifiers not listed above (the extras we are ignoring) still become NaN.
for table in (data_vn, data_smp):
    table['composite-algorithm'] = (
        table['composite-algorithm'].map(algorithm_lookup)
    )

Print a summary of the table data. There are multiple ways that Jupyter and pandas will report a summary of a table, but I find this method the most effective. It prints out every column. Then for all columns with a "small" number of unique values, it gives those values. This latter information really helps identify the proper way to group values.

In [8]:
import IPython.display


def describe_table(title, table, small_count=10):
    """Build a Markdown summary listing every column of a table.

    Parameters
    ----------
    title : str
        Heading text, emitted as a level-5 Markdown header.
    table : pandas.DataFrame
        Table to summarize.
    small_count : int, optional
        Columns with fewer than this many unique values have each value
        listed; all other columns are shown as a "min - max" range.

    Returns
    -------
    str
        Markdown text with one line per column.
    """
    parts = ['##### ' + title + '\n\n']
    for column_name in table.columns:
        unique_values = table[column_name].unique()
        if len(unique_values) < small_count:
            summary = ''.join(str(value) + ' ' for value in unique_values)
        else:
            # numpy.min/numpy.max are applied to the unique values as they
            # are, so a column containing NaN can be reported as "nan - nan".
            summary = (
                str(numpy.min(unique_values)) + ' &ndash; ' +
                str(numpy.max(unique_values)) + ' '
            )
        # The two spaces before the newline force a Markdown line break.
        parts.append('**' + column_name + '**: ' + summary + ' \n')
    return ''.join(parts)


data_description = (
    describe_table('Virtual Node', data_vn) +
    '\n' +
    describe_table('SMP', data_smp)
)

IPython.display.display(IPython.display.Markdown(data_description))

  return umr_minimum(a, axis, None, out, keepdims)
  return umr_maximum(a, axis, None, out, keepdims)


##### Virtual Node

**color-buffer-format**: byte  
**composite-algorithm**: Naive Remainder Telescoping nan 2-3 Swap IceT  
**composite-seconds**: 0.00227687 &ndash; 28.7058  
**compress-seconds**: 2.315e-06 &ndash; 8e-06  
**construct-tree-seconds**: nan &ndash; nan  
**depth-buffer-format**: float  
**gather-seconds**: 0.00095674 &ndash; 4.52609  
**geometry**: box  
**geometry-distribution**: duplicate  
**geometry-overlap**: -0.05  
**icet-copy-result-seconds**: 1.304e-06 &ndash; 5e-05  
**image-compression**: True False  
**image-height**: 500 1080 4320  
**image-width**: 500 1920 7680  
**k**: nan &ndash; 8,8,8,8  
**max-image-split**: nan 1000000.0  
**num-processes**: 64 &ndash; 8192  
**num-triangles**: 768 &ndash; 98304  
**paint-seconds**: 0.00020071 &ndash; 0.170633  
**painter**: simple  
**partial-composite-seconds**: 0.00037141 &ndash; 27.0723  
**phi-rotation**: -178.406 &ndash; 136.756  
**random-seed**: 17627  
**rendering-order-dependent**: False  
**start-time**: 2018-04-03T12:58:47.000000000 &ndash; 2018-04-18T10:04:49.000000000  
**theta-rotation**: -177.945 &ndash; 179.418  
**total-seconds**: 0.00280725 &ndash; 28.7062  
**trial-num**: 0 &ndash; 19  
**uncompress-seconds**: 1.337e-06 &ndash; 8e-06  
**zoom**: 1.5  
**image-size**: Desktop Window HDTV 8K UHD  

##### SMP

**color-buffer-format**: byte  
**composite-algorithm**: Naive Remainder Telescoping nan 2-3 Swap IceT  
**composite-seconds**: 0.00136872 &ndash; 0.903929  
**compress-seconds**: 8.137e-06 &ndash; 2e-05  
**construct-tree-seconds**: nan &ndash; nan  
**depth-buffer-format**: float  
**gather-seconds**: 0.00071163 &ndash; 0.51739  
**geometry**: box  
**geometry-distribution**: duplicate  
**geometry-overlap**: -0.05  
**icet-copy-result-seconds**: nan &ndash; nan  
**image-compression**: True False  
**image-height**: 500 1080 4320  
**image-width**: 500 1920 7680  
**k**: nan &ndash; 8,8,4  
**max-image-split**: nan 1000000.0  
**num-processes**: 33 &ndash; 512  
**num-triangles**: 396 &ndash; 6144  
**paint-seconds**: 0.000216328 &ndash; 0.126701  
**painter**: simple  
**partial-composite-seconds**: 0.000240892 &ndash; 0.61017  
**phi-rotation**: -178.406 &ndash; 136.756  
**random-seed**: 17627  
**rendering-order-dependent**: False  
**start-time**: 2018-04-05T16:27:11.000000000 &ndash; 2018-04-18T10:00:13.000000000  
**theta-rotation**: -177.945 &ndash; 179.418  
**total-seconds**: 0.00175639 &ndash; 1.00893  
**trial-num**: 0 &ndash; 19  
**uncompress-seconds**: 2.56e-06 &ndash; 7e-06  
**zoom**: 1.5  
**image-size**: Desktop Window HDTV 8K UHD  


## Plot Data

We are plotting the time it takes to do a "partial composite" (that is the time to blend all the pixels, but the pixels are left distributed across all the processes).

The first thing we want to do is to average the time it took over all trials. This is easily done with a pivot table. We also need to filter the data to the runs that were performed in both VN and SMP modes. These are the runs with between 64 and 512 processes.

In [9]:
# Boolean mask selecting the virtual-node runs with 64..512 processes
# (both bounds inclusive).
filtered_indices = data_vn['num-processes'].between(64, 512)

# Mean partial-composite time over all trials: one row per process count,
# one column per (image size, algorithm) pair.
average_partial_composite_vn = pandas.pivot_table(
    data_vn[filtered_indices],
    index='num-processes',
    columns=['image-size', 'composite-algorithm'],
    values='partial-composite-seconds',
    aggfunc='mean',
)

average_partial_composite_vn

image-size,8K UHD,8K UHD,8K UHD,8K UHD,8K UHD,Desktop Window,Desktop Window,Desktop Window,Desktop Window,Desktop Window,HDTV,HDTV,HDTV,HDTV,HDTV
composite-algorithm,2-3 Swap,IceT,Naive,Remainder,Telescoping,2-3 Swap,IceT,Naive,Remainder,Telescoping,2-3 Swap,IceT,Naive,Remainder,Telescoping
num-processes,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
64,0.385781,0.065904,0.378764,0.37883,0.379878,0.002578,0.001049,0.002421,0.002388,0.002378,0.021511,0.004144,0.020699,0.02065,0.020404
96,0.386223,0.050465,0.689278,0.369679,0.37297,0.002681,0.000905,0.004221,0.00238,0.00233,0.020162,0.003233,0.038309,0.020051,0.020226
128,0.376284,0.04722,0.366858,0.36704,0.364874,0.002604,0.000921,0.005175,0.002306,0.002314,0.020711,0.003177,0.019816,0.019754,0.019822
144,0.385339,0.036686,0.675675,0.365959,0.366791,0.002709,0.000783,0.004077,0.002313,0.002327,0.02082,0.002511,0.037759,0.019917,0.019812
192,0.377325,0.034736,0.671982,0.363636,0.365166,0.002777,0.000807,0.00409,0.00238,0.002322,0.020209,0.002371,0.037716,0.019785,0.019953
256,0.374036,0.033015,0.364563,0.364479,0.364077,0.002898,0.000787,0.00231,0.002299,0.002304,0.02087,0.002478,0.019719,0.019568,0.019753
288,0.381443,0.027747,0.675306,0.364104,0.365186,0.003062,0.000904,0.004009,0.002395,0.002362,0.020717,0.001973,0.037366,0.019705,0.019729
384,0.372013,0.024902,0.667428,0.360336,0.359958,0.00308,0.000727,0.004071,0.002317,0.002307,0.020187,0.00191,0.0373,0.019585,0.019639
432,0.379653,0.026139,0.669627,0.36107,0.3638,0.003319,0.00075,0.004149,0.002303,0.002315,0.021254,0.002065,0.037033,0.019693,0.019699
512,0.374163,0.024898,0.362974,0.361116,0.36179,0.003412,0.000764,0.002316,0.002321,0.002323,0.021579,0.002171,0.019696,0.019818,0.019739


In [10]:
# Boolean mask selecting the SMP runs with 64..512 processes
# (both bounds inclusive).
filtered_indices = data_smp['num-processes'].between(64, 512)

# Mean partial-composite time over all trials: one row per process count,
# one column per (image size, algorithm) pair.
average_partial_composite_smp = pandas.pivot_table(
    data_smp[filtered_indices],
    index='num-processes',
    columns=['image-size', 'composite-algorithm'],
    values='partial-composite-seconds',
    aggfunc='mean',
)

average_partial_composite_smp

image-size,8K UHD,8K UHD,8K UHD,8K UHD,8K UHD,Desktop Window,Desktop Window,Desktop Window,Desktop Window,Desktop Window,HDTV,HDTV,HDTV,HDTV,HDTV
composite-algorithm,2-3 Swap,IceT,Naive,Remainder,Telescoping,2-3 Swap,IceT,Naive,Remainder,Telescoping,2-3 Swap,IceT,Naive,Remainder,Telescoping
num-processes,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
64,0.307841,0.037455,0.290259,0.288952,0.291212,0.002017,0.000498,0.001866,0.001876,0.001889,0.016569,0.002347,0.01498,0.015495,0.015518
66,0.311272,0.036759,0.53512,0.284088,0.281505,0.001964,0.000494,0.003213,0.001737,0.001777,0.016374,0.002401,0.027608,0.014931,0.014982
72,0.320871,0.034298,0.559619,0.282821,0.283425,0.002058,0.000456,0.003543,0.001804,0.001813,0.017016,0.002127,0.029466,0.015071,0.015043
81,0.314611,0.036029,0.561087,0.2879,0.284968,0.002049,0.000499,0.003569,0.001795,0.001803,0.01699,0.002227,0.029696,0.015224,0.015316
96,0.302945,0.028671,0.562406,0.281927,0.282976,0.001976,0.00045,0.003524,0.00181,0.001803,0.016397,0.001894,0.029576,0.0153,0.015148
99,0.313586,0.033798,0.556161,0.285803,0.283042,0.002043,0.00054,0.003474,0.001782,0.001761,0.017046,0.002198,0.028939,0.01501,0.015044
108,0.312559,0.027405,0.557984,0.278308,0.2833,0.002119,0.000425,0.00348,0.001727,0.001744,0.017135,0.001834,0.029542,0.014959,0.015142
128,0.29229,0.023212,0.275243,0.275089,0.275514,0.001915,0.000416,0.001697,0.001728,0.001724,0.015988,0.001624,0.014803,0.014847,0.014812
132,0.300673,0.023672,0.532675,0.272355,0.275149,0.002045,0.000433,0.003179,0.001714,0.001723,0.016336,0.001744,0.027294,0.014566,0.01462
144,0.316673,0.025793,0.550598,0.277241,0.278625,0.002159,0.000397,0.003516,0.001742,0.001753,0.017132,0.001738,0.029429,0.015117,0.01501


In [11]:
# Largest mean partial-composite time among the HDTV columns of the
# virtual-node table: per-column maxima first, then the maximum of those.
average_partial_composite_vn['HDTV'].max().max()

0.03830913

Make a grouping structure of the data so we can pull out the actual data values for each trial.

In [12]:
image_size = 'HDTV'

canvas = toyplot.Canvas('9.25in', '3in')

x_vn = average_partial_composite_vn.index
x_smp = average_partial_composite_smp.index

# One y-axis ceiling shared by every panel: the largest mean time for this
# image size across both tables.
maxy = max(
    numpy.max(numpy.max(table[image_size]))
    for table in (average_partial_composite_vn, average_partial_composite_smp)
)

xlocator = toyplot.locator.Log(base=2, format='{:.0f}')


def doPlot(axes, algorithm):
    """Draw the VN and SMP curves for one algorithm on the given axes.

    Reads the module-level ``image_size``, ``maxy``, ``xlocator``, ``x_vn``,
    ``x_smp`` and the two averaged tables. Both curves are plotted first and
    then a right-aligned label is added for each of them.
    """
    axes.x.ticks.locator = xlocator
    axes.y.domain.min = 0
    axes.y.domain.max = maxy

    y_vn = numpy.array(average_partial_composite_vn[(image_size, algorithm)])
    y_smp = numpy.array(average_partial_composite_smp[(image_size, algorithm)])

    for x_values, y_values in ((x_vn, y_vn), (x_smp, y_smp)):
        axes.plot(x_values, y_values)

    # Labels are anchored at x = 512, the upper process-count bound used when
    # the tables were filtered.
    labels = (
        (numpy.max(y_vn), 'Virtual Node: ', 'last-baseline'),
        (numpy.min(y_smp), 'Pure Distributed: ', 'top'),
    )
    for y_anchor, prefix, vertical_align in labels:
        axes.text(
            512, y_anchor, prefix + algorithm,
            style={
                'text-anchor': 'end',
                '-toyplot-vertical-align': vertical_align,
            }
        )


# (algorithm, panel bounds, extra axes options). Only the left-most panel
# carries the y-axis label.
panel_layout = (
    ('Naive', ('5%', '33%', '5%', '85%'),
     {'ylabel': 'Partial Composite Time (seconds)'}),
    ('2-3 Swap', ('38%', '66%', '5%', '85%'), {}),
    ('Remainder', ('70%', '99%', '5%', '85%'), {}),
)

panel_axes = []
for algorithm, bounds, extra_options in panel_layout:
    axes = canvas.cartesian(
        xlabel='Number of Processes',
        xscale='log',
        bounds=bounds,
        **extra_options
    )
    doPlot(axes, algorithm)
    panel_axes.append(axes)

axesNaive, axes23, axesRemainder = panel_axes

In [13]:
# Render the current `canvas` (the HDTV figure built in the previous cell)
# to a PDF file with toyplot's PDF backend.
toyplot.pdf.render(canvas, 'vn-vs-smp-hdtv.pdf')

In [14]:
image_size = '8K UHD'

canvas = toyplot.Canvas('9.25in', '3in')

x_vn = average_partial_composite_vn.index
x_smp = average_partial_composite_smp.index

# One y-axis ceiling shared by every panel: the largest mean time for this
# image size across both tables.
maxy = max(
    numpy.max(numpy.max(table[image_size]))
    for table in (average_partial_composite_vn, average_partial_composite_smp)
)

xlocator = toyplot.locator.Log(base=2, format='{:.0f}')


def doPlot(axes, algorithm):
    """Draw the VN and SMP curves for one algorithm on the given axes.

    Reads the module-level ``image_size``, ``maxy``, ``xlocator``, ``x_vn``,
    ``x_smp`` and the two averaged tables. Both curves are plotted first and
    then a right-aligned label is added for each of them.
    """
    axes.x.ticks.locator = xlocator
    axes.y.domain.min = 0
    axes.y.domain.max = maxy

    y_vn = numpy.array(average_partial_composite_vn[(image_size, algorithm)])
    y_smp = numpy.array(average_partial_composite_smp[(image_size, algorithm)])

    for x_values, y_values in ((x_vn, y_vn), (x_smp, y_smp)):
        axes.plot(x_values, y_values)

    # Labels are anchored at x = 512, the upper process-count bound used when
    # the tables were filtered.
    labels = (
        (numpy.max(y_vn), 'Virtual Node: ', 'last-baseline'),
        (numpy.min(y_smp), 'Pure Distributed: ', 'top'),
    )
    for y_anchor, prefix, vertical_align in labels:
        axes.text(
            512, y_anchor, prefix + algorithm,
            style={
                'text-anchor': 'end',
                '-toyplot-vertical-align': vertical_align,
            }
        )


# (algorithm, panel bounds, extra axes options). Only the left-most panel
# carries the y-axis label.
panel_layout = (
    ('Naive', ('5%', '33%', '5%', '85%'),
     {'ylabel': 'Partial Composite Time (seconds)'}),
    ('2-3 Swap', ('38%', '66%', '5%', '85%'), {}),
    ('Remainder', ('70%', '99%', '5%', '85%'), {}),
)

panel_axes = []
for algorithm, bounds, extra_options in panel_layout:
    axes = canvas.cartesian(
        xlabel='Number of Processes',
        xscale='log',
        bounds=bounds,
        **extra_options
    )
    doPlot(axes, algorithm)
    panel_axes.append(axes)

axesNaive, axes23, axesRemainder = panel_axes

In [15]:
# Render the current `canvas` (the 8K UHD figure built in the previous cell)
# to a PDF file with toyplot's PDF backend.
toyplot.pdf.render(canvas, 'vn-vs-smp-8k.pdf')