Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions lib/benchmark_runner.rb
@@ -16,16 +16,6 @@ def free_file_no(directory)
end
end

# Sort benchmarks with headlines first, then others, then micro
def sort_benchmarks(bench_names, metadata)
headline_benchmarks = metadata.select { |_, meta| meta['category'] == 'headline' }.keys
micro_benchmarks = metadata.select { |_, meta| meta['category'] == 'micro' }.keys

headline_names, bench_names = bench_names.partition { |name| headline_benchmarks.include?(name) }
micro_names, other_names = bench_names.partition { |name| micro_benchmarks.include?(name) }
headline_names.sort + other_names.sort + micro_names.sort
end

# Checked system - error or return info if the command fails
def check_call(command, env: {}, raise_error: true, quiet: false)
puts("+ #{command}") unless quiet
170 changes: 170 additions & 0 deletions lib/results_table_builder.rb
@@ -0,0 +1,170 @@
require_relative '../misc/stats'
require 'yaml'

class ResultsTableBuilder
SECONDS_TO_MS = 1000.0
BYTES_TO_MIB = 1024.0 * 1024.0

def initialize(executable_names:, bench_data:, include_rss: false)
@executable_names = executable_names
@bench_data = bench_data
@include_rss = include_rss
@base_name = executable_names.first
@other_names = executable_names[1..]
@bench_names = compute_bench_names
end

def build
table = [build_header]
format = build_format

@bench_names.each do |bench_name|
next unless has_complete_data?(bench_name)

row = build_row(bench_name)
table << row
end

[table, format]
end

private

def has_complete_data?(bench_name)
@bench_data.all? { |(_k, v)| v[bench_name] }
end

def build_header
header = ["bench"]

@executable_names.each do |name|
header << "#{name} (ms)" << "stddev (%)"
header << "RSS (MiB)" if @include_rss
end

@other_names.each do |name|
header << "#{name} 1st itr"
end

@other_names.each do |name|
header << "#{@base_name}/#{name}"
end

header
end

def build_format
format = ["%s"]

@executable_names.each do |_name|
format << "%.1f" << "%.1f"
format << "%.1f" if @include_rss
end

@other_names.each do |_name|
format << "%.3f"
end

@other_names.each do |_name|
format << "%.3f"
end

format
end

def build_row(bench_name)
t0s = extract_first_iteration_times(bench_name)
times_no_warmup = extract_benchmark_times(bench_name)
rsss = extract_rss_values(bench_name)

base_t0, *other_t0s = t0s
base_t, *other_ts = times_no_warmup
base_rss, *other_rsss = rsss

row = [bench_name]
build_base_columns(row, base_t, base_rss)
build_comparison_columns(row, other_ts, other_rsss)
build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)

row
end

def build_base_columns(row, base_t, base_rss)
row << mean(base_t)
row << stddev_percent(base_t)
row << base_rss if @include_rss
end

def build_comparison_columns(row, other_ts, other_rsss)
other_ts.zip(other_rsss).each do |other_t, other_rss|
row << mean(other_t)
row << stddev_percent(other_t)
row << other_rss if @include_rss
end
end

def build_ratio_columns(row, base_t0, other_t0s, base_t, other_ts)
ratio_1sts = other_t0s.map { |other_t0| base_t0 / other_t0 }
ratios = other_ts.map { |other_t| mean(base_t) / mean(other_t) }
row.concat(ratio_1sts)
row.concat(ratios)
end

def extract_first_iteration_times(bench_name)
@executable_names.map do |name|
data = bench_data_for(name, bench_name)
(data['warmup'][0] || data['bench'][0]) * SECONDS_TO_MS
end
end

def extract_benchmark_times(bench_name)
@executable_names.map do |name|
bench_data_for(name, bench_name)['bench'].map { |v| v * SECONDS_TO_MS }
end
end

def extract_rss_values(bench_name)
@executable_names.map do |name|
bench_data_for(name, bench_name)['rss'] / BYTES_TO_MIB
end
end

def bench_data_for(name, bench_name)
@bench_data[name][bench_name]
end

def mean(values)
Stats.new(values).mean
end

def stddev(values)
Stats.new(values).stddev
end

def stddev_percent(values)
100 * stddev(values) / mean(values)
end

def compute_bench_names
benchmarks_metadata = YAML.load_file('benchmarks.yml')
sort_benchmarks(all_benchmark_names, benchmarks_metadata)
end

def all_benchmark_names
@bench_data.values.flat_map(&:keys).uniq
end

# Sort benchmarks with headlines first, then others, then micro
def sort_benchmarks(bench_names, metadata)
bench_names.sort_by { |name| [category_priority(name, metadata), name] }
end

def category_priority(bench_name, metadata)
category = metadata.dig(bench_name, 'category') || 'other'
case category
when 'headline' then 0
when 'micro' then 2
else 1
end
end
end
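
As a quick illustration of the refactored sort, here is a minimal standalone sketch of the ordering sort_benchmarks now produces. The benchmark names and categories are borrowed from the removed test fixtures below, and the lambda mirrors category_priority rather than calling the private method:

# Illustrative sketch only, not part of the diff.
metadata = {
  'railsbench' => { 'category' => 'headline' },
  'optcarrot' => { 'category' => 'headline' },
  'some_bench' => { 'category' => 'other' },
  'fib' => { 'category' => 'micro' }
}

# Same priority mapping as category_priority above.
priority = lambda do |name|
  case metadata.dig(name, 'category') || 'other'
  when 'headline' then 0
  when 'micro' then 2
  else 1
  end
end

names = ['fib', 'some_bench', 'railsbench', 'optcarrot']
names.sort_by { |name| [priority.call(name), name] }
# => ["optcarrot", "railsbench", "some_bench", "fib"]
# Headlines first (alphabetical within each category), then others, then micro,
# matching the ordering of the partition-based implementation that was removed.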
77 changes: 10 additions & 67 deletions run_benchmarks.rb
@@ -8,25 +8,12 @@
require 'rbconfig'
require 'etc'
require 'yaml'
require_relative 'misc/stats'
require_relative 'lib/cpu_config'
require_relative 'lib/benchmark_runner'
require_relative 'lib/benchmark_suite'
require_relative 'lib/table_formatter'
require_relative 'lib/argument_parser'

def mean(values)
Stats.new(values).mean
end

def stddev(values)
Stats.new(values).stddev
end

def sort_benchmarks(bench_names)
benchmarks_metadata = YAML.load_file('benchmarks.yml')
BenchmarkRunner.sort_benchmarks(bench_names, benchmarks_metadata)
end
require_relative 'lib/results_table_builder'

args = ArgumentParser.parse(ARGV)

@@ -36,15 +23,14 @@ def sort_benchmarks(bench_names)
FileUtils.mkdir_p(args.out_path)

ruby_descriptions = {}
args.executables.each do |name, executable|
ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp
end

# Benchmark with and without YJIT
bench_start_time = Time.now.to_f
bench_data = {}
bench_failures = {}
args.executables.each do |name, executable|
ruby_descriptions[name] = `#{executable.shelljoin} -v`.chomp

suite = BenchmarkSuite.new(
ruby: executable,
ruby_description: ruby_descriptions[name],
@@ -61,9 +47,6 @@ def sort_benchmarks(bench_names)
end

bench_end_time = Time.now.to_f
# Get keys from all rows in case a benchmark failed for only some executables.
bench_names = sort_benchmarks(bench_data.map { |k, v| v.keys }.flatten.uniq)

bench_total_time = (bench_end_time - bench_start_time).to_i
puts("Total time spent benchmarking: #{bench_total_time}s")

@@ -73,55 +56,15 @@

puts

# Table for the data we've gathered
# Build results table
all_names = args.executables.keys
base_name, *other_names = all_names
table = [["bench"]]
format = ["%s"]
all_names.each do |name|
table[0] += ["#{name} (ms)", "stddev (%)"]
format += ["%.1f", "%.1f"]
if args.rss
table[0] += ["RSS (MiB)"]
format += ["%.1f"]
end
end
other_names.each do |name|
table[0] += ["#{name} 1st itr"]
format += ["%.3f"]
end
other_names.each do |name|
table[0] += ["#{base_name}/#{name}"]
format += ["%.3f"]
end

# Format the results table
bench_names.each do |bench_name|
# Skip this bench_name if we failed to get data for any of the executables.
next unless bench_data.all? { |(_k, v)| v[bench_name] }

t0s = all_names.map { |name| (bench_data[name][bench_name]['warmup'][0] || bench_data[name][bench_name]['bench'][0]) * 1000.0 }
times_no_warmup = all_names.map { |name| bench_data[name][bench_name]['bench'].map { |v| v * 1000.0 } }
rsss = all_names.map { |name| bench_data[name][bench_name]['rss'] / 1024.0 / 1024.0 }

base_t0, *other_t0s = t0s
base_t, *other_ts = times_no_warmup
base_rss, *other_rsss = rsss

ratio_1sts = other_t0s.map { |other_t0| base_t0 / other_t0 }
ratios = other_ts.map { |other_t| mean(base_t) / mean(other_t) }

row = [bench_name, mean(base_t), 100 * stddev(base_t) / mean(base_t)]
row << base_rss if args.rss
other_ts.zip(other_rsss).each do |other_t, other_rss|
row += [mean(other_t), 100 * stddev(other_t) / mean(other_t)]
row << other_rss if args.rss
end

row += ratio_1sts + ratios

table << row
end
builder = ResultsTableBuilder.new(
executable_names: all_names,
bench_data: bench_data,
include_rss: args.rss
)
table, format = builder.build

output_path = nil
if args.out_override
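
For reference, a minimal sketch of the [table, format] pair ResultsTableBuilder#build returns, using hypothetical timing data for two executables. The executable names, the 'fib' entry, and all values are made up for illustration, and it assumes it is run from the repo root so that benchmarks.yml and misc/stats are available:

# Illustrative sketch only, not part of the diff.
require_relative 'lib/results_table_builder'

# Hypothetical input in the shape run_benchmarks.rb collects:
# per-iteration times in seconds, RSS in bytes.
bench_data = {
  'base' => { 'fib' => { 'warmup' => [0.30], 'bench' => [0.20, 0.21], 'rss' => 50 * 1024 * 1024 } },
  'yjit' => { 'fib' => { 'warmup' => [0.25], 'bench' => [0.10, 0.11], 'rss' => 60 * 1024 * 1024 } }
}

builder = ResultsTableBuilder.new(
  executable_names: ['base', 'yjit'],
  bench_data: bench_data,
  include_rss: true
)
table, format = builder.build
# table[0] => ["bench", "base (ms)", "stddev (%)", "RSS (MiB)",
#              "yjit (ms)", "stddev (%)", "RSS (MiB)",
#              "yjit 1st itr", "base/yjit"]
# format => ["%s", "%.1f", "%.1f", "%.1f", "%.1f", "%.1f", "%.1f", "%.3f", "%.3f"]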
51 changes: 0 additions & 51 deletions test/benchmark_runner_test.rb
@@ -49,57 +49,6 @@
end
end

describe '.sort_benchmarks' do
before do
@metadata = {
'fib' => { 'category' => 'micro' },
'railsbench' => { 'category' => 'headline' },
'optcarrot' => { 'category' => 'headline' },
'some_bench' => { 'category' => 'other' },
'another_bench' => { 'category' => 'other' },
'zebra' => { 'category' => 'other' }
}
end

it 'sorts benchmarks with headlines first, then others, then micro' do
bench_names = ['fib', 'some_bench', 'railsbench', 'another_bench', 'optcarrot']
result = BenchmarkRunner.sort_benchmarks(bench_names, @metadata)

# Headlines should be first
headline_indices = [result.index('railsbench'), result.index('optcarrot')]
assert_equal true, headline_indices.all? { |i| i < 2 }

# Micro should be last
assert_equal 'fib', result.last

# Others in the middle
other_indices = [result.index('some_bench'), result.index('another_bench')]
assert_equal true, other_indices.all? { |i| i >= 2 && i < result.length - 1 }
end

it 'sorts alphabetically within categories' do
bench_names = ['zebra', 'another_bench', 'some_bench']
result = BenchmarkRunner.sort_benchmarks(bench_names, @metadata)
assert_equal ['another_bench', 'some_bench', 'zebra'], result
end

it 'handles empty list' do
result = BenchmarkRunner.sort_benchmarks([], @metadata)
assert_equal [], result
end

it 'handles single benchmark' do
result = BenchmarkRunner.sort_benchmarks(['fib'], @metadata)
assert_equal ['fib'], result
end

it 'handles only headline benchmarks' do
bench_names = ['railsbench', 'optcarrot']
result = BenchmarkRunner.sort_benchmarks(bench_names, @metadata)
assert_equal ['optcarrot', 'railsbench'], result
end
end

describe '.check_call' do
it 'runs a successful command and returns success status' do
result = nil