Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 290 lines (256 sloc) 12.8 KB
#!/usr/bin/env ruby
#
# Copyright © 2015 Siarhei Siamashka <siarhei.siamashka@gmail.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
###############################################################################
# Assuming Gaussian distribution of the DRAM clock speeds where the #
# lima-memtester test starts to detect reliability problems due to variances #
# between different board samples (but the same board model!), predict how #
# many boards of the same type are expected to pass/fail the test at each #
# DRAM clock speed. #
# #
# If a "good" DRAM clock frequency (passes the test) is 672 MHz and a "bad" #
# clock frequency is 696 MHz, then we are assuming that the cross over point #
# is somewhere in the middle. So we want to pass the "(672 + 696) / 2" value #
# into this script. In order to make this easier, there is the <freq_delta> #
# command line argument, which can be used to add a 12 MHz shift to the #
# "good" value or subtract 12 MHz from the "bad" value. It is a convenience #
# feature to avoid maual adjustment of the DRAM clock frequency lists #
# used as input for this script. #
###############################################################################
require 'tempfile'
if ARGV.size < 1
printf("Usage: #{$PROGRAM_NAME} <freq_delta> <png_file>\n")
printf("Where:\n")
printf(" freq_delta - adjustment for the frequency.\n")
printf(" png_file - resulting PNG file with the plot.\n")
printf("\n")
printf("This script reads the list of space separated DRAM clock\n")
printf("frequency values from STDIN, adds <freq_delta> to them and\n")
printf("generates a gnuplot chart. Several groups of DRAM clock\n")
printf("frequencies can be also comma separated and shown on the\n")
printf("same chart using different colors (blue, green, red, pink).\n")
printf("\n")
printf("Example:\n")
printf(" echo '672 696, 648 648 672' | lima-memtester-genchart 12 out.png\n")
printf("\n")
printf("In this example the cumulative distribution function for the data\n")
printf("set [684 708] will be shown as a blue line and the cumulative\n")
printf("distribution function for the set [660 660 684] will be shown\n")
printf("as a green line in the 'out.png' output file.\n")
exit(1)
end
freq_delta = ARGV[0].to_f
$binning_interval = (freq_delta.abs * 2).to_i
###############################################################################
# Parse html page from mediawiki #
###############################################################################
def html_parse(data)
out = []
data.scan(/(<table\s+class=\"wikitable\">)(.*?)(<\/table>)/m) do |_, tbl, _|
result1 = []
tbl.scan(/(<tr>)(.*?)(<\/tr>)/m) do |_, row, _|
tmp = []
row.scan(/(<th>)(.*?)(<\/th>)/m) { |_, x, _| tmp.push(x.strip) }
row.scan(/(<td>)(.*?)(<\/td>)/m) { |_, x, _| tmp.push(x.strip) }
result1.push(tmp)
end
if result1.size > 1 && ((result1[0][2] =~ /lima\-memtester passes/) &&
(result1[0][3] =~ /lima\-memtester fails/))
good_table = true
# Select only rows with exactly 5 columns
result2 = result1.select { |x| x.size == 5 }
# The first sanity check
good_table = false if result2.size != result1.size
# Remove the table header
result2.shift if good_table
# Extract the DRAM clock frequencies (the cross over point)
result3 = result2.map do |x|
lo = x[2].gsub(/<[\/]?b>/, "").to_i
hi = x[3].gsub(/<[\/]?b>/, "").to_i
$binning_interval = (hi - lo) if $binning_interval == 0
if lo == 0 || hi == 0 || lo >= hi || (hi - lo) != $binning_interval
printf("Problems parsing row %s\n", x)
good_table = false
end
(lo + hi).to_f / 2
end
out.push(result3) if good_table
end
end
return out
end
###############################################################################
# Chebyshev's inequality for lower semivariance #
###############################################################################
# https://en.wikipedia.org/wiki/Chebyshev%27s_inequality#Semivariances
def chebyshev_lower_semivariance_p(samples, val)
sample_mean = samples.reduce(:+) / samples.size.to_f
lower_semivariance = samples.select {|x| x < sample_mean }.reduce(0) {|sum, x|
sum + (x - sample_mean) ** 2} / (samples.size - 1)
a = (sample_mean - val) / Math.sqrt(lower_semivariance)
if val < sample_mean
[1.0 / (a * a), 1.0].min
else
1.0
end
end
###############################################################################
# Multinomial test (by calling an external R programming language interpreter #
# with XNomial library). If there are two sets of DRAM clock frequencies #
# provided then use the first set for training and the second set for testing #
###############################################################################
def multinomial_test(multinomial_data)
if multinomial_data.size >= 1
cutoff_prob = 0.0001
train_idx = 0
test_idx = (multinomial_data.size >= 2 ? 1 : 0)
tmp = {}
multinomial_data[train_idx][:prob].each do |freq, prob|
tmp[freq] = [prob, 0] if prob >= cutoff_prob
end
multinomial_data[test_idx][:observed].each do |freq, observed|
prob = (multinomial_data[train_idx][:prob][freq] || 0.0)
next if observed == 0 && prob < cutoff_prob
tmp[freq] = [prob, observed]
end
tmp = tmp.sort
# https://cran.r-project.org/web/packages/XNomial/vignettes/XNomial.html
r_input = sprintf("library(XNomial)\n\n")
r_input += sprintf("prob <- c(%s)\n", tmp.map {|x| x[1][0]}.join(", "))
r_input += sprintf("observed <- c(%s)\n", tmp.map {|x| x[1][1]}.join(", "))
r_input += sprintf("out <- xmulti(observed, prob, detail=3)\n")
r_output = `echo '#{r_input}' | R --vanilla 2>/dev/null`
if r_output
x_output = ""
r_output.each_line do |l|
x_output += " " + l.strip + "\n" if l =~ /^P value/
end
return x_output if x_output != ""
end
end
end
###############################################################################
# Cumulative distribution function #
###############################################################################
# https://en.wikipedia.org/wiki/Normal_distribution#Cumulative_distribution_function
def cumulative_distribution_function(x, sample_mean, sample_stddev)
return 0.5 * (1 + Math.erf((x - sample_mean) / (sample_stddev * Math.sqrt(2))))
end
###############################################################################
# Analyze data, calculate probabilities #
###############################################################################
if ARGV[0] =~ /^http/
require 'open-uri'
html_data = open(ARGV[0], &:read)
data_sets = html_parse(html_data)
else
data_sets = STDIN.read.split(",").map {|s| s.split.map {|x| x.to_f + freq_delta }}
end
printf("Frequency shift : %d MHz\n", freq_delta.to_i)
printf("Binning interval : %d MHz\n", $binning_interval)
multinomial_data = []
data_sets.each do |samples|
printf("Processing data : %s\n", samples.map {|x| x.to_i })
abort "As least two DRAM clock speed samples are needed" if samples.size < 2
sample_mean = samples.reduce(:+) / samples.size.to_f
sample_stddev = Math.sqrt(samples.reduce(0) {|sum, x| sum + (x - sample_mean) ** 2} /
(samples.size - 1))
printf("Sample mean : %f\n", sample_mean)
printf("Sample stddev : %f\n", sample_stddev)
# Analyze the frequencies range plus/minus six standard deviations from the mean
mid_freq = sample_mean.to_i / $binning_interval * $binning_interval
min_freq = mid_freq - (sample_stddev * 6).to_i / $binning_interval * $binning_interval
max_freq = mid_freq + (sample_stddev * 6).to_i / $binning_interval * $binning_interval
tmp = {}
tmp[:mean] = sample_mean
tmp[:stddev] = sample_stddev
tmp[:observed] = {}
tmp[:prob] = {}
(min_freq..max_freq).step($binning_interval) do |freq|
cdf2 = cumulative_distribution_function(freq + $binning_interval,
sample_mean, sample_stddev)
cdf1 = cumulative_distribution_function(freq, sample_mean, sample_stddev)
tmp[:prob][freq] = cdf2 - cdf1
tmp[:observed][freq] = samples.reduce(0) do |sum, x|
sum += ((x >= freq && x < freq + $binning_interval) ? 1 : 0)
end
end
multinomial_data.push(tmp)
multinomial_results = multinomial_test([multinomial_data[-1]])
printf("\n")
printf("{| class=\"wikitable\"\n")
printf("! rowspan=2 | DRAM clock speed\n")
printf("! colspan=2 | Percentage of boards failing the lima-memtester test\n")
printf("! rowspan=2 | Theoretical pessimistic upper bound of the failure " +
"percentage using Chebyshev's inequality for lower semivariance <ref name=\"chebyshev\">\n")
printf("If nothing is known about the distribution of samples, then at least\n")
printf("[https://en.wikipedia.org/wiki/Chebyshev's_inequality Chebyshev's inequality]\n")
printf("can be used to get a rough idea about the probabilities of encountering reliability\n")
printf("problems at different DRAM clock speeds. But this method is <u>very conservative</u>\n")
printf("and substantially overestimates probabilities (being too generic has its price).\n")
printf("</ref>\n")
printf("! rowspan=2 | Histogram\n")
printf("|-\n")
printf("! Experimental results\n")
printf("! Theoretical prediction (assuming Gaussian distribution) <ref name=\"gaussian\">\n")
printf("We can assume that the [https://en.wikipedia.org/wiki/Normal_distribution Gaussian distribution]\n")
printf("is a good approximation for our experimental data, calculate theoretical probabilities and do an\n")
printf("[http://www.biostathandbook.com/exactgof.html exact test of goodness-of-fit]\n")
printf("to see if the experimental data does not contradict with the theory.\n")
printf("There is a nice [https://cran.r-project.org/web/packages/XNomial/vignettes/XNomial.html XNomial]\n")
printf("library for R, which can do the job")
if multinomial_results
printf(":<pre>\n#{multinomial_results}</pre>\n")
printf("If the [https://en.wikipedia.org/wiki/P-value p-values] listed above happen to be too low\n")
printf("(less than 0.05) and reject our [https://en.wikipedia.org/wiki/Null_hypothesis null hypothesis]\n")
printf("about having the Gaussian distribution, then Chebyshev's inequality estimates still can be used.\n")
else
printf(" (it was not installed on the system where this report had been generated).\n")
end
printf("</ref>\n")
freq = max_freq
while true
p = cumulative_distribution_function(freq, sample_mean, sample_stddev)
pc = chebyshev_lower_semivariance_p(samples, freq)
break if (p < 0.0001 && pc < 0.01) || freq <= min_freq
freq -= $binning_interval
end
prev_observed = 0
while freq <= max_freq
p = cumulative_distribution_function(freq, sample_mean, sample_stddev)
pc = chebyshev_lower_semivariance_p(samples, freq)
observed = samples.reduce(0) {|sum, x| x <= freq ? sum + 1 : sum }
histogram = "*" * (observed - prev_observed)
prev_observed = observed
color = ""
color = "style=\"color: orange;\"" if p >= 0.01 || pc >= 0.05
color = "style=\"color: red;\"" if observed > 0
printf("|- %s\n| %.0f MHz || %.2f %% (%d/%d) || %.2f %% || %.2f %% || #{histogram}\n",
color,
freq, observed * 100.0 / samples.size, observed, samples.size,
p * 100, pc * 100)
break if p >= 0.99
freq += $binning_interval
end
printf("|}\n")
printf("<references/>\n")
end