Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
481 lines (423 sloc) 18.5 KB
#!/bin/bash
# shellcheck disable=SC2039
# Purpose: This script attempts to generate random integers through any means necessary
# Not to be used for any cryptography, it just generates random integers, that's all!
# Author: Rawiri Blundell
# Copyright: (c) 2016 - Beerware As-Is.
# No warranty or liability, but some attribution would be nice if it works well
# for you or you derive your own code. Just as I've attributed inspiration below.
# Date: 20160222
# Requires: At the very least, an interpreter with bitshifting. This isn't strict POSIX.
# Interpreter: This may have a bash shebang, but should work fine in any POSIX compatible shell
# #!/usr/bin/env ksh or #!/bin/ksh will probably get you working on almost anything
###############################################################################
# Inspiration taken from:
# Colin Riddel provided the perl one liner that started this
# 'rand' by Heiner Steven:
# http://www.shelldorado.com/scripts/cmds/rand
# 'rand' by Malte Skoruppa:
# http://unix.stackexchange.com/questions/157250/how-to-efficiently-generate-large-uniformly-distributed-random-integers-in-bas
# 'randbits' by Gene Spafford:
# http://www.diablotin.com/librairie/networking/puis/ch23_09.htm
###############################################################################
# Set the default variable states
nMin=1
debug=false
nCount=1
repeat=false
zeroPad=false
tmpDir=/tmp/rand
# Simple 'die' function
die() {
printf '%s\n' "$@" 1>&2
exit 1
}
# In case we actually use $tmpDir, let's trap and delete it
trap 'rm -rf "${tmpDir}"' EXIT INT TERM HUP
# Figure out our default maxRand, using 'getconf'
if [ "$(getconf LONG_BIT)" -eq 64 ]; then
# 2^63-1
maxRand=9223372036854775807
elif [ "$(getconf LONG_BIT)" -eq 32 ]; then
# 2^31-1
maxRand=2147483647
else
# 2^15-1
maxRand=32767
fi
# Override for mksh, which uses 32-bit arithmetic
# This test has to be separate to keep dash happy
case "${KSH_VERSION}" in
(*"MIRBSD"*) maxRand=2147483647;;
esac
# Getopts
while getopts ":dDhm:M:N:rz:" Flags; do
case "${Flags}" in
(d) debug="true";;
(D) debug="true"; set -x;;
(h) printf '%s\n' "" "rand - generate random positive integers" \
"" "Optional Arguments:" \
"-d [debug. Tells you which processing method is used (Default:off)]" \
"-h [help]" \
"-m [minimum number (Default:${nMin})]" \
"-M [maximum number (Default:${maxRand})]" \
"-N [count. Number of numbers (Default:${nCount})]" \
"-r [repeat. Output lines may be repeated (Default:off)]" \
"-z [printf style zero padding. E.g. With '-z 2', '9' becomes '09' (Default:off)]" ""
exit 0;;
(m) case "${OPTARG}" in
(*[!0-9]*|'') die "[ERROR] rand: (-m) '${OPTARG}' is not a number";;
(*) nMin="${OPTARG}";;
esac;;
(M) case "${OPTARG}" in
(*[!0-9]*|'') die "[ERROR] rand: (-M) '${OPTARG}' is not a number";;
(*) nMax="${OPTARG}";;
esac;;
(N) case "${OPTARG}" in
(*[!0-9]*|'') die "[ERROR] rand: (-N) '${OPTARG}' is not a number";;
(*) nCount="${OPTARG}";;
esac;;
(r) repeat=true;;
(z) case "${OPTARG}" in
(*[!0-9]*|'') die "[ERROR] rand: (-z) '${OPTARG}' is not a number";;
(*) zeroPad="${OPTARG}";;
esac;;
(\?) die "[ERROR] rand: Invalid option: '-$OPTARG'. Try 'rand -h' for usage.";;
(:) die "[ERROR] rand: Option '-$OPTARG' requires an argument. e.g. '-$OPTARG 10'";;
esac
done
# In case nMax is blank, default to maxRand
[ -z "${nMax}" ] && nMax="${maxRand}"
# Easter Egg
if printf '%d\n' "${nMax}" 2>&1 | grep -E "out of range|too large" >/dev/null 2>&1; then
die "[INFO] rand: Come on now, stop being silly." \
"My upper boundary is *sometimes* '${maxRand}'."
fi
# Double check that we haven't done something silly like have nMax less than nMin
if [ "${nMin}" -ge "${nMax}" ]; then
die "[ERROR] rand: (-m) minimum value is greater than or equal to (-M) maximum value"
fi
# If repeat is not set, then nCount cannot be higher than nMax
if [ "${repeat}" = "false" ] && [ "${nCount}" -gt "${nMax}" ]; then
printf '%s\n' "[INFO] rand: Count (${nCount}) cannot be higher than the maximum boundary (${nMax})." \
"Count will be: ${nMax}. Consider the '-r' option." 1>&2
nCount="${nMax}"
fi
# Put these vars into the environment so we can import them to perl etc
export nMin nMax nCount
################################################################################
# Functions
################################################################################
# This function simply outputs the method used for random integer generation
Fn_debug() {
[ "${debug}" = true ] && printf '%s\n' "[DEBUG] rand: Method used is '$*'" 1>&2
}
# Function to generate a reliable seed for whatever method requires one
# Because 'date +%s' isn't entirely portable, we try other methods to get the epoch
Fn_getSeed() {
# First we check if /dev/urandom is available.
# We used to have a method_urandom. /dev/urandom can generate numbers fast
# But selecting numbers in ranges etc made for a fairly slow method
if [ -c /dev/urandom ] && command -v od >/dev/null 2>&1; then
# Get a string of bytes from /dev/urandom using od
od -N 4 -A n -t uL /dev/urandom | tr -d '\n' | awk '{$1=$1+0};1'
# Otherwise we try to get another seed
elif date '+%s' >/dev/null 2>&1; then
date '+%s'
elif command -v perl >/dev/null 2>&1; then
perl -e "print time"
# Portable workaround based on http://www.etalabs.net/sh_tricks.html
# We take extra steps to try to prevent accidental octal interpretation
else
local secsVar minsVar hourVar dayVar yrOffset yearVar
secsVar=$(TZ=GMT0 date +%S)
minsVar=$(TZ=GMT0 date +%M)
hourVar=$(TZ=GMT0 date +%H)
dayVar=$(TZ=GMT0 date +%j | sed 's/^0*//')
yrOffset=$(( $(TZ=GMT0 date +%Y) - 1600 ))
yearVar=$(( (yrOffset * 365 + yrOffset / 4 - yrOffset / 100 + yrOffset / 400 + dayVar - 135140) * 86400 ))
printf '%s\n' "$(( yearVar + (${hourVar#0} * 3600) + (${minsVar#0} * 60) + ${secsVar#0} ))"
fi
}
# This function ensures equal distribution for methods that don't support it
# If you request random numbers between 1 and 10, you should only get a complete
# set of random numbers between 1 and 10. This is disabled with '-r'
# Note: for POSIX compat, we sadly can't use arrays
# this means there will be potentially a massive performance hit at scale
Fn_fill() {
# Call the method specified upon function invocation
# Use Fn_unique to get an unsorted list of unique integers
intFill=$($1 | Fn_unique)
# Count how many unique integers we have generated
intCount=$(printf '%s\n' "${intFill}" | wc -l)
# If we've generated enough to satisfy nCount, then we just print them out
if [ "${intCount}" -ge "${nCount}" ]; then
printf '%s\n' "${intFill}" | head -n "${nCount}"
# Otherwise, we walk through a few steps to try and quickly fill the gap
else
# Create a tmpdir
mkdir -p "${tmpDir}"
# Now dump what we have to a temporary file
printf '%s\n' "${intFill}" > "${tmpDir}"/rnginit
# Until we have our full set of random integers,
# Keep throwing random ints into the temporary file
while [ "${intCount}" -lt "${nCount}" ]; do
eval "$1" >> "${tmpDir}"/rnginit
intCount=$(Fn_unique < "${tmpDir}"/rnginit | wc -l)
done
# Now that the full integer set is built, dump it out
Fn_unique < "${tmpDir}"/rnginit | head -n "${nCount}"
fi
}
# Switch between Fn_fill or not
Fn_repeat() {
if [ "${repeat}" = true ]; then
"$1"
else
Fn_fill "$1"
fi
}
# This function allows us to print out unsorted, unique integers
Fn_unique() {
# If 'awk' is available, we use it
if command -v awk >/dev/null 2>&1; then
awk '!x[$0]++'
# Otherwise we use a double sort. This can be brutally slow at scale.
# We first prepend each line with a line number, then perform a unique sort
# on the second field (i.e. the generated integers), then we sort again on the
# line numbers to return the randomness and use cut to print out the integers
else
nl | sort -k 2 -u | sort | cut -f2
fi
}
# Setup the zeropad function, which converts numbers under 10 e.g. '9' becomes '09'
if [ -n "${zeroPad}" ]; then
Fn_zeropad() {
# It appears that 'awk' is more portable vs sed 's/\<[0-9]\>/0&/'
if command -v awk >/dev/null 2>&1; then
awk -v z="${zeroPad}" '{$1 = sprintf("%0*d", z, $1); print}'
elif command -v xargs >/dev/null 2>&1; then
xargs printf '%0*d\n' "${zeroPad}"
else
die "[ERROR] rand: 'awk' or 'xargs' is required for zero-padding but neither were found."
fi
}
else
Fn_zeropad() {
cat -
}
fi
# Function to generate numbers using 'gawk'
method_gawk() {
gawk -v min="${nMin}" -v max="${nMax}" -v nNum="${nCount}" 'BEGIN{srand(systime() + PROCINFO["pid"]); i = 0; while (i < nNum) { printf "%d\n", int(min+rand()*(max-min)); ++i} }'
}
# Function to generate numbers using BSD 'jot'
method_jot() {
# Some versions of 'jot' have uniform distribution, others do not.
# See: https://unix.stackexchange.com/a/241199
# This is a lo-fi approach to try and cater for both
jot -w %i -r "${nCount}" "${nMin}" "$(( nMax + 1 ))" | sed "s/$(( nMax + 1 ))/$(( nMax ))/"
}
# Function to generate numbers using 'nawk'
# This used to also call 'mawk' but that was found to be vulnerable to repeating output
method_nawk() {
nawk -v min="${nMin}" -v max="${nMax}" -v nNum="${nCount}" -v seed="$(Fn_getSeed)" 'BEGIN {srand(seed); i = 0; while (i < nNum) { printf "%d\n", int(min+rand()*(max-min)); ++i} }'
}
# Function to generate numbers using 'perl'
method_perl() {
perl -le '$mn=$ENV{nMin}; $mx=$ENV{nMax}; $cn=$ENV{nCount}; foreach my $i (1..$cn) { printf "%.0f\n", int(rand($mx-$mn))+$mn ; }'
}
# Function to generate numbers using 'python'
method_python() {
python -c "for _ in xrange(${nCount}): import random; print random.randint(${nMin},${nMax})"
# Alternative option that loosely follows Daniel Lemire's advice here:
# http://lemire.me/blog/2016/03/21/ranged-random-number-generation-is-slow-in-python/
# The subtraction and addition to cater for specific boundaries appears to destroy any performance improvement though
# The numpy suggestion on that blogpost performed rather poorly for me (Python 2.7)
# python -c "for _ in xrange(${nCount}): import random; print int(random.random() * (${nMax} - ${nMin} + 1) + ${nMin})"
# The alternative method below fails with a Memory Error if nMax is too high
# I'll leave it here for prosperity though (note: it may require nMax = nMax + 1)
#python -c "import random; numlist=list(range(${nMin},${nMax})); random.shuffle(numlist); print '\n'.join([str(int) for int in numlist])"
}
# Function to generate numbers primarily using '$RANDOM' special variable
# If the special variable isn't available (e.g. dash shell), we fall back to a BSD-style
# Linear congruential generator (LCG) which we use to create our own '$RANDOM' variable
# This way, the entire bitshifting formula remains the same
# See: https://rosettacode.org/wiki/Linear_congruential_generator
method_RANDOM() {
# We need to know the number of bits required to represent nMax (i.e. bitlength)
# This is for the rightwards bitshift
logn=1
nBitlen=0
while [ $((nMax - nMin)) -gt "${logn}" ] && [ "${logn}" -gt 0 ]; do
logn=$(( logn * 2 ))
nBitlen=$(( nBitlen + 1 ))
done
# We set the initial seed just in case we're using the LCG
rnSeed=$(Fn_getSeed)
# Start a loop based on nCount.
count=0
while [ "${count}" -lt "${nCount}" ]; do
# Start generating seeds for the LCG
rnSeed=$(( (1103515245 * rnSeed + 12345) % 2147483648 ))
# If the RANDOM variable is blank, we failover to the LCG
# We print as an unsigned integer to ensure that it's a positive number
# shellcheck disable=SC2169
RANDOM="${RANDOM:-$(printf '%u\n' "$(( rnSeed / 65536 ))")}"
# Set our initial bitlength
rndBitlen=15
# shellcheck disable=SC2169
rnd="${RANDOM}" # Capture one output sample of RANDOM
while [ "${rndBitlen}" -lt "${nBitlen}" ]; do
# Stir the seed again just in case
rnSeed=$(( (1103515245 * rnSeed + 12345) % 2147483648 ))
# If two invocations of RANDOM are the same, then we're working with a
# shell that does not support $RANDOM. So we use the LCG and rotate $RANDOM
# shellcheck disable=SC2169
if [ "${RANDOM}" = "${RANDOM}" ]; then
RANDOM=$(printf '%u\n' "$(( rnSeed / 65536 ))")
fi
# Bitshift RANDOM to the left to stack it i.e. 15 int -> 30 int -> 45 int etc
# shellcheck disable=SC2169
rnd=$(( rnd<<15|RANDOM ))
# Keep stacking until the while loop exits
rndBitlen=$(( rndBitlen + 15 ))
done
# Now bitshift it right
nRandShift=$(( rnd>>(rndBitlen-nBitlen) ))
# Next we test if the number we've generated fits into our range. If so,
# then we can use it and iterate the while loop
if [ $((nRandShift + nMin)) -le "${nMax}" ]; then
printf '%u\n' "$(( nRandShift + nMin ))"
count=$(( count + 1 ))
fi
done
}
# Function to generate numbers using GNU 'shuf'
method_shuf() {
# It turns out that Solaris 11 comes with 'shuf' v8.16, which lacks the
# '-r' option. This option was introduced in v8.22
# Once again, Solaris proves to be the bane of my scripting existence.
# First we test if the repeat option has been set, as this requires special handling
if [ "${repeat}" = true ]; then
# Test if 'shuf' can use '-r' and if so, use it
if shuf -n 1 -r -i 1-10 >/dev/null 2>&1; then
shuf -n "${nCount}" -r -i "${nMin}"-"${nMax}"
# Otherwise we assume that '-r' isn't available, and do it the old fashioned way
else
while [ "${nCount}" -gt 0 ]; do
shuf -n 1 -i "${nMin}"-"${nMax}"
# Decrement the counter
nCount=$(( nCount - 1 ))
done
fi
# If repeat isn't true, just do this
else
shuf -n "${nCount}" -i "${nMin}"-"${nMax}"
fi
}
# Function to attempt random integer generation using a textbook Vigna xorshift128+
# This RNG is also coupled with modulo de-biasing loosely from arc4random_uniform
method_xorshift128plus() {
# First, we need to calculate the range of integers we need to create
# i.e. convert from {x..y} to {0..n}
nRange=$(( nMax - nMin + 1 ))
# We attempt to de-bias our modulo as explained here and here and here:
# http://funloop.org/post/2013-07-12-generating-random-numbers-without-modulo-bias.html
# https://blog.hartwork.org/?p=2900
# https://medium.com/hownetworks/dont-waste-cycles-with-modulo-bias-35b6fdafcf94
# This calculation is careful to not invoke negative integer overflow
skipInts=$(( (maxRand - nRange) % nRange ))
# Our initial seed integers, one as per RFC 1149.5, the other from Fn_getseed
int0=4
int1=$(Fn_getSeed)
# Start our count loop
count=0
while [ "${count}" -lt "${nCount}" ]; do
# xorshift128+ with preselected triples
int1=$(( int1 ^ int1 << 23 ))
int1=$(( int1 ^ int1 >> 17 ))
int1=$(( int1 ^ int0 ))
int1=$(( int1 ^ int0 >> 26 ))
seed1="${int1}"
# If our generated int is larger than the number of problematic ints
# Then we can modulo it safely, otherwise drop it and generate again
# By virtue of skipInts being >= 0, this will also drop generated negative overflow ints
# Then we simply add nMin to bring it back up into our desired range
if [ $(( (int0 + seed1) >= skipInts )) ]; then
printf '%u\n' "$(( ((int0 + seed1) % nRange) + nMin ))"
count=$(( count + 1 ))
fi
done
}
# Check if 'seq' is available, if not, provide a basic replacement function
# Note: this has been stripped back to cater only for ascending sequences
# A fuller, bash-friendly version is available at https://github.com/rawiriblundell
if ! command -v seq >/dev/null 2>&1; then
seq() {
i=$1
while [ "$i" -ne "$(( $2 + 1 ))" ]; do
printf '%s\n' "$i"
i=$(( i + 1 ))
done
}
fi
###############################################################################
# Main
###############################################################################
main() {
# Cater for GNU shuf, nice and fast
# This function will also cater for the repeat option natively
if command -v shuf > /dev/null 2>&1; then
# We disallow the 'shuf' step-in function, as it depends on this script
# Chicken, meet egg. Bok bok cluck cluck.
if type shuf | head -n 1 | grep function > /dev/null 2>&1; then
continue
fi
Fn_debug shuf
method_shuf
# If we're on a BSD based host, likely 'jot' is available, so let's use it
# 'jot' is limited to 2^31-1 by the arc4random algorithm
# so we also test for an nMax limit based on that
elif command -v jot > /dev/null 2>&1 && [ "${nMax}" -le 2147483647 ]; then
Fn_debug jot
# Repeating generated numbers is the default behaviour of 'jot'
Fn_repeat method_jot
# Now we start going less-native and try perl. Very likely to be there,
# so very likely this will be a commonly used option
elif command -v perl > /dev/null 2>&1; then
Fn_debug perl
Fn_repeat method_perl
# Otherwise, we try python
# We need to ensure that /dev/urandom is available, as python sources it
elif command -v python > /dev/null 2>&1 && [ -c /dev/urandom ]; then
Fn_debug python
Fn_repeat method_python
# No perl or python? Let's try 'gawk'
elif command -v gawk > /dev/null 2>&1; then
Fn_debug gawk
Fn_repeat method_python
# No gawk? Surely 'nawk' is hanging around? Works very similar, but
# because we don't have systime() we have to replicate it as a seed for srand().
elif command -v nawk > /dev/null 2>&1; then
Fn_debug nawk
Fn_repeat method_nawk
# Note: oawk does not have srand() or rand(), it's more trouble than it's worth so let's move on
# Next we try one of our own methods using a textbook xorshift128+
# coupled with a semi-not-so-textbook modulo debias method
elif [ $(( nCount ^ nCount )) -eq 0 ] >/dev/null 2>&1; then
Fn_debug xorshift128+
Fn_repeat method_xorshift128plus
# No shuf, jot, perl, python, gawk or nawk? AND xorshift isn't working?! Fear not!
# Let's try for a POSIX friendly shell solution. This is limited to 2^60-1
elif [ "${nMax}" -lt 1152921504606846975 ]; then
Fn_debug Skoruppa Bitshift
Fn_repeat method_RANDOM
# Provide an outright failure condition just in case
else
die "[ERROR] rand: Unable to find a suitable method to generate a random integer"
fi | Fn_zeropad
exit 0
}
# Call the main function
main