#!/bin/bash
# shellcheck disable=SC2039
# Purpose: This script attempts to generate random integers through any means necessary
# Not to be used for any cryptography, it just generates random integers, that's all!
# Author: Rawiri Blundell
# Copyright: (c) 2016 - Beerware As-Is.
# No warranty or liability, but some attribution would be nice if it works well
# for you or you derive your own code. Just as I've attributed inspiration below.
# Date: 20160222
# Interpreter: This may have a bash shebang, but should work fine in any POSIX compatible shell
# #!/usr/bin/env ksh or #!/bin/ksh will probably get you working on almost anything
###############################################################################
# Inspiration taken from:
# Colin Riddel provided the perl one liner that started this
# 'rand' by Heiner Steven:
# http://www.shelldorado.com/scripts/cmds/rand
# 'rand' by Malte Skoruppa:
# http://unix.stackexchange.com/questions/157250/how-to-efficiently-generate-large-uniformly-distributed-random-integers-in-bas
# 'randbits' by Gene Spafford:
# http://www.diablotin.com/librairie/networking/puis/ch23_09.htm
###############################################################################
# Set the default variable states
nMin=1
debug=false
nCount=1
repeat=false
zeroPad=false
# Scratch directory, only ever populated by the gap-filling path of Fn_fill
# (which creates it on demand with 'mkdir -p').  The name embeds our PID so
# that concurrent invocations cannot overwrite - or, via the EXIT trap below,
# delete - each other's working files.
tmpDir="${TMPDIR:-/tmp}/rand.$$"
# In case we actually use $tmpDir, let's trap and delete it on every exit path
trap 'rm -rf "${tmpDir}"' EXIT
# A handler that only cleans up would let the script carry on after the signal,
# so convert signals into a normal exit; that in turn fires the EXIT trap above
trap 'exit 1' INT TERM HUP
# Figure out our default nMax by asking printf to format 2^31 and then 2^63:
# the first one that the shell reports as overflowing tells us the word size.
# The diagnostics are matched with 'case' so no external grep is required.
case "$(printf "%d\n" "2147483648" 2>&1)" in
  (*"out of range"*|*"too large"*)
    nMax=2147483647 #2^31-1
    ;;
  (*)
    case "$(printf "%d\n" "9223372036854775808" 2>&1)" in
      (*"out of range"*|*"too large"*)
        nMax=9223372036854775807 #2^63-1
        ;;
      (*)
        nMax=32767 #2^15-1, super-safe mode
        ;;
    esac
    ;;
esac
# And save it, so later messages can still quote the default after -M changes nMax
dnMax="${nMax}"
# Getopts
# Parse the command line into the option globals.
# The leading ':' in the option string puts getopts into "silent" mode, which
# is what makes the '\?' and ':' arms below work: the offending option letter
# is handed to us in OPTARG and getopts prints no diagnostics of its own.
# Arguments: the script's positional parameters ("$@")
# Globals written: debug, nMin, nMax, nCount, repeat, zeroPad
# Exits: 0 after printing the help text (-h), 1 on an invalid option or value
#        (error messages are written to stderr)
Fn_parse_opts() {
  while getopts ":dhm:M:N:rz" Flags; do
    case "${Flags}" in
      (d) debug="true";;
      (h) printf "%s\n" "" "rand - generate random positive integers" \
            "" "Optional Arguments:" \
            "-d [debug. Tells you which processing method is used (Default:off)]" \
            "-h [help]" \
            "-m [minimum number (Default:${nMin})]" \
            "-M [maximum number (Default:${nMax})]" \
            "-N [count. Number of numbers (Default:${nCount})]" \
            "-r [repeat. Output lines may be repeated (Default:off)]" \
            "-z [zero pad single digits. E.g. '9' becomes '09' (Default:off)]" ""
          exit 0;;
      # The three numeric options accept only non-empty strings of digits
      (m) case "${OPTARG}" in
            (*[!0-9]*|'') printf "%s\n" "[ERROR] rand: (-m) '${OPTARG}' is not a number" 1>&2; exit 1;;
            (*) nMin="${OPTARG}";;
          esac;;
      (M) case "${OPTARG}" in
            (*[!0-9]*|'') printf "%s\n" "[ERROR] rand: (-M) '${OPTARG}' is not a number" 1>&2; exit 1;;
            (*) nMax="${OPTARG}";;
          esac;;
      (N) case "${OPTARG}" in
            (*[!0-9]*|'') printf "%s\n" "[ERROR] rand: (-N) '${OPTARG}' is not a number" 1>&2; exit 1;;
            (*) nCount="${OPTARG}";;
          esac;;
      (r) repeat=true;;
      (z) zeroPad=true;;
      # Unknown option letter
      (\?) printf "%s\n" "[ERROR] rand: Invalid option: '-$OPTARG'. Try 'rand -h' for usage." 1>&2
           exit 1;;
      # Known option that was given without its argument
      (:) printf "%s\n" "[INFO] rand: Option '-$OPTARG' requires an argument. e.g. '-$OPTARG 10'" 1>&2
          exit 1;;
    esac
  done
}
Fn_parse_opts "$@"
# Easter Egg
# Asking printf to format an nMax that the shell cannot represent produces an
# overflow diagnostic; if we see one, refuse politely and quote the default.
case "$(printf "%d\n" "${nMax}" 2>&1)" in
  (*"out of range"*|*"too large"*)
    printf "%s\n" "[INFO] rand: Come on now, stop being silly." \
      "My upper boundary is *sometimes* '${dnMax}'."
    exit 1
    ;;
esac
# Sanity check the range: the maximum has to be strictly above the minimum
if [ "${nMax}" -le "${nMin}" ]; then
  printf "%s\n" "[ERROR] rand: (-m) minimum value is greater than or equal to (-M) maximum value" 1>&2
  exit 1
fi
# Publish the range and count to child processes (perl reads them from its environment)
export nMin
export nMax
export nCount
################################################################################
# Functions
################################################################################
# This function ensures equal distribution for methods that don't support it
# If you request random numbers between 1 and 10, you should only get a complete
# set of random numbers between 1 and 10. This is disabled with '-r'
# Note: for POSIX compat, we sadly can't use arrays (can we test for that capability?)
# this means there will be potentially a massive performance hit at scale
# Arguments: $1 - name of the generator function to invoke (e.g. method_perl)
# Globals read: nCount, nMin, nMax, tmpDir
# Globals written: maxCount, initFill, initCount (nCount is restored before returning)
# Outputs: up to nCount distinct integers, one per line, on stdout
# Returns: 1 if the scratch directory cannot be created
Fn_fill() {
  # Capture the requested number of integers
  maxCount="${nCount}"
  # Multiply the original count to improve the chances of unique elements being selected
  if [ "${nCount}" -ge 100 ]; then
    nCount=$(( nCount * 2 ))
  fi
  # Call the method specified upon function invocation
  # Use Fn_unique to get an unsorted list of unique integers
  initFill=$("$1" | Fn_unique)
  # The oversampling above was only meant for that one call, so put the
  # caller's (exported) count back instead of leaving it doubled
  nCount="${maxCount}"
  # Count how many unique integers we have generated.  An empty result has to be
  # special-cased, as printf would still emit one (blank) line for it.  The
  # arithmetic expansion strips the padding that some 'wc' implementations add.
  if [ -n "${initFill}" ]; then
    initCount=$(( $(printf "%s\n" "${initFill}" | wc -l) ))
  else
    initCount=0
  fi
  # If we've generated enough to satisfy nCount, then we just print them out
  if [ "${initCount}" -ge "${maxCount}" ]; then
    printf "%s\n" "${initFill}" | head -n "${maxCount}"
  # Otherwise, we walk through a few steps to try and quickly fill the gap
  else
    # Create a tmpdir
    mkdir -p "${tmpDir}" || return 1
    # Now let's make these files, starting by dumping what we already have
    if [ -n "${initFill}" ]; then
      printf "%s\n" "${initFill}" > "${tmpDir}"/rnginit
    else
      : > "${tmpDir}"/rnginit
    fi
    # Next, let's generate a list of what's missing: every value that shows up
    # only once across "what we have" plus "the full range" has not been drawn.
    # The range is streamed rather than held in a single (potentially huge) argument.
    {
      if [ -n "${initFill}" ]; then
        printf "%s\n" "${initFill}"
      fi
      seq "${nMin}" "${nMax}"
    } | sort | uniq -u > "${tmpDir}"/rngdiff
    # A 70/30 split-and-rotate of rngdiff (using 'split' and 'tac') was sketched
    # here to reduce recognisable patterns in the filler; it is currently disabled.
    # Interleave the two using 'paste' and dump them out; 'grep .' drops the
    # blank lines that appear once the shorter file runs out
    paste -d '\n' "${tmpDir}"/rnginit "${tmpDir}"/rngdiff | grep . | head -n "${maxCount}"
  fi
}
# This function allows us to print out unsorted, unique integers
# Input: lines on stdin.  Output: stdin with repeated lines removed, on stdout.
Fn_unique() {
  # If 'awk' is available, we use it: a line is printed only the first time it is seen
  if command -v awk >/dev/null 2>&1; then
    awk '!x[$0]++'
  # Otherwise we use a double sort. This can be brutally slow at scale.
  # We first prepend each line with a line number, then perform a unique sort
  # on the second field (i.e. the generated integers), then we sort again on the
  # line numbers to return the randomness and use cut to print out the integers.
  # The second sort must be numeric: 'nl' pads its numbers with blanks, and a
  # plain text sort misorders them once they outgrow the padding or when the
  # locale's collation ignores blanks.
  else
    nl | sort -k 2 -u | sort -n | cut -f2
  fi
}
# Setup the zeropad function
# Filter: copies stdin to stdout, prefixing single-digit lines with '0' when
# zero-padding was requested (zeroPad=true) and passing everything through
# untouched otherwise.
# Globals read: zeroPad
Fn_zeropad() {
  if [ "${zeroPad}" != true ]; then
    cat -
    return
  fi
  # It appears that 'awk' is more portable vs sed 's/\<[0-9]\>/0&/'
  if command -v awk >/dev/null 2>&1; then
    # Pad textually rather than via sprintf("%02d"): the latter pushes every
    # value through awk's floating point numbers and corrupts large integers
    awk '/^[0-9]$/ { $0 = "0" $0 } { print }'
  else
    printf "%s\n" "[ERROR] rand: 'awk' is required for zero-padding but was not found." 1>&2
    exit 1
  fi
}
# Function to generate numbers using 'gawk'
# Globals read: nMin, nMax, nCount
# Outputs: nCount integers in the inclusive range nMin..nMax, one per line
method_gawk() {
  gawk -v min="${nMin}" -v max="${nMax}" -v nNum="${nCount}" '
    BEGIN {
      # Seed from the clock plus our PID so that runs within the same second differ
      srand(systime() + PROCINFO["pid"])
      # rand() is always below 1, so the span needs the +1 for max to be reachable
      span = max - min + 1
      for (i = 0; i < nNum; i++) {
        pick = int(min + rand() * span)
        # Guard against floating point rounding pushing us past max on huge spans
        if (pick > max) {
          pick = max
        }
        printf "%.0f\n", pick
      }
    }'
}
# Function to generate numbers using BSD 'jot'
# Globals read: nCount, nMin, nMax
method_jot() {
  # Assemble the argument list first: format each value with %i (-w), draw
  # randomly (-r), then the count followed by the lower and upper bounds
  set -- -w %i -r "${nCount}" "${nMin}" "${nMax}"
  jot "$@"
}
# Function to generate numbers using 'nawk' or 'mawk'
# Globals read: nMin, nMax, nCount
# Globals written: awkBin
# Outputs: nCount integers in the inclusive range nMin..nMax, one per line
method_nawk() {
  # Prefer 'mawk' when it is installed, otherwise use 'nawk'
  if command -v mawk >/dev/null 2>&1; then
    awkBin=mawk
  else
    awkBin=nawk
  fi
  # The seed is a timestamp (note %m for the month; %M is the minute) with our
  # PID added, so that invocations within the same second do not repeat
  "${awkBin}" -v min="${nMin}" -v max="${nMax}" -v nNum="${nCount}" \
    -v seed="$(date +%Y%m%d%H%M%S)" -v pid="$$" '
    BEGIN {
      srand(seed + pid)
      # rand() is always below 1, so the span needs the +1 for max to be reachable
      span = max - min + 1
      for (i = 0; i < nNum; i++) {
        pick = int(min + rand() * span)
        # Guard against floating point rounding pushing us past max on huge spans
        if (pick > max) {
          pick = max
        }
        # Explicit format so that large values are never printed in exponent notation
        printf "%.0f\n", pick
      }
    }'
}
# Function to generate numbers using 'perl'
# Environment read by perl: nMin, nMax, nCount (these must be exported)
# Outputs: nCount integers in the inclusive range nMin..nMax, one per line
method_perl() {
  perl -e '
    my ($mn, $mx, $cn) = @ENV{qw(nMin nMax nCount)};
    # rand(N) returns a value below N, so the span needs the +1 for nMax to be reachable
    my $span = $mx - $mn + 1;
    for (1 .. $cn) {
      printf "%.0f\n", int(rand($span)) + $mn;
    }'
}
# Function to generate numbers using 'python'
# Globals read: nCount, nMin, nMax (handed to the interpreter as arguments)
# Outputs: nCount integers in the inclusive range nMin..nMax, one per line
method_python() {
  # The embedded program sticks to syntax that is valid in both Python 2 and
  # Python 3, as 'python' may be either.  A counting loop is used so that no
  # list of nCount elements is ever built.
  python -c '
import random
import sys

count, low, high = (int(arg) for arg in sys.argv[1:4])
emitted = 0
while emitted < count:
    sys.stdout.write("%d\n" % random.randint(low, high))
    emitted += 1
' "${nCount}" "${nMin}" "${nMax}"
  # The alternative method below was tested but abandoned for now
  # It fails with a Memory Error if nMax is too high
  # I'll leave it here for posterity though (note: it may require nMax = nMax + 1)
  #python -c "import random; numlist=list(range(${nMin},${nMax})); random.shuffle(numlist); print '\n'.join([str(int) for int in numlist])"
}
# Function to generate numbers primarily using '$RANDOM' special variable
# If the special variable isn't available (e.g. dash shell), we fall back to a BSD-style
# Linear congruential generator (LCG) which we use to create our own '$RANDOM' variable
# This way, the entire bitshifting formula remains the same
# See: https://rosettacode.org/wiki/Linear_congruential_generator
# Globals read: nMin, nMax
# Globals written: nCount (counted down to 0), RANDOM, plus the working
#                  variables logn, nBitlen, rnSeed, rndBitlen, rnd, nRandShift
# Outputs: nCount integers in the inclusive range nMin..nMax, one per line
method_RANDOM() {
  # We need to know the number of bits required to represent nMax - nMin (i.e. bitlength)
  # This is for the rightwards bitshift
  # The comparison is '-ge' so that a span which is an exact power of two gets
  # its extra bit; with '-gt' the top value (nMax) could never be produced and
  # a span of 1 would always return nMin.  The 'logn > 0' test stops the loop
  # should the doubling ever overflow.
  logn=1
  nBitlen=0
  while [ $((nMax - nMin)) -ge "${logn}" ] && [ "${logn}" -gt 0 ]; do
    logn=$(( logn * 2 ))
    nBitlen=$(( nBitlen + 1 ))
  done
  # We set the initial seed just in case we're using the LCG
  # First we check if /dev/urandom is available.
  # We used to have a method_urandom. /dev/urandom can generate numbers fast
  # But selecting numbers in ranges etc made for a fairly slow method
  if [ -c /dev/urandom ] && command -v od >/dev/null 2>&1; then
    # Get a string of bytes from /dev/urandom using od
    rnSeed=$(od -N 4 -A n -t uL /dev/urandom | tr -d " ")
  # Otherwise we can just seed it using the epoch
  else
    rnSeed=$(date +%s)
  fi
  # Should 'od' have handed back anything other than a plain number, use the epoch
  case "${rnSeed}" in
    (''|*[!0-9]*) rnSeed=$(date +%s) ;;
  esac
  # Start a loop based on nCount. I thought of other ways, but this works easier
  # and can be made more portable if testing proves the need to do so
  while [ "${nCount}" -gt 0 ]; do
    # Start generating seeds for the LCG
    rnSeed=$(( (1103515245 * rnSeed + 12345) % 2147483648 ))
    # If two reads of RANDOM are the same, then we're working with a shell that
    # does not support $RANDOM (it is unset, or an ordinary variable that we
    # assigned earlier), so we rotate it ourselves from the LCG.  This has to
    # happen on every pass, otherwise such shells would keep re-using the value
    # from the first iteration.
    # We print as an unsigned integer to ensure that it's a positive number
    if [ "${RANDOM}" = "${RANDOM}" ]; then
      RANDOM=$(printf "%u\n" "$(( rnSeed / 65536 ))")
    fi
    # Set our initial bitlength
    rndBitlen=15
    rnd="${RANDOM}" # Capture one output sample of RANDOM
    while [ "${rndBitlen}" -lt "${nBitlen}" ]; do
      # Stir the seed again just in case
      rnSeed=$(( (1103515245 * rnSeed + 12345) % 2147483648 ))
      # Same check as above: no native $RANDOM means we supply the next value
      if [ "${RANDOM}" = "${RANDOM}" ]; then
        RANDOM=$(printf "%u\n" "$(( rnSeed / 65536 ))")
      fi
      # Bitshift RANDOM to the left to stack it i.e. 15 int -> 30 int -> 45 int etc
      rnd=$(( rnd<<15|RANDOM ))
      # Keep stacking until the while loop exits
      rndBitlen=$(( rndBitlen + 15 ))
    done
    # Now bitshift it right, discarding the surplus low bits
    nRandShift=$(( rnd>>(rndBitlen-nBitlen) ))
    # Next we test if the number we've generated fits into our range. If so,
    # then we can use it and iterate the while loop; if not it is thrown away
    # and we simply draw again
    if [ $((nRandShift + nMin)) -le "${nMax}" ]; then
      printf "%u\n" "$(( nRandShift + nMin ))"
      nCount=$(( nCount - 1 ))
    fi
  done
}
# Function to generate numbers using GNU 'shuf'
# Globals read: repeat, nMin, nMax
# Globals read and, in the one-at-a-time fallback, counted down to 0: nCount
method_shuf() {
  # The simple case first: without the repeat option a single plain call does the job
  if [ "${repeat}" != true ]; then
    shuf -n "${nCount}" -i "${nMin}-${nMax}"
    return
  fi
  # Repeats were asked for, which needs special handling.
  # Solaris 11 ships 'shuf' v8.16, which lacks the '-r' option (introduced in
  # v8.22), so probe for it with a throwaway call and use it when it works
  if shuf -n 1 -r -i 1-10 >/dev/null 2>&1; then
    shuf -n "${nCount}" -r -i "${nMin}-${nMax}"
    return
  fi
  # No '-r' available: do it the old fashioned way, drawing one number per
  # call and counting down until we have produced enough
  while [ "${nCount}" -gt 0 ]; do
    shuf -n 1 -i "${nMin}-${nMax}"
    nCount=$(( nCount - 1 ))
  done
}
# Check if 'seq' is available, if not, provide a basic replacement function
# Note: this has been stripped back to cater only for ascending sequences
# A fuller, bash-friendly version is available at https://github.com/rawiriblundell
#
# Fn_seq FIRST LAST
# Prints the integers FIRST..LAST (inclusive), one per line, and prints
# nothing at all when FIRST is greater than LAST.
# Globals written: i
Fn_seq() {
  i=$1
  # '-le' rather than '-ne': a FIRST that is already beyond LAST must end the
  # loop straight away instead of counting upwards without ever matching
  while [ "${i}" -le "$2" ]; do
    printf "%s\n" "${i}"
    # Leave before incrementing past LAST, so a LAST at the very top of the
    # shell's integer range cannot wrap around
    if [ "${i}" -eq "$2" ]; then
      break
    fi
    i=$(( i + 1 ))
  done
}
# Only stand in for 'seq' when the real thing is missing
if ! command -v seq >/dev/null 2>&1; then
  seq() {
    Fn_seq "$@"
  }
fi
###############################################################################
# Main
###############################################################################
# Pick the first generator that is usable on this host, in order of preference.
# The choice is made *before* any output pipeline is started, so that the
# failure branch can report on stderr and really exit non-zero, and so that
# the debug line is not run through the zero-padding filter.
#   methodName - label shown by the debug option
#   methodFn   - function that produces the numbers
#   needsFill  - whether Fn_fill has to enforce uniqueness for this method
needsFill=true
# Cater for GNU shuf, nice and fast
# This function will also cater for the repeat option natively
if command -v shuf > /dev/null 2>&1; then
  methodName='shuf'
  methodFn=method_shuf
  needsFill=false
# If we're on a BSD based host, likely 'jot' is available, so let's use it
# 'jot' is limited to 2^31-1 by the arc4random algorithm
# so we also test for an nMax limit based on that
# Repeating generated numbers is the default behaviour of 'jot'
elif command -v jot > /dev/null 2>&1 && [ "${nMax}" -lt 2147483647 ]; then
  methodName='jot'
  methodFn=method_jot
# Now we start going less-native and try perl. Very likely to be there,
# so very likely this will be a commonly used option
elif command -v perl > /dev/null 2>&1; then
  methodName='perl'
  methodFn=method_perl
# Otherwise, we try python
# We need to ensure that /dev/urandom is available, as python sources it
elif command -v python > /dev/null 2>&1 && [ -c /dev/urandom ]; then
  methodName='python'
  methodFn=method_python
# No perl or python? Let's try 'gawk'
elif command -v gawk > /dev/null 2>&1; then
  methodName='gawk'
  methodFn=method_gawk
# No gawk? Surely 'nawk' or 'mawk' is hanging around? Works very similar, but
# because we don't have systime() we have to replicate it as a seed for srand().
# Each name is looked up on its own, as 'command -v' is only specified for a
# single name
elif command -v nawk > /dev/null 2>&1 || command -v mawk > /dev/null 2>&1; then
  methodName='nawk/mawk'
  methodFn=method_nawk
# Note: oawk does not have srand() or rand(), it's more trouble than it's worth so let's move on
# No shuf, jot, perl, python, gawk, nawk or mawk? Fear not!
# Let's try for a POSIX friendly shell solution. This is limited to 2^60-1
elif [ "${nMax}" -lt 1152921504606846975 ]; then
  methodName='Skoruppa bitshift'
  methodFn=method_RANDOM
# Provide an outright failure condition just in case
else
  printf "%s\n" "[ERROR] rand: Unable to find a suitable method to generate a random integer" 1>&2
  exit 1
fi
# Tell the user which method was picked, if they asked
if [ "${debug}" = true ]; then
  printf "%s\n" "[DEBUG] rand: Method used is '${methodName}'"
fi
# Generate the numbers.  When repeats are allowed, or the method takes care of
# uniqueness by itself, it is called directly; otherwise Fn_fill wraps it.
# Either way the result goes through the (optional) zero-padding filter.
if [ "${repeat}" = true ] || [ "${needsFill}" = false ]; then
  "${methodFn}" | Fn_zeropad
else
  Fn_fill "${methodFn}" | Fn_zeropad
fi
exit 0