Skip to content

Commit

Permalink
Merge pull request #76 from tmcw/shuffle
Browse files Browse the repository at this point in the history
Implement Fisher-Yates shuffle for random sub-selections
  • Loading branch information
tmcw committed Aug 20, 2014
2 parents c34c4f9 + f6be6b8 commit 96b6cac
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 2 deletions.
5 changes: 4 additions & 1 deletion .jshintrc
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
{
"indent": 4,
"undef": true,
"unused": true
"unused": true,
"globals": {
"require": true
}
}
15 changes: 15 additions & 0 deletions API.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,21 @@ into chunks of `chunkSize` size and returns an array of those chunks. This
does not change the input value. If the length of `sample` is not divisible
by `chunkSize`, the last array will be shorter than the rest.

### .shuffle(sample)

Given a `sample` array (with any type of contents), return a random permutation
of that array, using the [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
algorithm. The input array is not modified: a shallow copy is shuffled and returned.

### .shuffle_in_place(sample)

Given a `sample` array (with any type of contents), return a random permutation
of that array, using the [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
algorithm.

This changes the input array in place and also returns it. Unlike `.shuffle()`,
it does not create a shallow copy of the array.

### .quantile_sorted(sample, p)

Does a [quantile](http://en.wikipedia.org/wiki/Quantile) of a dataset `sample`,
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
"jshint": "2.5.3",
"coveralls": "~2.11.1",
"istanbul": "~0.3.0",
"tape": "~2.14.0"
"tape": "~2.14.0",
"random-js": "~1.0.4"
},
"scripts": {
"test": "tape test/*.js",
Expand Down
54 changes: 54 additions & 0 deletions src/simple_statistics.js
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,58 @@
return output;
}

// # shuffle_in_place
//
// Reorders `sample` with a
// [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle).
// The array passed in is itself rearranged (no copy is made) and the
// same array reference is returned.
//
// `randomSource` is optional: a zero-argument function whose result is
// scaled by the number of unshuffled items and floored to pick an index.
// When it is omitted (or falsy) `Math.random` is used. Supplying your own
// source allows a fixed seed or another generator, like
// [random-js](https://www.npmjs.org/package/random-js).
function shuffle_in_place(sample, randomSource) {

    // fall back to the built-in generator when no source is supplied
    var nextRandom = randomSource || Math.random;

    // `last` is the final slot of the not-yet-shuffled prefix, `pick` is
    // the randomly chosen slot inside that prefix, and `held` parks a
    // value while the two slots trade places.
    var last, pick, held;

    // `remaining` counts the items at the front of the array that have
    // not been placed yet; every pass fixes one more item at the back.
    for (var remaining = sample.length; remaining > 0; remaining--) {
        // choose a random index within the unshuffled prefix
        pick = Math.floor(nextRandom() * remaining);
        last = remaining - 1;

        // swap the value at `sample[last]` with `sample[pick]`
        held = sample[last];
        sample[last] = sample[pick];
        sample[pick] = held;
    }

    return sample;
}

// # shuffle
//
// A [Fisher-Yates shuffle](http://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle)
// is a fast way to create a random permutation of a finite set.
//
// Returns a new, shuffled array and leaves `sample` untouched. The
// optional `randomSource` is forwarded unchanged to `shuffle_in_place`.
function shuffle(sample, randomSource) {
    // take a single shallow copy so the caller's array is not modified,
    // then shuffle that copy in place and return it
    return shuffle_in_place(sample.slice(), randomSource);
}

// # quantile
//
// This is a population quantile, since we assume to know the entire
Expand Down Expand Up @@ -1410,6 +1462,8 @@
ss.mad = mad;

ss.chunk = chunk;
ss.shuffle = shuffle;
ss.shuffle_in_place = shuffle_in_place;

ss.sample_covariance = sample_covariance;
ss.sample_correlation = sample_correlation;
Expand Down
24 changes: 24 additions & 0 deletions test/shuffle.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
var test = require('tape');
var Random = require('random-js');
// Mersenne Twister engine with a fixed seed of 0, so the sequence of draws
// (and therefore the expected permutations asserted below) is repeatable.
var random = new Random(Random.engines.mt19937().seed(0));
var ss = require('../');

// Zero-argument adapter handed to the shuffle functions as their
// `randomSource`: each call asks the module-level `random` generator
// for `real(0, 1)`.
function rng() {
    var draw = random.real(0, 1);
    return draw;
}

// Checks `ss.shuffle`: it returns a permuted array and leaves its input alone.
// The expected permutations are tied to the values `rng` produces, in the
// order the calls below consume them - reordering the calls changes the results.
test('shuffle', function(t) {
var input = [1, 2, 3, 4, 5, 6];
// an empty input comes back as an empty array
t.deepEqual(ss.shuffle([], rng), []);
t.deepEqual(ss.shuffle(input, rng), [1, 5, 3, 2, 4, 6]);
t.deepEqual(input, [1, 2, 3, 4, 5, 6], 'does not change original array');
// a second shuffle of the same input continues the rng sequence, so it
// yields a different permutation
t.deepEqual(ss.shuffle(input, rng), [5, 4, 1, 3, 6, 2]);
t.deepEqual(input, [1, 2, 3, 4, 5, 6], 'does not change original array');
t.end();
});

// Checks `ss.shuffle_in_place`: the returned array and the input array both
// hold the shuffled order afterwards.
// NOTE(review): the expected permutation presumably relies on this test running
// after the 'shuffle' test, since both draw from the same `rng` - confirm
// before reordering or running tests in isolation.
test('shuffle_in_place', function(t) {
var input = [1, 2, 3, 4, 5, 6];
// an empty input comes back as an empty array
t.deepEqual(ss.shuffle_in_place([], rng), []);
t.deepEqual(ss.shuffle_in_place(input, rng), [6, 1, 5, 2, 4, 3]);
// the input array itself now holds the shuffled order
t.deepEqual(input, [6, 1, 5, 2, 4, 3], 'changes original array');
t.end();
});

0 comments on commit 96b6cac

Please sign in to comment.