From 868a3a069c1a512cd1cae8a4ef4b0287f7af134f Mon Sep 17 00:00:00 2001 From: Tom MacWright Date: Sun, 9 Aug 2015 23:44:49 -0400 Subject: [PATCH] Examples, tests, changelog --- CHANGELOG.md | 2 ++ index.js | 1 + src/ckmeans.js | 10 +++++++--- src/perceptron.js | 14 ++++++++++++++ src/sample_standard_deviation.js | 3 +++ src/sum_nth_power_deviations.js | 5 +++++ src/variance.js | 15 ++++----------- src/z_score.js | 2 ++ test/sum_nth_power_deviations.test.js | 25 +++++++++++++++++++++++++ test/z_score.test.js | 13 +++++++++++++ 10 files changed, 76 insertions(+), 14 deletions(-) create mode 100644 test/sum_nth_power_deviations.test.js create mode 100644 test/z_score.test.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 664e1372..7e35d7d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,6 +74,8 @@ var breaks = ss.ckmeans([1, 2, 4, 5, 7, 9, 10, 20], 3)).map(function(cluster) { * Ckmeans replaces Jenks * `sortedUniqueCount` provides an extremely fast method for counting unique values of sorted arrays. +* `sumNthPowerDeviations` is now exposed, providing a simple way to calculate + the fundamental aspect of measures like variance and skewness. 
### Non-Breaking Changes diff --git a/index.js b/index.js index 603ee358..1813c2e6 100644 --- a/index.js +++ b/index.js @@ -25,6 +25,7 @@ ss.shuffleInPlace = require('./src/shuffle_in_place'); ss.sample = require('./src/sample'); ss.ckmeans = require('./src/ckmeans'); ss.sortedUniqueCount = require('./src/sorted_unique_count'); +ss.sumNthPowerDeviations = require('./src/sum_nth_power_deviations'); // sample statistics ss.sampleCovariance = require('./src/sample_covariance'); diff --git a/src/ckmeans.js b/src/ckmeans.js index ffe22a04..ba8d539b 100644 --- a/src/ckmeans.js +++ b/src/ckmeans.js @@ -36,6 +36,9 @@ function makeMatrix(columns, rows) { * Minimizing the difference within groups - what Wang & Song refer to as * `withinss`, or within sum-of-squares, means that groups are optimally * homogenous within and the data is split into representative groups. + * This is very useful for visualization, where you may want to represent + * a continuous variable in discrete color or style groups. This function + * can provide groups that emphasize differences between data. * * Being a dynamic approach, this algorithm is based on two matrices that * store incrementally-computed values for squared deviations and backtracking @@ -55,9 +58,10 @@ function makeMatrix(columns, rows) { * @param {number} nClusters number of desired classes. This cannot be * greater than the number of values in the data array. * @returns {Array>} clustered input - * @examples - * // split data into 3 break points - * jenks([1, 2, 4, 5, 7, 9, 10, 20], 3) // = [1, 7, 20, 20] + * @example + * ckmeans([-1, 2, -1, 2, 4, 5, 6, -1, 2, -1], 3); + * // The input, clustered into groups of similar numbers. 
+ * //= [[-1, -1, -1, -1], [2, 2, 2], [4, 5, 6]] */ function ckmeans(data, nClusters) { diff --git a/src/perceptron.js b/src/perceptron.js index d5c8b6b8..3040504c 100644 --- a/src/perceptron.js +++ b/src/perceptron.js @@ -5,6 +5,20 @@ * arrays of numbers and predicts whether they should be classified * as either 0 or 1 (negative or positive examples). * @class + * @example + * // Create the model + * var p = new PerceptronModel(); + * // Train the model with input with a diagonal boundary. + * for (var i = 0; i < 5; i++) { + * p.train([1, 1], 1); + * p.train([0, 1], 0); + * p.train([1, 0], 0); + * p.train([0, 0], 0); + * } + * p.predict([0, 0]); // 0 + * p.predict([0, 1]); // 0 + * p.predict([1, 0]); // 0 + * p.predict([1, 1]); // 1 */ function PerceptronModel() { // The weights, or coefficients of the model; diff --git a/src/sample_standard_deviation.js b/src/sample_standard_deviation.js index 9841f999..622b1639 100644 --- a/src/sample_standard_deviation.js +++ b/src/sample_standard_deviation.js @@ -8,6 +8,9 @@ var sampleVariance = require('./sample_variance'); * * @param {Array} x input array * @returns {number} sample standard deviation + * @example + * ss.sampleStandardDeviation([2, 4, 4, 4, 5, 5, 7, 9]); + * //= 2.138 */ function sampleStandardDeviation(x) { // The standard deviation of no numbers is null diff --git a/src/sum_nth_power_deviations.js b/src/sum_nth_power_deviations.js index f139d6d6..e0dacef4 100644 --- a/src/sum_nth_power_deviations.js +++ b/src/sum_nth_power_deviations.js @@ -10,6 +10,11 @@ var mean = require('./mean'); * @param {Array} x * @param {number} n power * @returns {number} sum of nth power deviations + * @example + * var input = [1, 2, 3]; + * // since the variance of a set is the mean of squared + * // deviations, we can calculate that with sumNthPowerDeviations: + * var variance = sumNthPowerDeviations(input, 2) / input.length; */ function sumNthPowerDeviations(x, n) { var meanValue = mean(x), diff --git a/src/variance.js 
b/src/variance.js index e8c88edc..8f169b93 100644 --- a/src/variance.js +++ b/src/variance.js @@ -1,6 +1,6 @@ 'use strict'; -var mean = require('./mean'); +var sumNthPowerDeviations = require('./sum_nth_power_deviations'); /** * The [variance](http://en.wikipedia.org/wiki/Variance) @@ -19,16 +19,9 @@ function variance(x) { // The variance of no numbers is null if (x.length === 0) { return null; } - var meanValue = mean(x), - deviations = []; - - // Make a list of squared deviations from the mean. - for (var i = 0; i < x.length; i++) { - deviations.push(Math.pow(x[i] - meanValue, 2)); - } - - // Find the mean value of that list - return mean(deviations); + // Find the mean of squared deviations between the + // mean value and each value. + return sumNthPowerDeviations(x, 2) / x.length; } module.exports = variance; diff --git a/src/z_score.js b/src/z_score.js index d76a8a13..cc9453e3 100644 --- a/src/z_score.js +++ b/src/z_score.js @@ -20,6 +20,8 @@ * @param {number} mean * @param {number} standardDeviation * @return {number} z score + * @example + * ss.zScore(78, 80, 5); //= -0.4 */ function zScore(x, mean, standardDeviation) { return (x - mean) / standardDeviation; diff --git a/test/sum_nth_power_deviations.test.js b/test/sum_nth_power_deviations.test.js new file mode 100644 index 00000000..bdcb74f4 --- /dev/null +++ b/test/sum_nth_power_deviations.test.js @@ -0,0 +1,25 @@ +/* eslint no-shadow: 0 */ +'use strict'; + +var test = require('tape'); +var ss = require('../'); + +function rnd(x) { + return Math.round(x * 1000) / 1000; +} + +test('sumNthPowerDeviations', function(t) { + t.equal( + ss.sumNthPowerDeviations([0, 0, 0], 2), + 0); + t.equal( + ss.sumNthPowerDeviations([0, 1], 2), + 0.5); + t.equal( + ss.sumNthPowerDeviations([0, 1], 3), + 0); + t.equal( + ss.sumNthPowerDeviations([0, 1, 2], 2), + 2); + t.end(); +}); diff --git a/test/z_score.test.js b/test/z_score.test.js new file mode 100644 index 00000000..59300cc9 --- /dev/null +++ b/test/z_score.test.js @@ 
-0,0 +1,13 @@ +/* eslint no-shadow: 0 */ +'use strict'; + +var test = require('tape'); +var ss = require('../'); + +// The zScore method is also tested in the normal distribution tests. +test('zScore', function(t) { + t.equal(ss.zScore(78, 80, 5), -0.4); + t.equal(ss.zScore(78, 90, 5), -2.4); + t.equal(ss.zScore(78, 90, 2), -6); + t.end(); +});