Skip to content

Commit

Permalink
Introduce Welford's online algorithm for variance
Browse files Browse the repository at this point in the history
  • Loading branch information
sanmai committed Apr 14, 2023
1 parent 9260c4d commit 2dc155d
Show file tree
Hide file tree
Showing 8 changed files with 611 additions and 15 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/composer.lock
/vendor
/build
/.phpunit.result.cache
*.cache
.?*
2 changes: 1 addition & 1 deletion .phan/config.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use Phan\Issue;

return [
'target_php_version' => '7.1',
'target_php_version' => '7.4',
'backward_compatibility_checks' => false,
'exclude_analysis_directory_list' => [
'vendor/',
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ This rigorously tested library just works. Pipeline neither defines nor throws a

composer require sanmai/pipeline

The latest version requires PHP 7.1 or above, including PHP 8.0 and later.
The latest version requires PHP 7.4 or above, including PHP 8.0 and later.

There are earlier versions that work under PHP 5.6 and above, but they are not as feature complete.

Expand Down
2 changes: 1 addition & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
}
],
"require": {
"php": "^7.1 || ^8.0"
"php": "^7.4 || ^8.0"
},
"require-dev": {
"ergebnis/composer-normalize": "^2.8",
Expand Down
138 changes: 138 additions & 0 deletions src/Helper/RunningVariance.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
<?php
/**
* Copyright 2017, 2018 Alexey Kopytko <alexey@kopytko.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

declare(strict_types=1);

namespace Pipeline\Helper;

use function sqrt;

/**
* Computes statistics (such as standard deviation) in real time.
*
* @see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Welford's_online_algorithm
*
* @final
*/
class RunningVariance
{
private const ZERO = 0;

/** The number of observed values. */
private int $count = 0;

/** The mean value. */
private float $mean = 0.0;

/** The aggregated squared distance from the mean. */
private float $m2 = 0.0;

public function __construct(self ...$spiesToMerge)
{
foreach ($spiesToMerge as $spy) {
$this->merge($spy);
}
}

public function observe(float $value): float
{
++$this->count;

$delta = $value - $this->mean;

$this->mean += $delta / $this->count;
$this->m2 += $delta * ($value - $this->mean);

return $value;
}

/**
* The number of observed values.
*/
public function getCount(): int
{
return $this->count;
}

/**
* Get the mean value.
*/
public function getMean(): float
{
if (self::ZERO === $this->count) {
// For no values the variance is undefined.
return NAN;
}

return $this->mean;
}

/**
* Get the variance.
*/
public function getVariance(): float
{
if (self::ZERO === $this->count) {
// For no values the variance is undefined.
return NAN;
}

if (1 === $this->count) {
// Avoiding division by zero: variance for one value is zero.
return 0.0;
}

return $this->m2 / ($this->count - 1);
}

/**
* Compute the standard deviation.
*/
public function getStandardDeviation(): float
{
return sqrt($this->getVariance());
}

/**
* Merge another instance into this instance.
*
* @see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
*/
private function merge(self $other): void
{
// Shortcut a no-op
if (self::ZERO === $other->count) {
return;
}

// Avoid division by zero by copying values
if (self::ZERO === $this->count) {
$this->count = $other->count;
$this->mean = $other->mean;
$this->m2 = $other->m2;

return;
}

$count = $this->count + $other->count;
$delta = $other->mean - $this->mean;

$this->mean = ($this->count * $this->mean) / $count + ($other->count * $other->mean) / $count;
$this->m2 = $this->m2 + $other->m2 + ($delta * $delta * $this->count * $other->count / $count);
$this->count = $count;
}
}
80 changes: 69 additions & 11 deletions src/Standard.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
use Generator;
use Iterator;
use IteratorAggregate;
use Pipeline\Helper\RunningVariance;
use Traversable;
use function array_chunk;
use function array_filter;
Expand Down Expand Up @@ -63,10 +64,8 @@ class Standard implements IteratorAggregate, Countable

/**
* Contructor with an optional source of data.
*
* @param ?iterable $input
*/
public function __construct(iterable $input = null)
public function __construct(?iterable $input = null)
{
// IteratorAggregate is a nuance best we avoid dealing with.
// For example, CallbackFilterIterator needs a plain Iterator.
Expand All @@ -79,10 +78,8 @@ public function __construct(iterable $input = null)

/**
* Appends the contents of an interable to the end of the pipeline.
*
* @param ?iterable $values
*/
public function append(iterable $values = null): self
public function append(?iterable $values = null): self
{
// Do we need to do anything here?
if ($this->willReplace($values)) {
Expand All @@ -108,10 +105,8 @@ public function push(...$vector): self

/**
* Prepends the pipeline with the contents of an iterable.
*
* @param ?iterable $values
*/
public function prepend(iterable $values = null): self
public function prepend(?iterable $values = null): self
{
// Do we need to do anything here?
if ($this->willReplace($values)) {
Expand Down Expand Up @@ -140,7 +135,7 @@ public function unshift(...$vector): self
*
* Utility method for appending/prepending methods.
*/
private function willReplace(iterable $values = null): bool
private function willReplace(?iterable $values = null): bool
{
// Nothing needs to be done here.
/** @phan-suppress-next-line PhanTypeComparisonFromArray */
Expand Down Expand Up @@ -263,7 +258,7 @@ public function chunk(int $length, bool $preserve_keys = false): self
}

/**
* @psalm-param positive-int $length
* @psalm-param positive-int $length
*/
private static function toChunks(Generator $input, int $length, bool $preserve_keys): iterable
{
Expand Down Expand Up @@ -992,4 +987,67 @@ private static function flipKeysAndValues(iterable $previous): iterable
yield $value => $key;
}
}

/**
* Feeds in an instance of RunningVariance.
*
* @param RunningVariance $variance the instance of RunningVariance
* @param ?callable $castFunc the cast callback, returning ?float; null values are not counted
*
* @return $this
*/
public function feedRunningVariance(RunningVariance $variance, ?callable $castFunc = null)
{
if (null === $castFunc) {
$castFunc = 'floatval';
}

return $this->cast(static function ($value) use ($variance, $castFunc) {
$float = $castFunc($value);

if (null !== $float) {
$variance->observe($float);
}

// Returning the original value here
return $value;
});
}

public function onlineVariance(?callable $castFunc = null): RunningVariance
{
$variance = new RunningVariance();

$this->feedRunningVariance($variance, $castFunc);

return $variance;
}

public function variance(?callable $castFunc = null): RunningVariance
{
$variance = new RunningVariance();

if (null === $this->pipeline) {
// No-op: null.
return $variance;
}

if ([] === $this->pipeline) {
// No-op: an empty array.
return $variance;
}

$this->feedRunningVariance($variance, $castFunc);

if (is_array($this->pipeline)) {
// We are done!
return $variance;
}

foreach ($this->pipeline as $_) {
// Discard
}

return $variance;
}
}

0 comments on commit 2dc155d

Please sign in to comment.