diff --git a/.gitattributes b/.gitattributes index 0fcf7b8..1314fc6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,9 +1,9 @@ /.gitignore export-ignore /.gitattributes export-ignore +/phpcs.xml export-ignore /phpunit.xml export-ignore /psalm.xml export-ignore /psalm.baseline.xml export-ignore -/phpcs.xml export-ignore /.phive export-ignore /test export-ignore diff --git a/README.md b/README.md index 827cedb..5863a3d 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ The constructor of `LargeArrayBuffer` provides some options: E.g.: `new LargeArrayBuffer(512);` to set a 512 MiB threshold. 1. You can enable GZIP compression for the serialized items. Although this is recommended only if your items are pretty big like > 1 KiB each. `new LargeArrayBuffer(compression: LargeArrayBuffer::COMPRESSION_GZIP);` -### Read your data from the buffer +### Read from the buffer There are several options to read the data: @@ -68,6 +68,31 @@ There are some stats you can obtain: To put it in one sentence: This library uses [php://temp](https://www.php.net/manual/en/wrappers.php.php) as well as PHP's [serialize](https://www.php.net/manual/en/function.serialize.php)/[unserialize](https://www.php.net/manual/en/function.unserialize.php) functions to store an array on disk if it gets too large. 
+## Limitations and concerns + +- associative arrays are not supported +- the item type needs to be compatible with PHP's [serialize](https://www.php.net/manual/en/function.serialize.php)/[unserialize](https://www.php.net/manual/en/function.unserialize.php) functions +- since storage drives (even PCIe SSDs) are a lot slower than memory and de-/serialization needs to be done, you trade hard memory overflows for performance losses + +### Benchmark + +A benchmark with 1 million measurements (consisting of DateTimeImmutable, int and float) using PHP 8.2 with 10 iterations comparing a normal array with the LargeArrayBuffer gave the following results (LargeArrayBuffer was configured with a memory limit of 256 MiB): + +| Action | Consumed time | Consumed memory | Buffer size | +|--------|---------------|-----------------|-------------| +| Fill array | 1.65 s | 476 MiB | NA | +| Iterate over array | 0.14 s | 478 MiB | NA | +| Fill buffer | 10.43 s | 0 B | 378.7 MiB | +| Iterate over buffer | 4.67 s | 0 B | 378.7 MiB | + +Note: + +- The peak memory usage when using the buffer is roughly its configured memory limit. The table shows the memory usage after the specified action. +- PHP seems to cache the array once it is created for the first time, although `unset` is used. That is why I have not put the average value in the table for this specific value but the maximum (first run). +- The serialized data is smaller than the binary data in memory. I have absolutely no idea why. + +To reproduce, run `bench/benchmark.php`. + ## License This library is licensed under the MIT License (MIT). Please see [LICENSE](LICENSE) for more information. 
diff --git a/bench/LargeArrayBufferBench.php b/bench/LargeArrayBufferBench.php new file mode 100644 index 0000000..a7bfdfa --- /dev/null +++ b/bench/LargeArrayBufferBench.php @@ -0,0 +1,57 @@ +count = $count; + } + + private function generateMeasurement(int $index): Measurement { + return new Measurement( + (new \DateTimeImmutable())->sub(new \DateInterval('PT'.$index.'H')), + $index % 500, + random_int(-1_000_000, 1_000_000) / 1000);; + } + + public function arrayMeasurementsFill(): array { + $arr = []; + for($i = 0; $i < $this->count; $i++){ + $arr[] = $this->generateMeasurement($i); + } + return $arr; + } + + public function arrayMeasurementsIterate(array $arr): void { + foreach($arr as $index => $item){ + $index; + $item; + } + } + + public function bufferMeasurementsFill(LargeArrayBuffer $buf): void { + for($i = 0; $i < $this->count; $i++){ + $buf->push($this->generateMeasurement($i)); + } + } + + public function bufferMeasurementsIterate(LargeArrayBuffer $buf): void { + foreach($buf as $index => $item){ + $index; + $item; + } + } +} diff --git a/bench/benchmark.php b/bench/benchmark.php new file mode 100644 index 0000000..8e008d3 --- /dev/null +++ b/bench/benchmark.php @@ -0,0 +1,88 @@ +arrayMeasurementsFill(); + $metrics['fill_array'][] = [ + 'time' => microtime(true) - $start, + 'mem' => memory_get_usage(true) - $memBefore + ]; + + $start = microtime(true); + $bench->arrayMeasurementsIterate($arr); + $metrics['iterate_array'][] = [ + 'time' => microtime(true) - $start, + 'mem' => memory_get_usage(true) - $memBefore + ]; + unset($arr); + + $start = microtime(true); + $memBefore = memory_get_usage(true); + $buf = new LargeArrayBuffer(256); + $bench->bufferMeasurementsFill($buf); + $time = microtime(true) - $start; + $mem = memory_get_usage(true) - $memBefore; + $metrics['fill_buffer'][] = [ + 'time' => microtime(true) - $start, + 'mem' => memory_get_usage(true) - $memBefore, + 'size' => $buf->getSize() + ]; + + $start = microtime(true); + 
$bench->bufferMeasurementsIterate($buf); + $time = microtime(true) - $start; + $mem = memory_get_usage(true) - $memBefore; + $metrics['iterate_buffer'][] = [ + 'time' => microtime(true) - $start, + 'mem' => memory_get_usage(true) - $memBefore, + 'size' => $buf->getSize() + ]; + unset($buf); + + unset($bench); +} + +printResult('Fill array', $metrics, 'fill_array', 2); +printResult('Iterate over array', $metrics, 'iterate_array', 1); +printResult('Fill buffer', $metrics, 'fill_buffer', 2, true); +printResult('Iterate over buffer', $metrics, 'iterate_buffer', 1, true); diff --git a/bench/item/Measurement.php b/bench/item/Measurement.php new file mode 100644 index 0000000..5791714 --- /dev/null +++ b/bench/item/Measurement.php @@ -0,0 +1,20 @@ +timestamp = $timestamp; + $this->sensorID = $sensorID; + $this->value = $value; + } +} diff --git a/composer.json b/composer.json index 46e5ae9..d2383a2 100644 --- a/composer.json +++ b/composer.json @@ -18,7 +18,7 @@ "php": ">=8.0 <8.4" }, "require-dev": { - "squizlabs/php_codesniffer": ">=3.7" + "squizlabs/php_codesniffer": "^3.7" }, "prefer-stable": true, "autoload": { @@ -28,7 +28,9 @@ }, "autoload-dev": { "psr-4": { - "LargeArrayBuffer\\Tests\\": "test/" + "LargeArrayBuffer\\Tests\\": "test/", + "LargeArrayBuffer\\Benchmarks\\": "bench/", + "LargeArrayBuffer\\Benchmarks\\Items\\": "bench/item/" } }, "scripts": { @@ -56,4 +58,4 @@ "update-psalm-baseline": "Updates baseline for psalm. CAUTION should not be run as a regular procedure!", "tests": "Runs all available tests." 
} -} \ No newline at end of file +} diff --git a/phpcs.xml b/phpcs.xml index bd2301c..4d8f6bc 100644 --- a/phpcs.xml +++ b/phpcs.xml @@ -87,7 +87,7 @@ - + diff --git a/src/LargeArrayBuffer.php b/src/LargeArrayBuffer.php index fded558..a13dde8 100644 --- a/src/LargeArrayBuffer.php +++ b/src/LargeArrayBuffer.php @@ -6,7 +6,7 @@ /** * @author Andreas Wahlen * @template E of object|array|scalar|null - * @implements \Iterator + * @implements \Iterator, E> * @psalm-suppress TooManyTemplateParams */ class LargeArrayBuffer implements \Iterator, \Countable { @@ -34,8 +34,14 @@ class LargeArrayBuffer implements \Iterator, \Countable { */ private $stream; + /** + * @var int<0, max> + */ private int $count = 0; + /** + * @var int<0, max> + */ private int $index = 0; private ?string $current = null; @@ -111,12 +117,20 @@ public function current(): mixed { if($this->current === null) { throw new \RuntimeException('index out of bounds (you might want to call next() and/or valid() before!)'); } - return match($this->serializer){ + /** @psalm-var E $res */ + $res = match($this->serializer){ //self::SERIALIZER_JSON => json_decode($this->current, flags: JSON_THROW_ON_ERROR), default => unserialize($this->current) }; + return $res; } + /** + * {@inheritDoc} + * @see \Iterator::key() + * @psalm-return int<-1, max> + * @psalm-mutation-free + */ public function key(): int { return $this->index - 1; } @@ -130,11 +144,18 @@ public function valid(): bool { /** * @return int|null size in bytes or null if unknown + * @psalm-mutation-free */ public function getSize(): ?int { return fstat($this->stream)['size'] ?? null; } + /** + * {@inheritDoc} + * @see \Countable::count() + * @psalm-return int<0, max> + * @psalm-mutation-free + */ public function count(): int { return $this->count; }