diff --git a/.gitattributes b/.gitattributes
index 0fcf7b8..1314fc6 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,9 +1,9 @@
/.gitignore export-ignore
/.gitattributes export-ignore
+/phpcs.xml export-ignore
/phpunit.xml export-ignore
/psalm.xml export-ignore
/psalm.baseline.xml export-ignore
-/phpcs.xml export-ignore
/.phive export-ignore
/test export-ignore
diff --git a/README.md b/README.md
index 827cedb..5863a3d 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ The constructor of `LargeArrayBuffer` provides some options:
E.g.: `new LargeArrayBuffer(512);` to set a 512 MiB threshold.
1. You can enable GZIP compression for the serialized items. Although this is recommended only if your items are pretty big like > 1 KiB each. `new LargeArrayBuffer(compression: LargeArrayBuffer::COMPRESSION_GZIP);`
-### Read your data from the buffer
+### Read from the buffer
There are several options to read the data:
@@ -68,6 +68,31 @@ There are some stats you can obtain:
To put it in one sentence: This library uses [php://temp](https://www.php.net/manual/en/wrappers.php.php) as well as PHP's [serialize](https://www.php.net/manual/en/function.serialize.php)/[unserialize](https://www.php.net/manual/en/function.unserialize.php) functions to store an array on disk if it gets too large.
+## Limitations and concerns
+
+- associative arrays are not supported
+- the item type needs to be compatible with PHP's [serialize](https://www.php.net/manual/en/function.serialize.php)/[unserialize](https://www.php.net/manual/en/function.unserialize.php) functions
+- since storage drives (even PCIe SSDs) are a lot slower than memory and de-/serialization needs to be done, you trade hard memory overflows for performance losses
+
+### Benchmark
+
+A benchmark with 1 million measurements (consisting of DateTimeImmutable, int and float) using PHP 8.2 with 10 iterations comparing a normal array with the LargeArrayBuffer gave the following results (LargeArrayBuffer was configured with a memory limit of 256 MiB):
+
+| Action | Consumed time | Consumed memory | Buffer size |
+|--------|---------------|-----------------|-------------|
+| Fill array | 1.65 s | 476 MiB | NA |
+| Iterate over array | 0.14 s | 478 MiB | NA |
+| Fill buffer | 10.43 s | 0 B | 378.7 MiB |
+| Iterate over buffer | 4.67 s | 0 B | 378.7 MiB |
+
+Note:
+
+- The peak memory usage using the buffer is about its memory limit. The table shows the memory usage after the specified action.
+- PHP seems to cache the array once it is created for the first time, although `unset` is used. That is why, for this specific value, the table shows the maximum (first run) instead of the average.
+- The serialized data is smaller than the binary data in memory. I have absolutely no idea why.
+
+To reproduce, run `bench/benchmark.php`.
+
## License
This library is licensed under the MIT License (MIT). Please see [LICENSE](LICENSE) for more information.
diff --git a/bench/LargeArrayBufferBench.php b/bench/LargeArrayBufferBench.php
new file mode 100644
index 0000000..a7bfdfa
--- /dev/null
+++ b/bench/LargeArrayBufferBench.php
@@ -0,0 +1,57 @@
+count = $count;
+ }
+
+ /**
+  * Builds one synthetic benchmark item for the given position.
+  *
+  * The arguments handed to Measurement are, in order: the current time moved
+  * back by $index hours, $index modulo 500, and a random number between
+  * -1000 and 1000 in steps of 0.001.
+  *
+  * @param int $index position of the item; also the number of hours subtracted from "now"
+  * @return Measurement freshly created item
+  * @throws \Exception if DateInterval rejects the generated duration or no randomness source is available
+  */
+ private function generateMeasurement(int $index): Measurement {
+   // evaluated in the same order as the constructor arguments they feed
+   $timestamp = (new \DateTimeImmutable())->sub(new \DateInterval('PT'.$index.'H'));
+   $id = $index % 500;
+   $reading = random_int(-1_000_000, 1_000_000) / 1000;
+   return new Measurement($timestamp, $id, $reading);
+ }
+
+ /**
+  * Creates a plain PHP array containing as many generated measurements as
+  * $this->count specifies.
+  *
+  * @return array the generated items, appended in generation order
+  */
+ public function arrayMeasurementsFill(): array {
+   $measurements = [];
+   $position = 0;
+   while($position < $this->count){
+     $measurements[] = $this->generateMeasurement($position);
+     $position++;
+   }
+   return $measurements;
+ }
+
+ /**
+  * Walks over every entry of the given array without doing any work on it.
+  *
+  * @param array $arr the array to traverse
+  */
+ public function arrayMeasurementsIterate(array $arr): void {
+   foreach($arr as $key => $value){
+     // bare expression statements: both loop variables are read, nothing else happens
+     $key;
+     $value;
+   }
+ }
+
+ /**
+  * Pushes as many generated measurements into the given buffer as
+  * $this->count specifies.
+  *
+  * @param LargeArrayBuffer $buf buffer receiving the items via push()
+  */
+ public function bufferMeasurementsFill(LargeArrayBuffer $buf): void {
+   $position = 0;
+   while($position < $this->count){
+     $item = $this->generateMeasurement($position);
+     $buf->push($item);
+     $position++;
+   }
+ }
+
+ /**
+  * Walks over every entry of the given buffer without doing any work on it.
+  *
+  * @param LargeArrayBuffer $buf the buffer to traverse with foreach
+  */
+ public function bufferMeasurementsIterate(LargeArrayBuffer $buf): void {
+   foreach($buf as $key => $value){
+     // bare expression statements: both loop variables are read, nothing else happens
+     $key;
+     $value;
+   }
+ }
+}
diff --git a/bench/benchmark.php b/bench/benchmark.php
new file mode 100644
index 0000000..8e008d3
--- /dev/null
+++ b/bench/benchmark.php
@@ -0,0 +1,88 @@
+arrayMeasurementsFill();
+ $metrics['fill_array'][] = [
+ 'time' => microtime(true) - $start,
+ 'mem' => memory_get_usage(true) - $memBefore
+ ];
+
+ $start = microtime(true);
+ $bench->arrayMeasurementsIterate($arr);
+ $metrics['iterate_array'][] = [
+ 'time' => microtime(true) - $start,
+ 'mem' => memory_get_usage(true) - $memBefore
+ ];
+ unset($arr);
+
+ $start = microtime(true);
+ $memBefore = memory_get_usage(true);
+ $buf = new LargeArrayBuffer(256);
+ $bench->bufferMeasurementsFill($buf);
+ $time = microtime(true) - $start;
+ $mem = memory_get_usage(true) - $memBefore;
+ $metrics['fill_buffer'][] = [
+ 'time' => microtime(true) - $start,
+ 'mem' => memory_get_usage(true) - $memBefore,
+ 'size' => $buf->getSize()
+ ];
+
+ $start = microtime(true);
+ $bench->bufferMeasurementsIterate($buf);
+ $time = microtime(true) - $start;
+ $mem = memory_get_usage(true) - $memBefore;
+ $metrics['iterate_buffer'][] = [
+ 'time' => microtime(true) - $start,
+ 'mem' => memory_get_usage(true) - $memBefore,
+ 'size' => $buf->getSize()
+ ];
+ unset($buf);
+
+ unset($bench);
+}
+
+printResult('Fill array', $metrics, 'fill_array', 2); // passes the 'fill_array' samples gathered in the loop above
+printResult('Iterate over array', $metrics, 'iterate_array', 1); // passes the 'iterate_array' samples gathered in the loop above
+printResult('Fill buffer', $metrics, 'fill_buffer', 2, true); // NOTE(review): the trailing `true` presumably enables output of the extra 'size' entry - confirm in printResult
+printResult('Iterate over buffer', $metrics, 'iterate_buffer', 1, true); // NOTE(review): meaning of the 4th argument (2 vs. 1) is defined by printResult, not visible here - confirm
diff --git a/bench/item/Measurement.php b/bench/item/Measurement.php
new file mode 100644
index 0000000..5791714
--- /dev/null
+++ b/bench/item/Measurement.php
@@ -0,0 +1,20 @@
+timestamp = $timestamp;
+ $this->sensorID = $sensorID;
+ $this->value = $value;
+ }
+}
diff --git a/composer.json b/composer.json
index 46e5ae9..d2383a2 100644
--- a/composer.json
+++ b/composer.json
@@ -18,7 +18,7 @@
"php": ">=8.0 <8.4"
},
"require-dev": {
- "squizlabs/php_codesniffer": ">=3.7"
+ "squizlabs/php_codesniffer": "^3.7"
},
"prefer-stable": true,
"autoload": {
@@ -28,7 +28,9 @@
},
"autoload-dev": {
"psr-4": {
- "LargeArrayBuffer\\Tests\\": "test/"
+ "LargeArrayBuffer\\Tests\\": "test/",
+ "LargeArrayBuffer\\Benchmarks\\": "bench/",
+ "LargeArrayBuffer\\Benchmarks\\Items\\": "bench/item/"
}
},
"scripts": {
@@ -56,4 +58,4 @@
"update-psalm-baseline": "Updates baseline for psalm. CAUTION should not be run as a regular procedure!",
"tests": "Runs all available tests."
}
-}
\ No newline at end of file
+}
diff --git a/phpcs.xml b/phpcs.xml
index bd2301c..4d8f6bc 100644
--- a/phpcs.xml
+++ b/phpcs.xml
@@ -87,7 +87,7 @@
-
+
diff --git a/src/LargeArrayBuffer.php b/src/LargeArrayBuffer.php
index fded558..a13dde8 100644
--- a/src/LargeArrayBuffer.php
+++ b/src/LargeArrayBuffer.php
@@ -6,7 +6,7 @@
/**
* @author Andreas Wahlen
* @template E of object|array|scalar|null
- * @implements \Iterator
+ * @implements \Iterator, E>
* @psalm-suppress TooManyTemplateParams
*/
class LargeArrayBuffer implements \Iterator, \Countable {
@@ -34,8 +34,14 @@ class LargeArrayBuffer implements \Iterator, \Countable {
*/
private $stream;
+ /**
+ * @var int<0, max>
+ */
private int $count = 0;
+ /**
+ * @var int<0, max>
+ */
private int $index = 0;
private ?string $current = null;
@@ -111,12 +117,20 @@ public function current(): mixed {
if($this->current === null) {
throw new \RuntimeException('index out of bounds (you might want to call next() and/or valid() before!)');
}
- return match($this->serializer){
+ /** @psalm-var E $res */
+ $res = match($this->serializer){
//self::SERIALIZER_JSON => json_decode($this->current, flags: JSON_THROW_ON_ERROR),
default => unserialize($this->current)
};
+ return $res;
}
+ /**
+ * {@inheritDoc}
+ * Yields the internal index minus one, i.e. -1 as long as the internal index is 0.
+ * @see \Iterator::key()
+ * @psalm-return int<-1, max>
+ * @psalm-mutation-free
+ */
public function key(): int {
return $this->index - 1;
}
@@ -130,11 +144,18 @@ public function valid(): bool {
/**
+ * Reads the 'size' entry that fstat() reports for the internal stream.
* @return int|null size in bytes or null if unknown
+ * @psalm-mutation-free
*/
public function getSize(): ?int {
return fstat($this->stream)['size'] ?? null;
}
+ /**
+ * {@inheritDoc}
+ * Reports the current value of the internal $count property.
+ * @see \Countable::count()
+ * @psalm-return int<0, max>
+ * @psalm-mutation-free
+ */
public function count(): int {
return $this->count;
}