Skip to content

Commit

Permalink
Add benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
cracksalad committed Nov 21, 2023
1 parent 8b0e6a9 commit ee0f258
Show file tree
Hide file tree
Showing 8 changed files with 221 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
/.gitignore export-ignore
/.gitattributes export-ignore
/phpcs.xml export-ignore
/phpunit.xml export-ignore
/psalm.xml export-ignore
/psalm.baseline.xml export-ignore
/phpcs.xml export-ignore
/.phive export-ignore
/test export-ignore

Expand Down
27 changes: 26 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ The constructor of `LargeArrayBuffer` provides some options:
E.g.: `new LargeArrayBuffer(512);` to set a 512 MiB threshold.
1. You can enable GZIP compression for the serialized items. Although this is recommended only if your items are pretty big like > 1 KiB each. `new LargeArrayBuffer(compression: LargeArrayBuffer::COMPRESSION_GZIP);`

### Read your data from the buffer
### Read from the buffer

There are several options to read the data:

Expand All @@ -68,6 +68,31 @@ There are some stats you can obtain:

To put it in one sentence: This library uses [php://temp](https://www.php.net/manual/en/wrappers.php.php) as well as PHP's [serialize](https://www.php.net/manual/en/function.serialize.php)/[unserialize](https://www.php.net/manual/en/function.unserialize.php) functions to store an array on disk if it gets too large.

## Limitations and concerns

- associative arrays are not supported
- the item type needs to be compatible with PHP's [serialize](https://www.php.net/manual/en/function.serialize.php)/[unserialize](https://www.php.net/manual/en/function.unserialize.php) functions
- since storage drives (even PCIe SSDs) are a lot slower than memory and de-/serialization needs to be done, you trade hard memory overflows for performance losses

### Benchmark

A benchmark with 1 million measurements (consisting of DateTimeImmutable, int and float) using PHP 8.2 with 10 iterations comparing a normal array with the LargeArrayBuffer gave the following results (LargeArrayBuffer was configured with a memory limit of 256 MiB):

| Action | Consumed time | Consumed memory | Buffer size |
|--------|---------------|-----------------|-------------|
| Fill array | 1.65 s | 476 MiB | NA |
| Iterate over array | 0.14 s | 478 MiB | NA |
| Fill buffer | 10.43 s | 0 B | 378.7 MiB |
| Iterate over buffer | 4.67 s | 0 B | 378.7 MiB |

Note:

- The peak memory usage using the buffer is about its memory limit. The table shows the memory usage after the specified action.
- PHP seems to cache the array once it has been created for the first time, even though `unset` is used. That is why the table shows the maximum (first run) rather than the average for this specific value.
- The serialized data is smaller than the binary data in memory. I have absolutely no idea why.

To reproduce, run `bench/benchmark.php`.

## License

This library is licensed under the MIT License (MIT). Please see [LICENSE](LICENSE) for more information.
57 changes: 57 additions & 0 deletions bench/LargeArrayBufferBench.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
<?php
declare(strict_types=1);

namespace LargeArrayBuffer\Benchmarks;

use LargeArrayBuffer\LargeArrayBuffer;
use LargeArrayBuffer\Benchmarks\Items\Measurement;

/**
 * Benchmark workloads that fill and iterate a plain PHP array and a
 * LargeArrayBuffer with generated Measurement items.
 *
 * The loops are kept deliberately simple: they are the code under measurement.
 *
 * @author Andreas Wahlen
 */
class LargeArrayBufferBench {

  /**
   * Number of items the fill workloads generate.
   *
   * @readonly
   */
  private int $count;

  /**
   * @param int $count number of items the fill workloads generate
   */
  public function __construct(int $count){
    $this->count = $count;
  }

  /**
   * Builds one item: a timestamp lying $index hours before the current time,
   * a sensor ID in the range 0..499 and a random value between -1000 and 1000.
   *
   * @param int $index position of the item within the generated sequence
   * @throws \Exception if DateInterval rejects the duration or no randomness source is available
   */
  private function generateMeasurement(int $index): Measurement {
    return new Measurement(
      (new \DateTimeImmutable())->sub(new \DateInterval('PT'.$index.'H')),
      $index % 500,
      random_int(-1_000_000, 1_000_000) / 1000
    );
  }

  /**
   * Fills a plain array with the configured number of generated items.
   *
   * @return array list of Measurement items
   */
  public function arrayMeasurementsFill(): array {
    $arr = [];
    for($i = 0; $i < $this->count; $i++){
      $arr[] = $this->generateMeasurement($i);
    }
    return $arr;
  }

  /**
   * Walks over every key/value pair of the given array without processing it.
   *
   * @param array $arr array to iterate
   */
  public function arrayMeasurementsIterate(array $arr): void {
    foreach($arr as $index => $item){
      // no-op statements: key and value are only touched, never processed
      $index;
      $item;
    }
  }

  /**
   * Pushes the configured number of generated items into the given buffer.
   *
   * @param LargeArrayBuffer $buf buffer to fill
   */
  public function bufferMeasurementsFill(LargeArrayBuffer $buf): void {
    for($i = 0; $i < $this->count; $i++){
      $buf->push($this->generateMeasurement($i));
    }
  }

  /**
   * Walks over every key/value pair of the given buffer without processing it.
   *
   * @param LargeArrayBuffer $buf buffer to iterate
   */
  public function bufferMeasurementsIterate(LargeArrayBuffer $buf): void {
    foreach($buf as $index => $item){
      // no-op statements: key and value are only touched, never processed
      $index;
      $item;
    }
  }
}
88 changes: 88 additions & 0 deletions bench/benchmark.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
<?php
/**
* @author Andreas Wahlen
*/

declare(strict_types=1);

use LargeArrayBuffer\Benchmarks\LargeArrayBufferBench;
use LargeArrayBuffer\Benchmarks\Items\Measurement;
use LargeArrayBuffer\LargeArrayBuffer;

// Number of times the complete benchmark cycle (fill/iterate array, fill/iterate buffer) is repeated.
define('ITERATIONS', 10);
// Number of items generated for every fill run.
define('ARRAY_SIZE', 1_000_000);

/**
 * Formats a byte count as a human-readable string using binary (1024-based) units.
 *
 * The unit is chosen from the magnitude of the value, so negative amounts
 * (e.g. memory deltas) are scaled as well. Amounts whose magnitude is below
 * 1024 (including 0 and fractions) are printed in bytes, and the unit is
 * capped at the largest known one.
 *
 * @param float $bytes amount in bytes, may be zero, fractional or negative
 * @return string value rounded to one decimal place followed by the unit, e.g. "1.5 kiB"
 */
function formatBytes(float $bytes): string {
  $scales = ['B', 'kiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'];
  $magnitude = abs($bytes);
  $exp = 0;
  // log() is only meaningful for finite values >= 1024 here; everything else stays in bytes.
  // This avoids casting -INF/NAN to int and negative array indexes.
  if(is_finite($magnitude) && $magnitude >= 1024){
    $exp = min(intval(floor(log($magnitude, 1024))), count($scales) - 1);
  }
  return round($bytes / pow(1024, $exp), 1).' '.$scales[$exp];
}

/**
 * Calculates the arithmetic mean of one column of the recorded samples.
 *
 * The divisor is the number of samples that actually contain the column,
 * so the result stays correct when fewer (or more) samples were recorded
 * than expected.
 *
 * @param array $metrics list of samples, each an array such as ['time' => ..., 'mem' => ...]
 * @param string $key column to average
 * @return float mean of the column, 0.0 if no sample contains it
 */
function getAverage(array $metrics, string $key): float {
  $values = array_column($metrics, $key);
  if($values === []){
    return 0.0;
  }
  return array_sum($values) / count($values);
}

/**
 * Determines the largest value of one column of the recorded samples.
 *
 * @param array $metrics list of samples, each an array such as ['time' => ..., 'mem' => ...]
 * @param string $key column to inspect
 * @return float maximum of the column, 0.0 if no sample contains it
 */
function getMax(array $metrics, string $key): float {
  $values = array_column($metrics, $key);
  // max() throws a ValueError when given an empty array
  if($values === []){
    return 0.0;
  }
  return (float) max($values);
}

/**
 * Prints one result line with the average and maximum of time, memory and
 * (optionally) buffer size for a single benchmark action.
 *
 * @param string $label human-readable name of the action
 * @param array $metrics all recorded samples, grouped by action key
 * @param string $key action whose samples are printed
 * @param int $tabs number of tab characters inserted after the label
 * @param bool $inclSize whether to append the buffer size column
 */
function printResult(string $label, array $metrics, string $key, int $tabs = 1, bool $inclSize = false): void {
  $line = $label.' (avg/max):'.str_repeat("\t", $tabs);

  $line .= number_format(getAverage($metrics[$key], 'time'), 2).' s/';
  $line .= number_format(getMax($metrics[$key], 'time'), 2).' s | ';

  $line .= formatBytes(getAverage($metrics[$key], 'mem')).'/';
  $line .= formatBytes(getMax($metrics[$key], 'mem')).' | ';

  if($inclSize){
    $line .= formatBytes(getAverage($metrics[$key], 'size')).'/';
    $line .= formatBytes(getMax($metrics[$key], 'size'));
  }

  echo $line.PHP_EOL;
}

require_once dirname(__DIR__).'/vendor/autoload.php';

// Samples per action: $metrics[<action>][] = ['time' => seconds, 'mem' => bytes, 'size' => bytes (buffer only)]
$metrics = [];
for($i = 0; $i < ITERATIONS; $i++){
  $bench = new LargeArrayBufferBench(ARRAY_SIZE);

  // plain array: fill
  $start = microtime(true);
  $memBefore = memory_get_usage(true);
  $arr = $bench->arrayMeasurementsFill();
  $time = microtime(true) - $start;
  $mem = memory_get_usage(true) - $memBefore;
  $metrics['fill_array'][] = [
    'time' => $time,
    'mem' => $mem
  ];

  // plain array: iterate (memory is still relative to the state before filling)
  $start = microtime(true);
  $bench->arrayMeasurementsIterate($arr);
  $time = microtime(true) - $start;
  $mem = memory_get_usage(true) - $memBefore;
  $metrics['iterate_array'][] = [
    'time' => $time,
    'mem' => $mem
  ];
  unset($arr);

  // buffer: fill (256 is the buffer's memory threshold in MiB)
  $start = microtime(true);
  $memBefore = memory_get_usage(true);
  $buf = new LargeArrayBuffer(256);
  $bench->bufferMeasurementsFill($buf);
  // take both readings right after the action so that nothing else is included in them
  $time = microtime(true) - $start;
  $mem = memory_get_usage(true) - $memBefore;
  $metrics['fill_buffer'][] = [
    'time' => $time,
    'mem' => $mem,
    'size' => $buf->getSize()
  ];

  // buffer: iterate (memory is still relative to the state before filling)
  $start = microtime(true);
  $bench->bufferMeasurementsIterate($buf);
  $time = microtime(true) - $start;
  $mem = memory_get_usage(true) - $memBefore;
  $metrics['iterate_buffer'][] = [
    'time' => $time,
    'mem' => $mem,
    'size' => $buf->getSize()
  ];
  unset($buf);

  unset($bench);
}

printResult('Fill array', $metrics, 'fill_array', 2);
printResult('Iterate over array', $metrics, 'iterate_array', 1);
printResult('Fill buffer', $metrics, 'fill_buffer', 2, true);
printResult('Iterate over buffer', $metrics, 'iterate_buffer', 1, true);
20 changes: 20 additions & 0 deletions bench/item/Measurement.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?php
declare(strict_types=1);

namespace LargeArrayBuffer\Benchmarks\Items;

/**
* @author Andreas Wahlen
*/
/**
 * Sample item used by the benchmarks: one sensor reading consisting of a
 * timestamp, the ID of the sensor and the measured value.
 */
class Measurement {

  /**
   * @param \DateTimeImmutable $timestamp point in time of the reading
   * @param int $sensorID identifier of the sensor
   * @param float $value measured value
   */
  public function __construct(
    private \DateTimeImmutable $timestamp,
    private int $sensorID,
    private float $value
  ) {
  }
}
8 changes: 5 additions & 3 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"php": ">=8.0 <8.4"
},
"require-dev": {
"squizlabs/php_codesniffer": ">=3.7"
"squizlabs/php_codesniffer": "^3.7"
},
"prefer-stable": true,
"autoload": {
Expand All @@ -28,7 +28,9 @@
},
"autoload-dev": {
"psr-4": {
"LargeArrayBuffer\\Tests\\": "test/"
"LargeArrayBuffer\\Tests\\": "test/",
"LargeArrayBuffer\\Benchmarks\\": "bench/",
"LargeArrayBuffer\\Benchmarks\\Items\\": "bench/item/"
}
},
"scripts": {
Expand Down Expand Up @@ -56,4 +58,4 @@
"update-psalm-baseline": "Updates baseline for psalm. CAUTION should not be run as a regular procedure!",
"tests": "Runs all available tests."
}
}
}
2 changes: 1 addition & 1 deletion phpcs.xml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
<rule ref="PEAR.Functions.ValidDefaultValue"/>
<rule ref="PEAR.WhiteSpace.ObjectOperatorIndent">
<properties>
<property name="indent" value="2"/>
<property name="indent" value="4"/>
</properties>
</rule>

Expand Down
25 changes: 23 additions & 2 deletions src/LargeArrayBuffer.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
/**
* @author Andreas Wahlen
* @template E of object|array|scalar|null
* @implements \Iterator<int, E>
* @implements \Iterator<int<0, max>, E>
* @psalm-suppress TooManyTemplateParams
*/
class LargeArrayBuffer implements \Iterator, \Countable {
Expand Down Expand Up @@ -34,8 +34,14 @@ class LargeArrayBuffer implements \Iterator, \Countable {
*/
private $stream;

/**
* @var int<0, max>
*/
private int $count = 0;

/**
* @var int<0, max>
*/
private int $index = 0;

private ?string $current = null;
Expand Down Expand Up @@ -111,12 +117,20 @@ public function current(): mixed {
if($this->current === null) {
throw new \RuntimeException('index out of bounds (you might want to call next() and/or valid() before!)');
}
return match($this->serializer){
/** @psalm-var E $res */
$res = match($this->serializer){
//self::SERIALIZER_JSON => json_decode($this->current, flags: JSON_THROW_ON_ERROR),
default => unserialize($this->current)
};
return $res;
}

/**
* {@inheritDoc}
* @see \Iterator::key()
* @psalm-return int<-1, max>

Check failure on line 131 in src/LargeArrayBuffer.php

View workflow job for this annotation

GitHub Actions / Static Analysis

ImplementedReturnTypeMismatch

src/LargeArrayBuffer.php:131:20: ImplementedReturnTypeMismatch: The inherited return type 'int<0, max>|null' for Iterator::key is different to the implemented return type for LargeArrayBuffer\LargeArrayBuffer::key 'int<-1, max>' (see https://psalm.dev/123)
* @psalm-mutation-free
*/
public function key(): int {
// Returns the internal index reduced by one.
// NOTE(review): presumably $this->index already points past the current item - confirm against next()/rewind().
// While $this->index is 0 this yields -1, which is what the static analysis check complains about.
return $this->index - 1;
}
Expand All @@ -130,11 +144,18 @@ public function valid(): bool {

/**
* @return int|null size in bytes or null if unknown
* @psalm-mutation-free
*/
public function getSize(): ?int {
// fstat() returns false on failure; the null coalescing operator maps that case,
// as well as a missing 'size' entry, to null.
return fstat($this->stream)['size'] ?? null;
}

/**
* {@inheritDoc}
* @see \Countable::count()
* @psalm-return int<0, max>
* @psalm-mutation-free
*/
public function count(): int {
// returns the internally tracked counter unchanged
return $this->count;
}
Expand Down

0 comments on commit ee0f258

Please sign in to comment.