Skip to content

Commit

Permalink
Add support for LZ4 compression
Browse files Browse the repository at this point in the history
  • Loading branch information
cracksalad committed Nov 21, 2023
1 parent 9e003c8 commit 09ba1c1
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 17 deletions.
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ Use composer to install this library:

`composer require nerou/large-array-buffer`

There are pretty much no dependencies with a single exception:
If you want to use the `toJSONFile()` method, you need to install `ext-json` (PHP's PECL JSON extension) as well.
There are almost no dependencies, with a few exceptions:

- If you want to use the `toJSONFile()` method, you need to install `ext-json` (PHP's PECL JSON extension) as well.
- If you want to use LZ4 compression, `ext-lz4` is required. See [php-ext-lz4](https://github.com/kjdev/php-ext-lz4).

## Usage

Expand All @@ -46,7 +48,7 @@ The constructor of `LargeArrayBuffer` provides some options:

1. You can set the threshold when to move the data to disk. When pushing data to the buffer, it is stored in memory until it gets too large.
E.g.: `new LargeArrayBuffer(512);` to set a 512 MiB threshold.
1. You can enable GZIP compression for the serialized items. Although this is recommended only if your items are pretty big like > 1 KiB each. `new LargeArrayBuffer(compression: LargeArrayBuffer::COMPRESSION_GZIP);`
1. You can enable GZIP or LZ4 compression for the serialized items, although this is recommended only if your items are fairly large (more than about 1 KiB each). E.g.: `new LargeArrayBuffer(compression: LargeArrayBuffer::COMPRESSION_GZIP);`. Note that LZ4 compression requires [ext-lz4](https://github.com/kjdev/php-ext-lz4) to be installed.

### Read from the buffer

Expand Down Expand Up @@ -84,6 +86,8 @@ A benchmark with 1 million measurements (consisting of DateTimeImmutable, int an
| Iterate over array | 0.14 s | 478 MiB | NA |
| Fill buffer | 10.43 s | 0 B | 378.7 MiB |
| Iterate over buffer | 4.67 s | 0 B | 378.7 MiB |
| Fill buffer (GZIP) | 31.6 s | 0 B | 192.5 MiB |
| Iterate over buffer (GZIP) | 8.95 s | 0 B | 192.5 MiB |

Note:

Expand Down
63 changes: 52 additions & 11 deletions bench/benchmark.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
declare(strict_types=1);

use LargeArrayBuffer\Benchmarks\LargeArrayBufferBench;
use LargeArrayBuffer\Benchmarks\Items\Measurement;
use LargeArrayBuffer\LargeArrayBuffer;

require_once dirname(__DIR__).'/vendor/autoload.php';

define('ITERATIONS', 10);
define('ARRAY_SIZE', 1_000_000);

Expand All @@ -34,12 +35,11 @@ function printResult(string $label, array $metrics, string $key, int $tabs = 1,
PHP_EOL;
}

require_once dirname(__DIR__).'/vendor/autoload.php';

$metrics = [];
for($i = 0; $i < ITERATIONS; $i++){
$bench = new LargeArrayBufferBench(ARRAY_SIZE);

// normal array
$start = microtime(true);
$memBefore = memory_get_usage(true);
$arr = $bench->arrayMeasurementsFill();
Expand All @@ -56,12 +56,11 @@ function printResult(string $label, array $metrics, string $key, int $tabs = 1,
];
unset($arr);

// normal buffer
$start = microtime(true);
$memBefore = memory_get_usage(true);
$buf = new LargeArrayBuffer(256);
$bench->bufferMeasurementsFill($buf);
$time = microtime(true) - $start;
$mem = memory_get_usage(true) - $memBefore;
$metrics['fill_buffer'][] = [
'time' => microtime(true) - $start,
'mem' => memory_get_usage(true) - $memBefore,
Expand All @@ -70,19 +69,61 @@ function printResult(string $label, array $metrics, string $key, int $tabs = 1,

$start = microtime(true);
$bench->bufferMeasurementsIterate($buf);
$time = microtime(true) - $start;
$mem = memory_get_usage(true) - $memBefore;
$metrics['iterate_buffer'][] = [
'time' => microtime(true) - $start,
'mem' => memory_get_usage(true) - $memBefore,
'size' => $buf->getSize()
];
unset($buf);

// buffer with GZIP
$start = microtime(true);
$memBefore = memory_get_usage(true);
$buf = new LargeArrayBuffer(256, compression: LargeArrayBuffer::COMPRESSION_GZIP);
$bench->bufferMeasurementsFill($buf);
$metrics['fill_buffer_gz'][] = [
'time' => microtime(true) - $start,
'mem' => memory_get_usage(true) - $memBefore,
'size' => $buf->getSize()
];

$start = microtime(true);
$bench->bufferMeasurementsIterate($buf);
$metrics['iterate_buffer_gz'][] = [
'time' => microtime(true) - $start,
'mem' => memory_get_usage(true) - $memBefore,
'size' => $buf->getSize()
];
unset($buf);

// buffer with LZ4
$start = microtime(true);
$memBefore = memory_get_usage(true);
$buf = new LargeArrayBuffer(256, compression: LargeArrayBuffer::COMPRESSION_LZ4);
$bench->bufferMeasurementsFill($buf);
$metrics['fill_buffer_lz4'][] = [
'time' => microtime(true) - $start,
'mem' => memory_get_usage(true) - $memBefore,
'size' => $buf->getSize()
];

$start = microtime(true);
$bench->bufferMeasurementsIterate($buf);
$metrics['iterate_buffer_lz4'][] = [
'time' => microtime(true) - $start,
'mem' => memory_get_usage(true) - $memBefore,
'size' => $buf->getSize()
];
unset($buf);

unset($bench);
}

printResult('Fill array', $metrics, 'fill_array', 2);
printResult('Iterate over array', $metrics, 'iterate_array', 1);
printResult('Fill buffer', $metrics, 'fill_buffer', 2, true);
printResult('Iterate over buffer', $metrics, 'iterate_buffer', 1, true);
printResult('Fill array', $metrics, 'fill_array', 3);
printResult('Iterate over array', $metrics, 'iterate_array', 2);
printResult('Fill buffer', $metrics, 'fill_buffer', 3, true);
printResult('Iterate over buffer', $metrics, 'iterate_buffer', 2, true);
printResult('Fill buffer (GZIP)', $metrics, 'fill_buffer_gz', 2, true);
printResult('Iterate over buffer (GZIP)', $metrics, 'iterate_buffer_gz', 1, true);
printResult('Fill buffer (LZ4)', $metrics, 'fill_buffer_lz4', 2, true);
printResult('Iterate over buffer (LZ4)', $metrics, 'iterate_buffer_lz4', 1, true);
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
}
],
"suggest": {
"ext-json": "Requirement of toJSONFile() method"
"ext-json": "Requirement of toJSONFile() method",
"ext-lz4": "To enable support of LZ4 compression"
},
"require": {
"php": ">=8.0 <8.4"
Expand Down
9 changes: 7 additions & 2 deletions psalm.baseline.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<files psalm-version="5.12.0@f90118cdeacd0088e7215e64c0c99ceca819e176">

<files psalm-version="5.15.0@5c774aca4746caf3d239d9c8cadb9f882ca29352">
<file src="src/LargeArrayBuffer.php">
<UndefinedFunction>
<code>lz4_compress($serialized)</code>
<code>lz4_uncompress($compressed)</code>
</UndefinedFunction>
</file>
</files>
5 changes: 5 additions & 0 deletions report.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
C:\Users\andre\Informatik\Projects\nerou\LargeArrayBuffer\src\LargeArrayBuffer.php:80:5:warning - MixedAssignment: Unable to determine the type that $compressed is being assigned to
C:\Users\andre\Informatik\Projects\nerou\LargeArrayBuffer\src\LargeArrayBuffer.php:82:32:warning - UndefinedFunction: Function LargeArrayBuffer\lz4_compress does not exist
C:\Users\andre\Informatik\Projects\nerou\LargeArrayBuffer\src\LargeArrayBuffer.php:85:46:warning - MixedArgument: Argument 1 of addcslashes cannot be false|mixed|string, expecting string
C:\Users\andre\Informatik\Projects\nerou\LargeArrayBuffer\src\LargeArrayBuffer.php:112:5:warning - MixedAssignment: Unable to determine the type that $this->current is being assigned to
C:\Users\andre\Informatik\Projects\nerou\LargeArrayBuffer\src\LargeArrayBuffer.php:114:32:warning - UndefinedFunction: Function LargeArrayBuffer\lz4_uncompress does not exist
8 changes: 8 additions & 0 deletions src/LargeArrayBuffer.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class LargeArrayBuffer implements \Iterator, \Countable {

public const COMPRESSION_NONE = 0;
public const COMPRESSION_GZIP = 1;
public const COMPRESSION_LZ4 = 2;

/**
* @readonly
Expand Down Expand Up @@ -50,11 +51,16 @@ class LargeArrayBuffer implements \Iterator, \Countable {
* @param int $maxMemoryMiB maximum memory usage in MiB, when more data is pushed, disk space is used
* @psalm-param self::SERIALIZER_* $serializer
* @psalm-param self::COMPRESSION_* $compression
* @throws \InvalidArgumentException if an unsupported compression or serialization was requested
* @throws \RuntimeException if php://temp could not be opened
*/
public function __construct(int $maxMemoryMiB = 1024, int $serializer = self::SERIALIZER_PHP, int $compression = self::COMPRESSION_NONE) {
$this->serializer = $serializer;
$this->compression = $compression;
if($this->compression === self::COMPRESSION_LZ4 && !function_exists('lz4_compress')){
throw new \InvalidArgumentException('LZ4 compression was requested, but ext-lz4 is not installed');
}

$stream = fopen('php://temp/maxmemory:'.($maxMemoryMiB * 1024 * 1024), 'r+');
if($stream === false) {
throw new \RuntimeException('failed to open php://temp file descriptor');
Expand All @@ -73,6 +79,7 @@ public function push(mixed $item): void {
};
$compressed = match($this->compression){
self::COMPRESSION_GZIP => gzdeflate($serialized),
self::COMPRESSION_LZ4 => lz4_compress($serialized),
default => $serialized
};
$res = fwrite($this->stream, addcslashes($compressed, "\\\r\n")."\n");
Expand Down Expand Up @@ -104,6 +111,7 @@ public function next(): void {
$compressed = stripcslashes($line);
$this->current = match($this->compression){
self::COMPRESSION_GZIP => gzinflate($compressed),
self::COMPRESSION_LZ4 => lz4_uncompress($compressed),
default => $compressed
};
$this->index++;
Expand Down

0 comments on commit 09ba1c1

Please sign in to comment.