Merge pull request python#22 from dpkp/content_size

Optionally disable contentSize frame header
tbbharaj · Apr 28, 2017 · b77b080
2 parents feb2a00 + 040af9a
commit b77b080
Show file tree

Hide file tree

Showing 3 changed files with 45 additions and 6 deletions.
diff --git a/lz4/frame/__init__.py b/lz4/frame/__init__.py
@@ -35,6 +35,9 @@ class LZ4FrameCompressor(object):
             - lz4.frame.CONTENTCHECKSUM_DISABLED or 0: disables checksumming
             - lz4.frame.CONTENTCHECKSUM_ENABLED or 1: enables checksumming
             The default is CONTENTCHECKSUM_DISABLED.
+        content_size (bool): Specifies whether to include an optional 8-byte header
+            field that is the uncompressed size of data included within the frame.
+            Including the content-size header is optional, and is enabled by default.
         frame_type (int): Specifies whether user data can be injected between
             frames. Options:
             - lz4.frame.FRAMETYPE_FRAME or 0: disables user data injection
@@ -50,12 +53,14 @@ def __init__(self,
                  block_mode=BLOCKMODE_LINKED,
                  compression_level=COMPRESSIONLEVEL_MIN,
                  content_checksum=CONTENTCHECKSUM_DISABLED,
+                 content_size=True,
                  frame_type=FRAMETYPE_FRAME,
                  auto_flush=True):
         self.block_size = block_size
         self.block_mode = block_mode
         self.compression_level = compression_level
         self.content_checksum = content_checksum
+        self.content_size = content_size
         self.frame_type = frame_type
         self.auto_flush = auto_flush
         self._context = create_compression_context()
@@ -92,6 +97,7 @@ def compress_begin(self, source_size=0):
                                     frame_type=self.frame_type,
                                     compression_level=self.compression_level,
                                     content_checksum=self.content_checksum,
+                                    content_size=self.content_size,
                                     auto_flush=self.auto_flush,
                                     source_size=source_size)
 

diff --git a/lz4/frame/_frame.c b/lz4/frame/_frame.c
@@ -152,14 +152,17 @@ create_compression_context (PyObject * Py_UNUSED (self))
   "        - lz4.frame.CONTENTCHECKSUM_DISABLED or 0: disables checksumming\n" \
   "        - lz4.frame.CONTENTCHECKSUM_ENABLED or 1: enables checksumming\n\n" \
   "        The default is CONTENTCHECKSUM_DISABLED.\n"                  \
+  "    content_size (bool): Specifies whether to include an optional 8-byte header\n" \
+  "        field that is the uncompressed size of data included within the frame.\n" \
+  "        Including the content-size header is optional, and is enabled by default.\n" \
   "    frame_type (int): Specifies whether user data can be injected between\n" \
   "        frames. Options:\n\n"                                        \
   "        - lz4.frame.FRAMETYPE_FRAME or 0: disables user data injection\n" \
   "        - lz4.frame.FRAMETYPE_SKIPPABLEFRAME or 1: enables user data injection\n\n" \
   "        The default is lz4.frame.FRAMETYPE_FRAME.\n"                 \
 
 PyDoc_STRVAR(compress__doc,
-             "compress(source, compression_level=0, block_size=0, content_checksum=0, block_mode=0, frame_type=0)\n\n" \
+             "compress(source, compression_level=0, block_size=0, content_checksum=0, content_size=1, block_mode=0, frame_type=0)\n\n" \
              "Accepts a string, and compresses the string in one go, returning the\n" \
              "compressed string as a string of bytes. The compressed string includes\n" \
              "a header and endmark and so is suitable for writing to a file.\n\n" \
@@ -178,6 +181,7 @@ compress (PyObject * Py_UNUSED (self), PyObject * args,
 {
   const char *source;
   int source_size;
+  int content_size_header = 1;
   LZ4F_preferences_t preferences;
   size_t compressed_bound;
   Py_ssize_t dest_size;
@@ -188,6 +192,7 @@ compress (PyObject * Py_UNUSED (self), PyObject * args,
                             "compression_level",
                             "block_size",
                             "content_checksum",
+                            "content_size",
                             "block_mode",
                             "frame_type",
                             NULL
@@ -196,20 +201,28 @@ compress (PyObject * Py_UNUSED (self), PyObject * args,
 
   memset (&preferences, 0, sizeof (preferences));
 
-  if (!PyArg_ParseTupleAndKeywords (args, keywds, "s#|iiiii", kwlist,
+  if (!PyArg_ParseTupleAndKeywords (args, keywds, "s#|iiiiii", kwlist,
                                     &source, &source_size,
                                     &preferences.compressionLevel,
                                     &preferences.frameInfo.blockSizeID,
                                     &preferences.
                                     frameInfo.contentChecksumFlag,
+                                    &content_size_header,
                                     &preferences.frameInfo.blockMode,
                                     &preferences.frameInfo.frameType))
     {
       return NULL;
     }
 
   preferences.autoFlush = 0;
-  preferences.frameInfo.contentSize = source_size;
+  if (content_size_header)
+    {
+      preferences.frameInfo.contentSize = source_size;
+    }
+  else
+    {
+      preferences.frameInfo.contentSize = 0;
+    }
 
   Py_BEGIN_ALLOW_THREADS
   compressed_bound =
@@ -272,7 +285,7 @@ compress (PyObject * Py_UNUSED (self), PyObject * args,
  ******************/
 PyDoc_STRVAR(compress_begin__doc,
              "compress_begin(cCtx, source_size=0, compression_level=0, block_size=0,\n" \
-             "    content_checksum=0, block_mode=0, frame_type=0, auto_flush=1)\n\n"\
+             "    content_checksum=0, content_size=1, block_mode=0, frame_type=0, auto_flush=1)\n\n"\
              "Creates a frame header from a compression context.\n\n"   \
              "Args:\n"                                                  \
              "    context (cCtx): A compression context.\n\n"           \
@@ -296,6 +309,7 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
 {
   PyObject *py_context = NULL;
   unsigned long source_size = 0;
+  int content_size_header = 1;
   LZ4F_preferences_t preferences;
   /* Only needs to be large enough for a header, which is 15 bytes.
    * Unfortunately, the lz4 library doesn't provide a #define for this.
@@ -308,6 +322,7 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
                             "compression_level",
                             "block_size",
                             "content_checksum",
+                            "content_size",
                             "block_mode",
                             "frame_type",
                             "auto_flush",
@@ -320,12 +335,13 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
      argument */
   preferences.autoFlush = 1;
 
-  if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|kiiiiii", kwlist,
+  if (!PyArg_ParseTupleAndKeywords (args, keywds, "O|kiiiiiii", kwlist,
                                     &py_context,
                                     &source_size,
                                     &preferences.compressionLevel,
                                     &preferences.frameInfo.blockSizeID,
                                     &preferences.frameInfo.contentChecksumFlag,
+                                    &content_size_header,
                                     &preferences.frameInfo.blockMode,
                                     &preferences.frameInfo.frameType,
                                     &preferences.autoFlush
@@ -334,7 +350,14 @@ compress_begin (PyObject * Py_UNUSED (self), PyObject * args,
       return NULL;
     }
 
-  preferences.frameInfo.contentSize = source_size;
+  if (content_size_header)
+    {
+      preferences.frameInfo.contentSize = source_size;
+    }
+  else
+    {
+      preferences.frameInfo.contentSize = 0;
+    }
 
   context =
     (struct compression_context *) PyCapsule_GetPointer (py_context, capsule_name);

diff --git a/tests/test_frame.py b/tests/test_frame.py
@@ -138,6 +138,7 @@ def test_compress_begin_update_end_not_defaults(self):
             block_size=lz4frame.BLOCKSIZE_MAX256KB,
             block_mode=lz4frame.BLOCKMODE_LINKED,
             compression_level=lz4frame.COMPRESSIONLEVEL_MINHC,
+            content_size=False,
             auto_flush=1
         )
         chunk_size = 128 * 1024 # 128 kb, half of block size
@@ -305,6 +306,15 @@ def test_LZ4FrameCompressor_fails(self):
                 compressed += compressor.flush()
                 compressed = compressor.compress(input_data)
 
+    def test_compress_without_content_size(self):
+        input_data = b"2099023098234882923049823094823094898239230982349081231290381209380981203981209381238901283098908123109238098123"
+        compressed = lz4frame.compress(input_data, content_size=False)
+        frame = lz4frame.get_frame_info(compressed)
+        self.assertEqual(frame['contentSize'], 0)
+        decompressed = lz4frame.decompress(compressed)
+        self.assertEqual(input_data, decompressed)
+
+
 if sys.version_info < (2, 7):
     # Poor-man unittest.TestCase.skip for Python 2.6
     del TestLZ4FrameModern