Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add helpers for xz, lz4 and zstandard codec and for s3 and smb remote…
… servers (#491) * allow registering sources and codecs * forward mode in append file functions * allow using remote helpers + compression helpers * add handlers for lz4, xz and zstandard compression * add handlers for s3 and smb protocols * document remote and compression handlers * update .gitignore patterns * updated changes.rst to reflect new helpers * fix bug in test case for passing CI * fix broken doc tests for s3 and smb * improve coverage for helpers test cases Co-authored-by: Juarez Rudsatz <juarez.rudsatz@ceabs.net>
- Loading branch information
Showing
19 changed files
with
771 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,9 +6,11 @@ MANIFEST | |
dist | ||
.ipynb_checkpoints/ | ||
.idea | ||
.vscode | ||
.tox/ | ||
example*.* | ||
.coverage | ||
.eggs | ||
tmp* | ||
env/ | ||
spike* | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,3 +14,7 @@ Whoosh==2.7.4 | |
xlrd==1.2.0 | ||
xlwt==1.3.0 | ||
fastavro>=0.23.4 | ||
lz4 | ||
zstandard | ||
smbprotocol>=1.0.1 | ||
s3fs>=0.2.2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from __future__ import absolute_import, print_function, division | ||
|
||
from petl.io.codec.zstd import ZstandardCodec | ||
|
||
from petl.io.codec.lz4 import LZ4Codec | ||
|
||
from petl.io.codec.xz import XZCodec |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# -*- coding: utf-8 -*- | ||
from __future__ import absolute_import, print_function, division | ||
|
||
from contextlib import contextmanager | ||
|
||
from petl.io.sources import register_codec | ||
|
||
|
||
class LZ4Codec(object):
    '''
    Codec for compressing and decompressing ``.lz4`` files.

    `LZ4`_ is a lossless compression algorithm, providing compression
    speed greater than 500 MB/s per core (>0.15 Bytes/cycle). It features an
    extremely fast decoder, with speed in multiple GB/s per core
    (~1 Byte/cycle).

    .. note::

        This codec requires `python-lz4`_ to be installed, e.g.::

            $ pip install lz4

    .. versionadded:: 1.5.0

    .. _python-lz4: https://github.com/python-lz4/python-lz4
    .. _LZ4: http://www.lz4.org
    '''

    def __init__(self, filename, **kwargs):
        '''
        Remember the target `filename`; any extra keyword arguments are
        stored and later forwarded to ``lz4.frame.open``.
        '''
        self.kwargs = kwargs
        self.filename = filename

    def open_file(self, mode='rb'):
        '''
        Open the file with ``lz4.frame.open`` using `mode` and the stored
        keyword arguments, and return the resulting file object. The caller
        is responsible for closing it.
        '''
        # Imported here so lz4 is only needed once a file is actually opened.
        from lz4 import frame as lz4_frame
        return lz4_frame.open(self.filename, mode=mode, **self.kwargs)

    @contextmanager
    def open(self, mode='r'):
        '''
        Context manager yielding the opened file and closing it on exit.

        Only the first character of `mode` is kept and binary mode is
        always requested.
        '''
        handle = self.open_file(mode=mode[:1] + 'b')
        try:
            yield handle
        finally:
            handle.close()
|
||
|
||
register_codec('.lz4', LZ4Codec) | ||
|
||
# end # |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# -*- coding: utf-8 -*- | ||
from __future__ import absolute_import, print_function, division | ||
|
||
from contextlib import contextmanager | ||
|
||
from petl.io.sources import register_codec | ||
|
||
class XZCodec(object):
    '''
    Codec for compressing and decompressing ``.xz`` files using `lzma`_.

    .. versionadded:: 1.5.0

    .. _lzma: https://docs.python.org/3/library/lzma.html
    '''

    def __init__(self, filename, **kwargs):
        '''
        Remember the target `filename`; any extra keyword arguments are
        stored and later forwarded to ``lzma.open``.
        '''
        self.kwargs = kwargs
        self.filename = filename

    def open_file(self, mode='rb'):
        '''
        Open the file with ``lzma.open`` using `mode` and the stored keyword
        arguments, and return the resulting file object. The caller is
        responsible for closing it.
        '''
        # Imported here so lzma is only needed once a file is actually opened.
        import lzma as xz
        return xz.open(self.filename, mode=mode, **self.kwargs)

    @contextmanager
    def open(self, mode='r'):
        '''
        Context manager yielding the opened file and closing it on exit.

        Only the first character of `mode` is kept and binary mode is
        always requested.
        '''
        handle = self.open_file(mode=mode[:1] + 'b')
        try:
            yield handle
        finally:
            handle.close()
|
||
|
||
register_codec('.xz', XZCodec) | ||
|
||
# end # |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# -*- coding: utf-8 -*- | ||
from __future__ import absolute_import, print_function, division | ||
|
||
import io | ||
from contextlib import contextmanager | ||
|
||
from petl.io.sources import register_codec | ||
|
||
|
||
class ZstandardCodec(object):
    '''
    Allows compressing and decompressing .zst files

    `Zstandard`_ is a real-time compression algorithm, providing
    high compression ratios. It offers a very wide range of compression / speed
    trade-off, while being backed by a very fast decoder.

    .. note::

        This codec requires `zstd`_ to be installed, e.g.::

            $ pip install zstandard

    .. versionadded:: 1.5.0

    .. _zstd: https://github.com/indygreg/python-zstandard
    .. _Zstandard: http://www.zstd.net
    '''

    def __init__(self, filename, **kwargs):
        '''
        Remember the target `filename`; any extra keyword arguments are
        stored and later passed to ``ZstdDecompressor`` (read modes) or
        ``ZstdCompressor`` (all other modes).
        '''
        self.filename = filename
        self.kwargs = kwargs

    def _open_streams(self, mode):
        '''
        Open the file and wrap it in a zstandard stream.

        Returns a ``(raw, stream)`` pair so that callers can close the
        underlying file themselves instead of relying on the wrapper.
        '''
        import zstandard as zstd
        if mode.startswith('r'):
            wrap = zstd.ZstdDecompressor(**self.kwargs).stream_reader
        else:
            wrap = zstd.ZstdCompressor(**self.kwargs).stream_writer
        raw = io.open(self.filename, mode)
        try:
            stream = wrap(raw)
        except Exception:
            # Do not leak the file handle when the wrapper cannot be built.
            raw.close()
            raise
        return raw, stream

    def open_file(self, mode='rb'):
        '''
        Return a zstandard stream over the file: a decompressing reader when
        `mode` starts with ``'r'``, otherwise a compressing writer. The
        caller is responsible for closing it.
        '''
        return self._open_streams(mode)[1]

    @contextmanager
    def open(self, mode='r'):
        '''
        Context manager yielding the zstandard stream; on exit both the
        stream and the underlying file are closed.

        Only the first character of `mode` is kept and binary mode is
        always requested.
        '''
        mode2 = mode[:1] + 'b'
        raw, source = self._open_streams(mode2)
        try:
            yield source
        finally:
            try:
                # Close the wrapper first so pending data reaches the file.
                source.close()
            finally:
                # Whether the wrapper closes the file depends on the
                # zstandard release; closing a closed file is a no-op.
                raw.close()
|
||
|
||
register_codec('.zst', ZstandardCodec) | ||
|
||
# end # |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from __future__ import absolute_import, print_function, division | ||
|
||
from petl.io.source.s3 import S3Source | ||
|
||
from petl.io.source.smb import SMBSource |
Oops, something went wrong.