Skip to content

Commit

Permalink
[API] Move SQLiteIndexedTar._detectCompression into compressions.dete…
Browse files Browse the repository at this point in the history
…ctCompression
  • Loading branch information
mxmlnkn committed Apr 7, 2024
1 parent fa74ae9 commit 378a6b0
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 51 deletions.
53 changes: 2 additions & 51 deletions core/ratarmountcore/SQLiteIndexedTar.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from .ProgressBar import ProgressBar
from .SQLiteIndex import SQLiteIndex, SQLiteIndexedTarUserData
from .StenciledFile import StenciledFile
from .compressions import findAvailableOpen, getGzipInfo, TAR_COMPRESSION_FORMATS
from .compressions import detectCompression, findAvailableOpen, getGzipInfo, TAR_COMPRESSION_FORMATS
from .utils import (
RatarmountError,
InvalidIndexError,
Expand Down Expand Up @@ -1460,53 +1460,6 @@ def _checkRowsValidity(self, rows) -> bool:

return False

@staticmethod
def _detectCompression(
fileobj: IO[bytes], prioritizedBackends: Optional[List[str]], printDebug: int = 0
) -> Optional[str]:
if not isinstance(fileobj, io.IOBase) or not fileobj.seekable():
return None

oldOffset = fileobj.tell()
for compressionId, compression in TAR_COMPRESSION_FORMATS.items():
# The header check is a necessary condition not a sufficient condition.
# Especially for gzip, which only has 2 magic bytes, false positives might happen.
# Therefore, only use the magic bytes based check if the module could not be found
# in order to still be able to print pinpoint error messages.
matches = compression.checkHeader(fileobj)
fileobj.seek(oldOffset)
if not matches:
continue

formatOpen = findAvailableOpen(compressionId, prioritizedBackends)
# If no appropriate module exists, then don't do any further checks.
if not formatOpen:
if printDebug >= 1:
print(
f"[Warning] A given file with magic bytes for {compressionId} could not be opened because "
"no appropriate Python module could be loaded. Are some dependencies missing? To install "
"ratarmountcore with all dependencies do: python3 -m pip install --user ratarmountcore[full]"
)
return None

try:
compressedFileobj = formatOpen(fileobj)
# Reading 1B from a single-frame zst file might require decompressing it fully in order
# to get uncompressed file size! Avoid that. The magic bytes should suffice mostly.
# TODO: Make indexed_zstd not require the uncompressed size for the read call.
if compressionId != 'zst':
compressedFileobj.read(1)
compressedFileobj.close()
fileobj.seek(oldOffset)
return compressionId
except Exception as e:
if printDebug >= 2:
print(f"[Warning] A given file with magic bytes for {compressionId} could not be opened because:")
print(e)
fileobj.seek(oldOffset)

return None

@staticmethod
def _detectTar(fileobj: IO[bytes], encoding: str, printDebug: int = 0) -> bool:
if not isinstance(fileobj, io.IOBase) or not fileobj.seekable():
Expand Down Expand Up @@ -1538,9 +1491,7 @@ def _openCompressedFile(
Returns (tar_file_obj, raw_file_obj, compression, isTar).
raw_file_obj will be none if compression is None.
"""
compression = SQLiteIndexedTar._detectCompression(
fileobj, prioritizedBackends=prioritizedBackends, printDebug=printDebug
)
compression = detectCompression(fileobj, prioritizedBackends=prioritizedBackends, printDebug=printDebug)
if printDebug >= 3:
print(f"[Info] Detected compression {compression} for file object:", fileobj)

Expand Down
48 changes: 48 additions & 0 deletions core/ratarmountcore/compressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import collections
import concurrent.futures
import io
import os
import struct
import sys
Expand Down Expand Up @@ -425,3 +426,50 @@ def getGzipInfo(fileobj: IO[bytes]) -> Optional[Tuple[str, int]]:
return name.decode(), mtime

return None


def detectCompression(
fileobj: IO[bytes], prioritizedBackends: Optional[List[str]], printDebug: int = 0
) -> Optional[str]:
if not isinstance(fileobj, io.IOBase) or not fileobj.seekable():
return None

oldOffset = fileobj.tell()
for compressionId, compression in TAR_COMPRESSION_FORMATS.items():
# The header check is a necessary condition not a sufficient condition.
# Especially for gzip, which only has 2 magic bytes, false positives might happen.
# Therefore, only use the magic bytes based check if the module could not be found
# in order to still be able to print pinpoint error messages.
matches = compression.checkHeader(fileobj)
fileobj.seek(oldOffset)
if not matches:
continue

formatOpen = findAvailableOpen(compressionId, prioritizedBackends)
# If no appropriate module exists, then don't do any further checks.
if not formatOpen:
if printDebug >= 1:
print(
f"[Warning] A given file with magic bytes for {compressionId} could not be opened because "
"no appropriate Python module could be loaded. Are some dependencies missing? To install "
"ratarmountcore with all dependencies do: python3 -m pip install --user ratarmountcore[full]"
)
return None

try:
compressedFileobj = formatOpen(fileobj)
# Reading 1B from a single-frame zst file might require decompressing it fully in order
# to get uncompressed file size! Avoid that. The magic bytes should suffice mostly.
# TODO: Make indexed_zstd not require the uncompressed size for the read call.
if compressionId != 'zst':
compressedFileobj.read(1)
compressedFileobj.close()
fileobj.seek(oldOffset)
return compressionId
except Exception as e:
if printDebug >= 2:
print(f"[Warning] A given file with magic bytes for {compressionId} could not be opened because:")
print(e)
fileobj.seek(oldOffset)

return None

0 comments on commit 378a6b0

Please sign in to comment.