From 580780812120a42563e9d58f2e25f182d1e5fa39 Mon Sep 17 00:00:00 2001 From: Dan Ellis Date: Sun, 14 May 2023 15:16:21 -0400 Subject: [PATCH 1/6] utarfile: Support creating tar files. --- micropython/utarfile/example-create.py | 12 ++++ micropython/utarfile/utarfile.py | 97 ++++++++++++++++++++++++-- 2 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 micropython/utarfile/example-create.py diff --git a/micropython/utarfile/example-create.py b/micropython/utarfile/example-create.py new file mode 100644 index 000000000..ec9813dcb --- /dev/null +++ b/micropython/utarfile/example-create.py @@ -0,0 +1,12 @@ +import sys +import utarfile + +tarfile = sys.argv[1] +if not tarfile.endswith('.tar'): + raise ValueError('Filename %s does not end with .tar' % tarfile) + +t = utarfile.TarFile(sys.argv[1], 'w') +for filename in sys.argv[2:]: + print(filename) + t.add(filename) +t.close() diff --git a/micropython/utarfile/utarfile.py b/micropython/utarfile/utarfile.py index 21b899f02..ad642ac5c 100644 --- a/micropython/utarfile/utarfile.py +++ b/micropython/utarfile/utarfile.py @@ -1,14 +1,26 @@ import uctypes +import os # For reading files when writing tar files. 
# http://www.gnu.org/software/tar/manual/html_node/Standard.html TAR_HEADER = { "name": (uctypes.ARRAY | 0, uctypes.UINT8 | 100), - "size": (uctypes.ARRAY | 124, uctypes.UINT8 | 11), + "mode": (uctypes.ARRAY | 100, uctypes.UINT8 | 7), + "uid": (uctypes.ARRAY | 108, uctypes.UINT8 | 7), + "gid": (uctypes.ARRAY | 116, uctypes.UINT8 | 7), + "size": (uctypes.ARRAY | 124, uctypes.UINT8 | 12), + "mtime": (uctypes.ARRAY | 136, uctypes.UINT8 | 12), + "chksum": (uctypes.ARRAY | 148, uctypes.UINT8 | 8), + "typeflag": (uctypes.ARRAY | 156, uctypes.UINT8 | 1), } DIRTYPE = "dir" REGTYPE = "file" +# Following https://github.com/python/cpython/blob/3.11/Lib/tarfile.py +NUL = b"\0" # the null character +BLOCKSIZE = 512 # length of processing blocks +RECORDSIZE = BLOCKSIZE * 20 # length of records + def roundup(val, align): return (val + align - 1) & ~(align - 1) @@ -54,20 +66,36 @@ def __str__(self): return "TarInfo(%r, %s, %d)" % (self.name, self.type, self.size) +def _setstring(b, s, maxlen): + """Write a string into a bytearray by copying each byte.""" + for i, c in enumerate(s.encode("utf-8")[:maxlen]): + b[i] = c + + +def _isdir(finfo): + return (finfo[0] & 0o40000) > 0 + + class TarFile: - def __init__(self, name=None, fileobj=None): + def __init__(self, name=None, mode="r", fileobj=None): + modes = {"r": "rb", "w": "wb"} + if mode not in modes: + raise ValueError("mode must be 'r' or 'w'") if fileobj: self.f = fileobj else: - self.f = open(name, "rb") + self.f = open(name, modes[mode]) self.subf = None + self.mode = mode + self.offset = 0 def next(self): if self.subf: self.subf.skip() - buf = self.f.read(512) + buf = self.f.read(BLOCKSIZE) if not buf: return None + self.offset += len(buf) h = uctypes.struct(uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN) @@ -79,7 +107,8 @@ def next(self): d.name = str(h.name, "utf-8").rstrip("\0") d.size = int(bytes(h.size), 8) d.type = [REGTYPE, DIRTYPE][d.name[-1] == "/"] - self.subf = d.subf = FileSection(self.f, d.size, 
roundup(d.size, 512)) + self.subf = d.subf = FileSection(self.f, d.size, roundup(d.size, BLOCKSIZE)) + self.offset += roundup(d.size, BLOCKSIZE) return d def __iter__(self): @@ -93,3 +122,61 @@ def __next__(self): def extractfile(self, tarinfo): return tarinfo.subf + + def addfile(self, tarinfo, fileobj=None): + # Write the header: 100 bytes of name, 8 bytes of mode in octal... + buf = bytearray(BLOCKSIZE) + name = tarinfo.name + finfo = tarinfo.finfo + if _isdir(finfo) and not name.endswith("/"): + name += "/" + hdr = uctypes.struct(uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN) + _setstring(hdr.name, name, 100) + _setstring(hdr.mode, "%06o " % (finfo[0] & 0o7777), 7) + _setstring(hdr.uid, "%06o " % finfo[4], 7) + _setstring(hdr.gid, "%06o " % finfo[5], 7) + _setstring(hdr.size, "%011o " % finfo[6], 12) + _setstring(hdr.mtime, "%011o " % finfo[8], 12) + _setstring(hdr.typeflag, "5" if _isdir(finfo) else "0", 1) + # Checksum is calculated with checksum field all blanks. + _setstring(hdr.chksum, " " * 8, 8) + # Calculate and insert the actual checksum. + chksum = sum(buf) + _setstring(hdr.chksum, "%06o\0" % chksum, 7) + # Emit the header. + self.f.write(buf) + self.offset += len(buf) + + # Copy the file contents, if any. + if fileobj: + n_bytes = self.f.write(fileobj.read()) + self.offset += n_bytes + remains = (-n_bytes & (BLOCKSIZE - 1)) # == 0b111111111 + if remains: + buf = bytearray(remains) + self.f.write(buf) + self.offset += len(buf) + + def add(self, name, recursive=True): + tarinfo = TarInfo() + tarinfo.name = name + tarinfo.finfo = os.stat(name) + tarinfo.type = DIRTYPE if _isdir(tarinfo.finfo) else REGTYPE + if tarinfo.type == DIRTYPE: + self.addfile(tarinfo) + if recursive: + for f in os.ilistdir(name): + self.add(name + '/' + f[0], recursive) + else: # type == REGTYPE + self.addfile(tarinfo, open(name, "rb")) + + def close(self): + # Must be called to complete writing a tar file. 
+ if self.mode == "w": + self.f.write(NUL * (BLOCKSIZE * 2)) + self.offset += (BLOCKSIZE * 2) + remainder = self.offset % RECORDSIZE + if remainder: + self.f.write(NUL * (RECORDSIZE - remainder)) + self.f.close() + self.f = None From 72acb6f202d0f699488b6b8210d0feebf7d75dc0 Mon Sep 17 00:00:00 2001 From: Dan Ellis Date: Sun, 14 May 2023 15:30:45 -0400 Subject: [PATCH 2/6] utarfile: correct code formatting. --- micropython/utarfile/example-create.py | 6 +++--- micropython/utarfile/utarfile.py | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/micropython/utarfile/example-create.py b/micropython/utarfile/example-create.py index ec9813dcb..8a0a03fd4 100644 --- a/micropython/utarfile/example-create.py +++ b/micropython/utarfile/example-create.py @@ -2,10 +2,10 @@ import utarfile tarfile = sys.argv[1] -if not tarfile.endswith('.tar'): - raise ValueError('Filename %s does not end with .tar' % tarfile) +if not tarfile.endswith(".tar"): + raise ValueError("Filename %s does not end with .tar" % tarfile) -t = utarfile.TarFile(sys.argv[1], 'w') +t = utarfile.TarFile(sys.argv[1], "w") for filename in sys.argv[2:]: print(filename) t.add(filename) diff --git a/micropython/utarfile/utarfile.py b/micropython/utarfile/utarfile.py index ad642ac5c..6c1aed2df 100644 --- a/micropython/utarfile/utarfile.py +++ b/micropython/utarfile/utarfile.py @@ -17,9 +17,9 @@ REGTYPE = "file" # Following https://github.com/python/cpython/blob/3.11/Lib/tarfile.py -NUL = b"\0" # the null character -BLOCKSIZE = 512 # length of processing blocks -RECORDSIZE = BLOCKSIZE * 20 # length of records +NUL = b"\0" # the null character +BLOCKSIZE = 512 # length of processing blocks +RECORDSIZE = BLOCKSIZE * 20 # length of records def roundup(val, align): @@ -151,7 +151,7 @@ def addfile(self, tarinfo, fileobj=None): if fileobj: n_bytes = self.f.write(fileobj.read()) self.offset += n_bytes - remains = (-n_bytes & (BLOCKSIZE - 1)) # == 0b111111111 + remains = -n_bytes & (BLOCKSIZE - 
1) # == 0b111111111 if remains: buf = bytearray(remains) self.f.write(buf) @@ -166,15 +166,15 @@ def add(self, name, recursive=True): self.addfile(tarinfo) if recursive: for f in os.ilistdir(name): - self.add(name + '/' + f[0], recursive) - else: # type == REGTYPE + self.add(name + "/" + f[0], recursive) + else: # type == REGTYPE self.addfile(tarinfo, open(name, "rb")) def close(self): # Must be called to complete writing a tar file. if self.mode == "w": self.f.write(NUL * (BLOCKSIZE * 2)) - self.offset += (BLOCKSIZE * 2) + self.offset += BLOCKSIZE * 2 remainder = self.offset % RECORDSIZE if remainder: self.f.write(NUL * (RECORDSIZE - remainder)) From 68087c3b80972b6cbc3d9d3bda44c69f37ba6fde Mon Sep 17 00:00:00 2001 From: Dan Ellis Date: Sun, 14 May 2023 19:51:44 -0400 Subject: [PATCH 3/6] micropython/utarfile: Skip non-regular files on creation. --- micropython/utarfile/example-create.py | 1 - micropython/utarfile/utarfile.py | 30 +++++++++++++++++++++----- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/micropython/utarfile/example-create.py b/micropython/utarfile/example-create.py index 8a0a03fd4..f540ee227 100644 --- a/micropython/utarfile/example-create.py +++ b/micropython/utarfile/example-create.py @@ -7,6 +7,5 @@ t = utarfile.TarFile(sys.argv[1], "w") for filename in sys.argv[2:]: - print(filename) t.add(filename) t.close() diff --git a/micropython/utarfile/utarfile.py b/micropython/utarfile/utarfile.py index 6c1aed2df..27b079d07 100644 --- a/micropython/utarfile/utarfile.py +++ b/micropython/utarfile/utarfile.py @@ -72,8 +72,17 @@ def _setstring(b, s, maxlen): b[i] = c +_S_IFMT = 0o170000 +_S_IFREG = 0o100000 +_S_IFDIR = 0o040000 + + def _isdir(finfo): - return (finfo[0] & 0o40000) > 0 + return (finfo[0] & _S_IFMT) == _S_IFDIR + + +def _isreg(finfo): + return (finfo[0] & _S_IFMT) == _S_IFREG class TarFile: @@ -128,14 +137,17 @@ def addfile(self, tarinfo, fileobj=None): buf = bytearray(BLOCKSIZE) name = tarinfo.name finfo = tarinfo.finfo - if 
_isdir(finfo) and not name.endswith("/"): - name += "/" + size = finfo[6] + if _isdir(finfo): + size = 0 + if not name.endswith("/"): + name += "/" hdr = uctypes.struct(uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN) _setstring(hdr.name, name, 100) _setstring(hdr.mode, "%06o " % (finfo[0] & 0o7777), 7) _setstring(hdr.uid, "%06o " % finfo[4], 7) _setstring(hdr.gid, "%06o " % finfo[5], 7) - _setstring(hdr.size, "%011o " % finfo[6], 12) + _setstring(hdr.size, "%011o " % size, 12) _setstring(hdr.mtime, "%011o " % finfo[8], 12) _setstring(hdr.typeflag, "5" if _isdir(finfo) else "0", 1) # Checksum is calculated with checksum field all blanks. @@ -160,7 +172,15 @@ def addfile(self, tarinfo, fileobj=None): def add(self, name, recursive=True): tarinfo = TarInfo() tarinfo.name = name - tarinfo.finfo = os.stat(name) + try: + tarinfo.finfo = os.stat(name) + except OSError: + print("Cannot stat", name, " - skipping.") + return + if not (_isdir(tarinfo.finfo) or _isreg(tarinfo.finfo)): + # We only accept directories or regular files. + print(name, "is not a directory or regular file - skipping.") + return tarinfo.type = DIRTYPE if _isdir(tarinfo.finfo) else REGTYPE if tarinfo.type == DIRTYPE: self.addfile(tarinfo) From a24ac01010ffedb485b40d651afab0a28b32ffd3 Mon Sep 17 00:00:00 2001 From: Dan Ellis Date: Thu, 18 May 2023 16:54:55 -0400 Subject: [PATCH 4/6] micropython/utarfile: Tarfile create and append in new utarfile-write module. 
--- micropython/utarfile-write/example-append.py | 15 ++ micropython/utarfile-write/example-create.py | 14 ++ micropython/utarfile-write/manifest.py | 4 + micropython/utarfile-write/utarfile/write.py | 165 +++++++++++++++ micropython/utarfile/example-create.py | 11 - micropython/utarfile/example-extract.py | 11 +- micropython/utarfile/manifest.py | 2 +- micropython/utarfile/utarfile.py | 202 ------------------- micropython/utarfile/utarfile/__init__.py | 12 ++ micropython/utarfile/utarfile/utarfile.py | 122 +++++++++++ 10 files changed, 340 insertions(+), 218 deletions(-) create mode 100644 micropython/utarfile-write/example-append.py create mode 100644 micropython/utarfile-write/example-create.py create mode 100644 micropython/utarfile-write/manifest.py create mode 100644 micropython/utarfile-write/utarfile/write.py delete mode 100644 micropython/utarfile/example-create.py delete mode 100644 micropython/utarfile/utarfile.py create mode 100644 micropython/utarfile/utarfile/__init__.py create mode 100644 micropython/utarfile/utarfile/utarfile.py diff --git a/micropython/utarfile-write/example-append.py b/micropython/utarfile-write/example-append.py new file mode 100644 index 000000000..9adf34d13 --- /dev/null +++ b/micropython/utarfile-write/example-append.py @@ -0,0 +1,15 @@ +""" tar append writes additional files to the end of an existing tar file.""" +import os +import sys +import utarfile + +if len(sys.argv) < 2: + raise ValueError("Usage: %s appendfile.tar newinputfile1 ..." 
% sys.argv[0]) + +tarfile = sys.argv[1] +if not tarfile.endswith(".tar"): + raise ValueError("Filename %s does not end with .tar" % tarfile) + +with utarfile.TarFile(sys.argv[1], "a") as t: + for filename in sys.argv[2:]: + t.add(filename) diff --git a/micropython/utarfile-write/example-create.py b/micropython/utarfile-write/example-create.py new file mode 100644 index 000000000..f0c9b206a --- /dev/null +++ b/micropython/utarfile-write/example-create.py @@ -0,0 +1,14 @@ +""" tar create writes a new tar file containing the specified files.""" +import sys +import utarfile + +if len(sys.argv) < 2: + raise ValueError("Usage: %s outputfile.tar inputfile1 ..." % sys.argv[0]) + +tarfile = sys.argv[1] +if not tarfile.endswith(".tar"): + raise ValueError("Filename %s does not end with .tar" % tarfile) + +with utarfile.TarFile(sys.argv[1], "w") as t: + for filename in sys.argv[2:]: + t.add(filename) diff --git a/micropython/utarfile-write/manifest.py b/micropython/utarfile-write/manifest.py new file mode 100644 index 000000000..188c9349c --- /dev/null +++ b/micropython/utarfile-write/manifest.py @@ -0,0 +1,4 @@ +metadata(description="Lightweight tarfile module writing subset", version="0.1") + +require("utarfile") +package("utarfile") diff --git a/micropython/utarfile-write/utarfile/write.py b/micropython/utarfile-write/utarfile/write.py new file mode 100644 index 000000000..6036298a1 --- /dev/null +++ b/micropython/utarfile-write/utarfile/write.py @@ -0,0 +1,165 @@ +"""Additions to the TarFile class to support creating and appending tar files. + +The methods defined below in the TarInfoWrite and TarFileWrite are actually +copied into the correspodning TarInfo and TarFile classes from the utarfile +module. +""" + +import uctypes +import os + +# Extended subset of tar header fields including the ones we'll write. 
+# http://www.gnu.org/software/tar/manual/html_node/Standard.html +TAR_HEADER = { + "name": (uctypes.ARRAY | 0, uctypes.UINT8 | 100), + "mode": (uctypes.ARRAY | 100, uctypes.UINT8 | 7), + "uid": (uctypes.ARRAY | 108, uctypes.UINT8 | 7), + "gid": (uctypes.ARRAY | 116, uctypes.UINT8 | 7), + "size": (uctypes.ARRAY | 124, uctypes.UINT8 | 12), + "mtime": (uctypes.ARRAY | 136, uctypes.UINT8 | 12), + "chksum": (uctypes.ARRAY | 148, uctypes.UINT8 | 8), + "typeflag": (uctypes.ARRAY | 156, uctypes.UINT8 | 1), +} + +# Following https://github.com/python/cpython/blob/3.11/Lib/tarfile.py +NUL = b"\0" # the null character +BLOCKSIZE = 512 # length of processing blocks +RECORDSIZE = BLOCKSIZE * 20 # length of records + +# Duplicated from utarfile.py. +DIRTYPE = "dir" +REGTYPE = "file" + +# Constants for TarInfo.isdir, isreg. +_S_IFMT = 0o170000 +_S_IFREG = 0o100000 +_S_IFDIR = 0o040000 + + +def _setstring(b, s, maxlen): + """Write a string into a bytearray by copying each byte.""" + for i, c in enumerate(s.encode("utf-8")[:maxlen]): + b[i] = c + + +class TarInfoWrite: + added_methods = ["_from_stat", "isdir", "isreg"] + + def _from_stat(self, stat): + """Extended TarInfo for use by utarfile-write.""" + # stat is return from os.stat. + self.mode = stat[0] + # Overwrite name-based type inference using mode bits. + self.type = DIRTYPE if self.isdir() else REGTYPE + self.uid = stat[4] + self.gid = stat[5] + self.size = stat[6] + self.mtime = stat[8] + + def isdir(self): + return (self.mode & _S_IFMT) == _S_IFDIR + + def isreg(self): + return (self.mode & _S_IFMT) == _S_IFREG + + +class TarFileWrite: + added_methods = [ + "_open_write", "__enter__", "__exit__", "addfile", "add", "close" + ] + + def _open_write(self, name, mode, fileobj): + if mode == "w": + if not fileobj: + self.f = open(name, "wb") + else: + self.f = fileobj + elif mode == "a": + if not fileobj: + self.f = open(name, "r+b") + else: + self.f = fileobj + # Read through the existing file. 
+ while self.next(): + pass + # Position at start of end block. + self.f.seek(self.offset) + else: + raise ValueError("mode " + mode + " not supported.") + + def __enter__(self): + """Make usable with "with" statement.""" + return self + + def __exit__(self, unused_type, unused_value, unused_traceback): + """Make usable with "with" statement.""" + self.close() + + def addfile(self, tarinfo, fileobj=None): + # Write the header: 100 bytes of name, 8 bytes of mode in octal... + buf = bytearray(BLOCKSIZE) + name = tarinfo.name + size = tarinfo.size + if tarinfo.isdir(): + size = 0 + if not name.endswith("/"): + name += "/" + hdr = uctypes.struct( + uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN + ) + _setstring(hdr.name, name, 100) + _setstring(hdr.mode, "%06o " % (tarinfo.mode & 0o7777), 7) + _setstring(hdr.uid, "%06o " % tarinfo.uid, 7) + _setstring(hdr.gid, "%06o " % tarinfo.gid, 7) + _setstring(hdr.size, "%011o " % size, 12) + _setstring(hdr.mtime, "%011o " % tarinfo.mtime, 12) + _setstring(hdr.typeflag, "5" if tarinfo.isdir() else "0", 1) + # Checksum is calculated with checksum field all blanks. + _setstring(hdr.chksum, " " * 8, 8) + # Calculate and insert the actual checksum. + chksum = sum(buf) + _setstring(hdr.chksum, "%06o\0" % chksum, 7) + # Emit the header. + self.f.write(buf) + self.offset += len(buf) + + # Copy the file contents, if any. + if fileobj: + n_bytes = self.f.write(fileobj.read()) + self.offset += n_bytes + remains = -n_bytes & (BLOCKSIZE - 1) # == 0b111111111 + if remains: + buf = bytearray(remains) + self.f.write(buf) + self.offset += len(buf) + + def add(self, name, recursive=True): + # self.TarInfo will exist when this method is pasted into TarFile. + tarinfo = self.TarInfo(name) + try: + tarinfo._from_stat(os.stat(name)) + except OSError: + print("Cannot stat", name, " - skipping.") + return + if not (tarinfo.isdir() or tarinfo.isreg()): + # We only accept directories or regular files. 
+ print(name, "is not a directory or regular file - skipping.") + return + if tarinfo.isdir(): + self.addfile(tarinfo) + if recursive: + for f in os.ilistdir(name): + self.add(name + "/" + f[0], recursive) + else: # type == REGTYPE + self.addfile(tarinfo, open(name, "rb")) + + def close(self): + # Must be called to complete writing a tar file. + if self.mode == "w": + self.f.write(NUL * (BLOCKSIZE * 2)) + self.offset += BLOCKSIZE * 2 + remainder = self.offset % RECORDSIZE + if remainder: + self.f.write(NUL * (RECORDSIZE - remainder)) + self.f.close() + self.f = None diff --git a/micropython/utarfile/example-create.py b/micropython/utarfile/example-create.py deleted file mode 100644 index f540ee227..000000000 --- a/micropython/utarfile/example-create.py +++ /dev/null @@ -1,11 +0,0 @@ -import sys -import utarfile - -tarfile = sys.argv[1] -if not tarfile.endswith(".tar"): - raise ValueError("Filename %s does not end with .tar" % tarfile) - -t = utarfile.TarFile(sys.argv[1], "w") -for filename in sys.argv[2:]: - t.add(filename) -t.close() diff --git a/micropython/utarfile/example-extract.py b/micropython/utarfile/example-extract.py index a8f828cc9..a8a05d5bc 100644 --- a/micropython/utarfile/example-extract.py +++ b/micropython/utarfile/example-extract.py @@ -1,13 +1,16 @@ import sys import os -import shutil import utarfile +if len(sys.argv) < 2: + raise ValueError("Usage: %s inputfile.tar" % sys.argv[0]) + t = utarfile.TarFile(sys.argv[1]) for i in t: - print(i) + print(i.name) if i.type == utarfile.DIRTYPE: - os.makedirs(i.name) + os.mkdir(i.name) else: f = t.extractfile(i) - shutil.copyfileobj(f, open(i.name, "wb")) + with open(i.name, "wb") as of: + of.write(f.read()) diff --git a/micropython/utarfile/manifest.py b/micropython/utarfile/manifest.py index 65bd68b9a..d3646b346 100644 --- a/micropython/utarfile/manifest.py +++ b/micropython/utarfile/manifest.py @@ -2,4 +2,4 @@ # Originally written by Paul Sokolovsky. 
-module("utarfile.py") +package("utarfile") diff --git a/micropython/utarfile/utarfile.py b/micropython/utarfile/utarfile.py deleted file mode 100644 index 27b079d07..000000000 --- a/micropython/utarfile/utarfile.py +++ /dev/null @@ -1,202 +0,0 @@ -import uctypes -import os # For reading files when writing tar files. - -# http://www.gnu.org/software/tar/manual/html_node/Standard.html -TAR_HEADER = { - "name": (uctypes.ARRAY | 0, uctypes.UINT8 | 100), - "mode": (uctypes.ARRAY | 100, uctypes.UINT8 | 7), - "uid": (uctypes.ARRAY | 108, uctypes.UINT8 | 7), - "gid": (uctypes.ARRAY | 116, uctypes.UINT8 | 7), - "size": (uctypes.ARRAY | 124, uctypes.UINT8 | 12), - "mtime": (uctypes.ARRAY | 136, uctypes.UINT8 | 12), - "chksum": (uctypes.ARRAY | 148, uctypes.UINT8 | 8), - "typeflag": (uctypes.ARRAY | 156, uctypes.UINT8 | 1), -} - -DIRTYPE = "dir" -REGTYPE = "file" - -# Following https://github.com/python/cpython/blob/3.11/Lib/tarfile.py -NUL = b"\0" # the null character -BLOCKSIZE = 512 # length of processing blocks -RECORDSIZE = BLOCKSIZE * 20 # length of records - - -def roundup(val, align): - return (val + align - 1) & ~(align - 1) - - -class FileSection: - def __init__(self, f, content_len, aligned_len): - self.f = f - self.content_len = content_len - self.align = aligned_len - content_len - - def read(self, sz=65536): - if self.content_len == 0: - return b"" - if sz > self.content_len: - sz = self.content_len - data = self.f.read(sz) - sz = len(data) - self.content_len -= sz - return data - - def readinto(self, buf): - if self.content_len == 0: - return 0 - if len(buf) > self.content_len: - buf = memoryview(buf)[: self.content_len] - sz = self.f.readinto(buf) - self.content_len -= sz - return sz - - def skip(self): - sz = self.content_len + self.align - if sz: - buf = bytearray(16) - while sz: - s = min(sz, 16) - self.f.readinto(buf, s) - sz -= s - - -class TarInfo: - def __str__(self): - return "TarInfo(%r, %s, %d)" % (self.name, self.type, self.size) - - -def 
_setstring(b, s, maxlen): - """Write a string into a bytearray by copying each byte.""" - for i, c in enumerate(s.encode("utf-8")[:maxlen]): - b[i] = c - - -_S_IFMT = 0o170000 -_S_IFREG = 0o100000 -_S_IFDIR = 0o040000 - - -def _isdir(finfo): - return (finfo[0] & _S_IFMT) == _S_IFDIR - - -def _isreg(finfo): - return (finfo[0] & _S_IFMT) == _S_IFREG - - -class TarFile: - def __init__(self, name=None, mode="r", fileobj=None): - modes = {"r": "rb", "w": "wb"} - if mode not in modes: - raise ValueError("mode must be 'r' or 'w'") - if fileobj: - self.f = fileobj - else: - self.f = open(name, modes[mode]) - self.subf = None - self.mode = mode - self.offset = 0 - - def next(self): - if self.subf: - self.subf.skip() - buf = self.f.read(BLOCKSIZE) - if not buf: - return None - self.offset += len(buf) - - h = uctypes.struct(uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN) - - # Empty block means end of archive - if h.name[0] == 0: - return None - - d = TarInfo() - d.name = str(h.name, "utf-8").rstrip("\0") - d.size = int(bytes(h.size), 8) - d.type = [REGTYPE, DIRTYPE][d.name[-1] == "/"] - self.subf = d.subf = FileSection(self.f, d.size, roundup(d.size, BLOCKSIZE)) - self.offset += roundup(d.size, BLOCKSIZE) - return d - - def __iter__(self): - return self - - def __next__(self): - v = self.next() - if v is None: - raise StopIteration - return v - - def extractfile(self, tarinfo): - return tarinfo.subf - - def addfile(self, tarinfo, fileobj=None): - # Write the header: 100 bytes of name, 8 bytes of mode in octal... 
- buf = bytearray(BLOCKSIZE) - name = tarinfo.name - finfo = tarinfo.finfo - size = finfo[6] - if _isdir(finfo): - size = 0 - if not name.endswith("/"): - name += "/" - hdr = uctypes.struct(uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN) - _setstring(hdr.name, name, 100) - _setstring(hdr.mode, "%06o " % (finfo[0] & 0o7777), 7) - _setstring(hdr.uid, "%06o " % finfo[4], 7) - _setstring(hdr.gid, "%06o " % finfo[5], 7) - _setstring(hdr.size, "%011o " % size, 12) - _setstring(hdr.mtime, "%011o " % finfo[8], 12) - _setstring(hdr.typeflag, "5" if _isdir(finfo) else "0", 1) - # Checksum is calculated with checksum field all blanks. - _setstring(hdr.chksum, " " * 8, 8) - # Calculate and insert the actual checksum. - chksum = sum(buf) - _setstring(hdr.chksum, "%06o\0" % chksum, 7) - # Emit the header. - self.f.write(buf) - self.offset += len(buf) - - # Copy the file contents, if any. - if fileobj: - n_bytes = self.f.write(fileobj.read()) - self.offset += n_bytes - remains = -n_bytes & (BLOCKSIZE - 1) # == 0b111111111 - if remains: - buf = bytearray(remains) - self.f.write(buf) - self.offset += len(buf) - - def add(self, name, recursive=True): - tarinfo = TarInfo() - tarinfo.name = name - try: - tarinfo.finfo = os.stat(name) - except OSError: - print("Cannot stat", name, " - skipping.") - return - if not (_isdir(tarinfo.finfo) or _isreg(tarinfo.finfo)): - # We only accept directories or regular files. - print(name, "is not a directory or regular file - skipping.") - return - tarinfo.type = DIRTYPE if _isdir(tarinfo.finfo) else REGTYPE - if tarinfo.type == DIRTYPE: - self.addfile(tarinfo) - if recursive: - for f in os.ilistdir(name): - self.add(name + "/" + f[0], recursive) - else: # type == REGTYPE - self.addfile(tarinfo, open(name, "rb")) - - def close(self): - # Must be called to complete writing a tar file. 
- if self.mode == "w": - self.f.write(NUL * (BLOCKSIZE * 2)) - self.offset += BLOCKSIZE * 2 - remainder = self.offset % RECORDSIZE - if remainder: - self.f.write(NUL * (RECORDSIZE - remainder)) - self.f.close() - self.f = None diff --git a/micropython/utarfile/utarfile/__init__.py b/micropython/utarfile/utarfile/__init__.py new file mode 100644 index 000000000..39f8ed85d --- /dev/null +++ b/micropython/utarfile/utarfile/__init__.py @@ -0,0 +1,12 @@ +from .utarfile import * + +try: + from .write import TarInfoWrite, TarFileWrite + + for method in TarInfoWrite.added_methods: + setattr(TarInfo, method, getattr(TarInfoWrite, method)) + for method in TarFileWrite.added_methods: + setattr(TarFile, method, getattr(TarFileWrite, method)) + +except ImportError: + pass diff --git a/micropython/utarfile/utarfile/utarfile.py b/micropython/utarfile/utarfile/utarfile.py new file mode 100644 index 000000000..2ae1d1023 --- /dev/null +++ b/micropython/utarfile/utarfile/utarfile.py @@ -0,0 +1,122 @@ +"""Subset of cpython tarfile class methods needed to decode tar files.""" + +import uctypes + +# Minimal set of tar header fields for reading. 
+# http://www.gnu.org/software/tar/manual/html_node/Standard.html +TAR_HEADER = { + "name": (uctypes.ARRAY | 0, uctypes.UINT8 | 100), + "size": (uctypes.ARRAY | 124, uctypes.UINT8 | 12), +} + +DIRTYPE = "dir" +REGTYPE = "file" + +BLOCKSIZE = 512 # length of processing blocks + + +def roundup(val, align): + return (val + align - 1) & ~(align - 1) + + +class FileSection: + def __init__(self, f, content_len, aligned_len): + self.f = f + self.content_len = content_len + self.align = aligned_len - content_len + + def read(self, sz=65536): + if self.content_len == 0: + return b"" + if sz > self.content_len: + sz = self.content_len + data = self.f.read(sz) + sz = len(data) + self.content_len -= sz + return data + + def readinto(self, buf): + if self.content_len == 0: + return 0 + if len(buf) > self.content_len: + buf = memoryview(buf)[: self.content_len] + sz = self.f.readinto(buf) + self.content_len -= sz + return sz + + def skip(self): + sz = self.content_len + self.align + if sz: + buf = bytearray(16) + while sz: + s = min(sz, 16) + self.f.readinto(buf, s) + sz -= s + + +class TarInfo: + def __init__(self, name=''): + self.name = name + self.type = DIRTYPE if self.name[-1] == "/" else REGTYPE + + def __str__(self): + return "TarInfo(%r, %s, %d)" % (self.name, self.type, self.size) + + + +class TarFile: + def __init__(self, name=None, mode="r", fileobj=None): + self.subf = None + self.mode = mode + self.offset = 0 + if mode == "r": + if fileobj: + self.f = fileobj + else: + self.f = open(name, "rb") + else: + try: + self._open_write(name=name, mode=mode, fileobj=fileobj) + except NameError: + raise NameError("Install utarfile-write") + + def next(self): + if self.subf: + self.subf.skip() + buf = self.f.read(BLOCKSIZE) + if not buf: + return None + + h = uctypes.struct( + uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN + ) + + # Empty block means end of archive + if h.name[0] == 0: + return None + + # Update the offset once we're sure it's not the run-out. 
+ self.offset += len(buf) + d = TarInfo(str(h.name, "utf-8").rstrip("\0")) + d.size = int(bytes(h.size), 8) + self.subf = d.subf = FileSection( + self.f, d.size, roundup(d.size, BLOCKSIZE) + ) + self.offset += roundup(d.size, BLOCKSIZE) + return d + + def __iter__(self): + return self + + def __next__(self): + v = self.next() + if v is None: + raise StopIteration + return v + + def extractfile(self, tarinfo): + return tarinfo.subf + + def TarInfo(self, name): + """Allow the TarFileCreate methods to reach TarInfo.""" + return TarInfo(name) From 073ad0e64f19da58a4172f719b339f770551e408 Mon Sep 17 00:00:00 2001 From: Dan Ellis Date: Thu, 18 May 2023 17:05:02 -0400 Subject: [PATCH 5/6] micropython/utarfile-write: Whitespace fixes. --- micropython/utarfile-write/utarfile/write.py | 12 ++++-------- micropython/utarfile/utarfile/__init__.py | 12 ++++++------ micropython/utarfile/utarfile/utarfile.py | 11 +++-------- 3 files changed, 13 insertions(+), 22 deletions(-) diff --git a/micropython/utarfile-write/utarfile/write.py b/micropython/utarfile-write/utarfile/write.py index 6036298a1..0551a1969 100644 --- a/micropython/utarfile-write/utarfile/write.py +++ b/micropython/utarfile-write/utarfile/write.py @@ -44,7 +44,7 @@ def _setstring(b, s, maxlen): class TarInfoWrite: added_methods = ["_from_stat", "isdir", "isreg"] - + def _from_stat(self, stat): """Extended TarInfo for use by utarfile-write.""" # stat is return from os.stat. 
@@ -55,7 +55,7 @@ def _from_stat(self, stat): self.gid = stat[5] self.size = stat[6] self.mtime = stat[8] - + def isdir(self): return (self.mode & _S_IFMT) == _S_IFDIR @@ -64,9 +64,7 @@ def isreg(self): class TarFileWrite: - added_methods = [ - "_open_write", "__enter__", "__exit__", "addfile", "add", "close" - ] + added_methods = ["_open_write", "__enter__", "__exit__", "addfile", "add", "close"] def _open_write(self, name, mode, fileobj): if mode == "w": @@ -104,9 +102,7 @@ def addfile(self, tarinfo, fileobj=None): size = 0 if not name.endswith("/"): name += "/" - hdr = uctypes.struct( - uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN - ) + hdr = uctypes.struct(uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN) _setstring(hdr.name, name, 100) _setstring(hdr.mode, "%06o " % (tarinfo.mode & 0o7777), 7) _setstring(hdr.uid, "%06o " % tarinfo.uid, 7) diff --git a/micropython/utarfile/utarfile/__init__.py b/micropython/utarfile/utarfile/__init__.py index 39f8ed85d..464fdd8cc 100644 --- a/micropython/utarfile/utarfile/__init__.py +++ b/micropython/utarfile/utarfile/__init__.py @@ -1,12 +1,12 @@ from .utarfile import * try: - from .write import TarInfoWrite, TarFileWrite + from .write import TarInfoWrite, TarFileWrite - for method in TarInfoWrite.added_methods: - setattr(TarInfo, method, getattr(TarInfoWrite, method)) - for method in TarFileWrite.added_methods: - setattr(TarFile, method, getattr(TarFileWrite, method)) + for method in TarInfoWrite.added_methods: + setattr(TarInfo, method, getattr(TarInfoWrite, method)) + for method in TarFileWrite.added_methods: + setattr(TarFile, method, getattr(TarFileWrite, method)) except ImportError: - pass + pass diff --git a/micropython/utarfile/utarfile/utarfile.py b/micropython/utarfile/utarfile/utarfile.py index 2ae1d1023..84dcfb36b 100644 --- a/micropython/utarfile/utarfile/utarfile.py +++ b/micropython/utarfile/utarfile/utarfile.py @@ -55,7 +55,7 @@ def skip(self): class TarInfo: - def __init__(self, 
name=''): + def __init__(self, name=""): self.name = name self.type = DIRTYPE if self.name[-1] == "/" else REGTYPE @@ -63,7 +63,6 @@ def __str__(self): return "TarInfo(%r, %s, %d)" % (self.name, self.type, self.size) - class TarFile: def __init__(self, name=None, mode="r", fileobj=None): self.subf = None @@ -87,9 +86,7 @@ def next(self): if not buf: return None - h = uctypes.struct( - uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN - ) + h = uctypes.struct(uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN) # Empty block means end of archive if h.name[0] == 0: @@ -99,9 +96,7 @@ def next(self): self.offset += len(buf) d = TarInfo(str(h.name, "utf-8").rstrip("\0")) d.size = int(bytes(h.size), 8) - self.subf = d.subf = FileSection( - self.f, d.size, roundup(d.size, BLOCKSIZE) - ) + self.subf = d.subf = FileSection(self.f, d.size, roundup(d.size, BLOCKSIZE)) self.offset += roundup(d.size, BLOCKSIZE) return d From e16b64b357dde9e71511d0b8feeb81ae05e2b871 Mon Sep 17 00:00:00 2001 From: Dan Ellis Date: Sun, 21 May 2023 07:30:09 -0400 Subject: [PATCH 6/6] micropython/utarfile: @jimmo's reorganization and optimizations. --- micropython/utarfile-write/utarfile/write.py | 240 +++++++++---------- micropython/utarfile/utarfile/__init__.py | 146 ++++++++++- micropython/utarfile/utarfile/utarfile.py | 117 --------- 3 files changed, 246 insertions(+), 257 deletions(-) delete mode 100644 micropython/utarfile/utarfile/utarfile.py diff --git a/micropython/utarfile-write/utarfile/write.py b/micropython/utarfile-write/utarfile/write.py index 0551a1969..c982e8258 100644 --- a/micropython/utarfile-write/utarfile/write.py +++ b/micropython/utarfile-write/utarfile/write.py @@ -1,8 +1,7 @@ """Additions to the TarFile class to support creating and appending tar files. -The methods defined below in the TarInfoWrite and TarFileWrite are actually -copied into the correspodning TarInfo and TarFile classes from the utarfile -module. 
+The methods defined below are injected into the TarFile class in the +utarfile package. """ import uctypes @@ -10,7 +9,7 @@ # Extended subset of tar header fields including the ones we'll write. # http://www.gnu.org/software/tar/manual/html_node/Standard.html -TAR_HEADER = { +_TAR_HEADER = { "name": (uctypes.ARRAY | 0, uctypes.UINT8 | 100), "mode": (uctypes.ARRAY | 100, uctypes.UINT8 | 7), "uid": (uctypes.ARRAY | 108, uctypes.UINT8 | 7), @@ -22,140 +21,115 @@ } # Following https://github.com/python/cpython/blob/3.11/Lib/tarfile.py -NUL = b"\0" # the null character -BLOCKSIZE = 512 # length of processing blocks -RECORDSIZE = BLOCKSIZE * 20 # length of records - -# Duplicated from utarfile.py. -DIRTYPE = "dir" -REGTYPE = "file" - -# Constants for TarInfo.isdir, isreg. -_S_IFMT = 0o170000 -_S_IFREG = 0o100000 -_S_IFDIR = 0o040000 +_NUL = const(b"\0") # the null character +_BLOCKSIZE = const(512) # length of processing blocks +_RECORDSIZE = const(_BLOCKSIZE * 20) # length of records +# Write a string into a bytearray by copying each byte. def _setstring(b, s, maxlen): - """Write a string into a bytearray by copying each byte.""" for i, c in enumerate(s.encode("utf-8")[:maxlen]): b[i] = c -class TarInfoWrite: - added_methods = ["_from_stat", "isdir", "isreg"] - - def _from_stat(self, stat): - """Extended TarInfo for use by utarfile-write.""" - # stat is return from os.stat. - self.mode = stat[0] - # Overwrite name-based type inference using mode bits.
- self.type = DIRTYPE if self.isdir() else REGTYPE - self.uid = stat[4] - self.gid = stat[5] - self.size = stat[6] - self.mtime = stat[8] - - def isdir(self): - return (self.mode & _S_IFMT) == _S_IFDIR - - def isreg(self): - return (self.mode & _S_IFMT) == _S_IFREG - - -class TarFileWrite: - added_methods = ["_open_write", "__enter__", "__exit__", "addfile", "add", "close"] - - def _open_write(self, name, mode, fileobj): - if mode == "w": - if not fileobj: - self.f = open(name, "wb") - else: - self.f = fileobj - elif mode == "a": - if not fileobj: - self.f = open(name, "r+b") - else: - self.f = fileobj - # Read through the existing file. - while self.next(): - pass - # Position at start of end block. - self.f.seek(self.offset) +def _open_write(self, name, mode, fileobj): + if mode == "w": + if not fileobj: + self.f = open(name, "wb") + else: + self.f = fileobj + elif mode == "a": + if not fileobj: + self.f = open(name, "r+b") else: - raise ValueError("mode " + mode + " not supported.") - - def __enter__(self): - """Make usable with "with" statement.""" - return self - - def __exit__(self, unused_type, unused_value, unused_traceback): - """Make usable with "with" statement.""" - self.close() - - def addfile(self, tarinfo, fileobj=None): - # Write the header: 100 bytes of name, 8 bytes of mode in octal... - buf = bytearray(BLOCKSIZE) - name = tarinfo.name - size = tarinfo.size - if tarinfo.isdir(): - size = 0 - if not name.endswith("/"): - name += "/" - hdr = uctypes.struct(uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN) - _setstring(hdr.name, name, 100) - _setstring(hdr.mode, "%06o " % (tarinfo.mode & 0o7777), 7) - _setstring(hdr.uid, "%06o " % tarinfo.uid, 7) - _setstring(hdr.gid, "%06o " % tarinfo.gid, 7) - _setstring(hdr.size, "%011o " % size, 12) - _setstring(hdr.mtime, "%011o " % tarinfo.mtime, 12) - _setstring(hdr.typeflag, "5" if tarinfo.isdir() else "0", 1) - # Checksum is calculated with checksum field all blanks. 
- _setstring(hdr.chksum, " " * 8, 8) - # Calculate and insert the actual checksum. - chksum = sum(buf) - _setstring(hdr.chksum, "%06o\0" % chksum, 7) - # Emit the header. - self.f.write(buf) - self.offset += len(buf) - - # Copy the file contents, if any. - if fileobj: - n_bytes = self.f.write(fileobj.read()) - self.offset += n_bytes - remains = -n_bytes & (BLOCKSIZE - 1) # == 0b111111111 - if remains: - buf = bytearray(remains) - self.f.write(buf) - self.offset += len(buf) - - def add(self, name, recursive=True): - # self.TarInfo will exist when this method is pasted into TarFile. - tarinfo = self.TarInfo(name) - try: - tarinfo._from_stat(os.stat(name)) - except OSError: - print("Cannot stat", name, " - skipping.") - return - if not (tarinfo.isdir() or tarinfo.isreg()): - # We only accept directories or regular files. - print(name, "is not a directory or regular file - skipping.") - return - if tarinfo.isdir(): - self.addfile(tarinfo) - if recursive: - for f in os.ilistdir(name): - self.add(name + "/" + f[0], recursive) - else: # type == REGTYPE - self.addfile(tarinfo, open(name, "rb")) - - def close(self): - # Must be called to complete writing a tar file. - if self.mode == "w": - self.f.write(NUL * (BLOCKSIZE * 2)) - self.offset += BLOCKSIZE * 2 - remainder = self.offset % RECORDSIZE - if remainder: - self.f.write(NUL * (RECORDSIZE - remainder)) - self.f.close() - self.f = None + self.f = fileobj + # Read through the existing file. + while self.next(): + pass + # Position at start of end block. + self.f.seek(self.offset) + else: + raise ValueError("mode " + mode + " not supported.") + + +def addfile(self, tarinfo, fileobj=None): + # Write the header: 100 bytes of name, 8 bytes of mode in octal... 
+ buf = bytearray(_BLOCKSIZE) + name = tarinfo.name + size = tarinfo.size + if tarinfo.isdir(): + size = 0 + if not name.endswith("/"): + name += "/" + hdr = uctypes.struct(uctypes.addressof(buf), _TAR_HEADER, uctypes.LITTLE_ENDIAN) + _setstring(hdr.name, name, 100) + _setstring(hdr.mode, "%06o " % (tarinfo.mode & 0o7777), 7) + _setstring(hdr.uid, "%06o " % tarinfo.uid, 7) + _setstring(hdr.gid, "%06o " % tarinfo.gid, 7) + _setstring(hdr.size, "%011o " % size, 12) + _setstring(hdr.mtime, "%011o " % tarinfo.mtime, 12) + _setstring(hdr.typeflag, "5" if tarinfo.isdir() else "0", 1) + # Checksum is calculated with checksum field all blanks. + _setstring(hdr.chksum, " " * 8, 8) + # Calculate and insert the actual checksum. + chksum = sum(buf) + _setstring(hdr.chksum, "%06o\0" % chksum, 7) + # Emit the header. + self.f.write(buf) + self.offset += len(buf) + + # Copy the file contents, if any. + if fileobj: + n_bytes = self.f.write(fileobj.read()) + self.offset += n_bytes + remains = -n_bytes & (_BLOCKSIZE - 1) # == 0b111111111 + if remains: + buf = bytearray(remains) + self.f.write(buf) + self.offset += len(buf) + + +def add(self, name, recursive=True): + # TarInfo is imported from the utarfile package at the bottom of this module. + tarinfo = TarInfo(name) + try: + stat = os.stat(name) + tarinfo.mode = stat[0] + tarinfo.uid = stat[4] + tarinfo.gid = stat[5] + tarinfo.size = stat[6] + tarinfo.mtime = stat[8] + except OSError: + print("Cannot stat", name, " - skipping.") + return + if not (tarinfo.isdir() or tarinfo.isreg()): + # We only accept directories or regular files. + print(name, "is not a directory or regular file - skipping.") + return + if tarinfo.isdir(): + self.addfile(tarinfo) + if recursive: + for f in os.ilistdir(name): + self.add(name + "/" + f[0], recursive) + else: # type == REGTYPE + self.addfile(tarinfo, open(name, "rb")) + + +def close(self): + # Must be called to complete writing a tar file.
+ if self.mode == "w": + self.f.write(_NUL * (_BLOCKSIZE * 2)) + self.offset += _BLOCKSIZE * 2 + remainder = self.offset % _RECORDSIZE + if remainder: + self.f.write(_NUL * (_RECORDSIZE - remainder)) + self.f.close() + + +# Inject extra functionality into TarFile. +from . import TarFile, TarInfo + +TarFile._open_write = _open_write +TarFile.addfile = addfile +TarFile.add = add +TarFile.close = close diff --git a/micropython/utarfile/utarfile/__init__.py b/micropython/utarfile/utarfile/__init__.py index 464fdd8cc..42a3b4f34 100644 --- a/micropython/utarfile/utarfile/__init__.py +++ b/micropython/utarfile/utarfile/__init__.py @@ -1,12 +1,144 @@ -from .utarfile import * +"""Subset of cpython tarfile class methods needed to decode tar files.""" -try: - from .write import TarInfoWrite, TarFileWrite +import uctypes + +# Minimal set of tar header fields for reading. +# http://www.gnu.org/software/tar/manual/html_node/Standard.html +_TAR_HEADER = { + "name": (uctypes.ARRAY | 0, uctypes.UINT8 | 100), + "size": (uctypes.ARRAY | 124, uctypes.UINT8 | 12), +} + +DIRTYPE = const("dir") +REGTYPE = const("file") + +# Constants for TarInfo.isdir, isreg. 
+_S_IFMT = const(0o170000) +_S_IFREG = const(0o100000) +_S_IFDIR = const(0o040000) + +_BLOCKSIZE = const(512) # length of processing blocks + + +def _roundup(val, align): + return (val + align - 1) & ~(align - 1) + + +class FileSection: + def __init__(self, f, content_len, aligned_len): + self.f = f + self.content_len = content_len + self.align = aligned_len - content_len + + def read(self, sz=65536): + if self.content_len == 0: + return b"" + if sz > self.content_len: + sz = self.content_len + data = self.f.read(sz) + sz = len(data) + self.content_len -= sz + return data + + def readinto(self, buf): + if self.content_len == 0: + return 0 + if len(buf) > self.content_len: + buf = memoryview(buf)[: self.content_len] + sz = self.f.readinto(buf) + self.content_len -= sz + return sz + + def skip(self): + sz = self.content_len + self.align + if sz: + buf = bytearray(16) + while sz: + s = min(sz, 16) + self.f.readinto(buf, s) + sz -= s + + +class TarInfo: + def __init__(self, name=""): + self.name = name + self.mode = _S_IFDIR if self.name[-1] == "/" else _S_IFREG + + @property + def type(self): + return DIRTYPE if self.isdir() else REGTYPE + + def __str__(self): + return "TarInfo(%r, %s, %d)" % (self.name, self.type, self.size) - for method in TarInfoWrite.added_methods: - setattr(TarInfo, method, getattr(TarInfoWrite, method)) - for method in TarFileWrite.added_methods: - setattr(TarFile, method, getattr(TarFileWrite, method)) + def isdir(self): + return (self.mode & _S_IFMT) == _S_IFDIR + def isreg(self): + return (self.mode & _S_IFMT) == _S_IFREG + + +class TarFile: + def __init__(self, name=None, mode="r", fileobj=None): + self.subf = None + self.mode = mode + self.offset = 0 + if mode == "r": + if fileobj: + self.f = fileobj + else: + self.f = open(name, "rb") + else: + try: + self._open_write(name=name, mode=mode, fileobj=fileobj) + except NameError: + raise NameError("Install utarfile-write") + + def __enter__(self): + return self + + def __exit__(self, 
unused_type, unused_value, unused_traceback): + self.close() + + def next(self): + if self.subf: + self.subf.skip() + buf = self.f.read(_BLOCKSIZE) + if not buf: + return None + + h = uctypes.struct(uctypes.addressof(buf), _TAR_HEADER, uctypes.LITTLE_ENDIAN) + + # Empty block means end of archive + if h.name[0] == 0: + return None + + # Update the offset once we're sure it's not the run-out. + self.offset += len(buf) + d = TarInfo(str(h.name, "utf-8").rstrip("\0")) + d.size = int(bytes(h.size), 8) + self.subf = d.subf = FileSection(self.f, d.size, _roundup(d.size, _BLOCKSIZE)) + self.offset += _roundup(d.size, _BLOCKSIZE) + return d + + def __iter__(self): + return self + + def __next__(self): + v = self.next() + if v is None: + raise StopIteration + return v + + def extractfile(self, tarinfo): + return tarinfo.subf + + def close(self): + self.f.close() + + +try: + # This will add additional methods to TarFile. + from . import write except ImportError: pass diff --git a/micropython/utarfile/utarfile/utarfile.py b/micropython/utarfile/utarfile/utarfile.py deleted file mode 100644 index 84dcfb36b..000000000 --- a/micropython/utarfile/utarfile/utarfile.py +++ /dev/null @@ -1,117 +0,0 @@ -"""Subset of cpython tarfile class methods needed to decode tar files.""" - -import uctypes - -# Minimal set of tar header fields for reading. 
-# http://www.gnu.org/software/tar/manual/html_node/Standard.html -TAR_HEADER = { - "name": (uctypes.ARRAY | 0, uctypes.UINT8 | 100), - "size": (uctypes.ARRAY | 124, uctypes.UINT8 | 12), -} - -DIRTYPE = "dir" -REGTYPE = "file" - -BLOCKSIZE = 512 # length of processing blocks - - -def roundup(val, align): - return (val + align - 1) & ~(align - 1) - - -class FileSection: - def __init__(self, f, content_len, aligned_len): - self.f = f - self.content_len = content_len - self.align = aligned_len - content_len - - def read(self, sz=65536): - if self.content_len == 0: - return b"" - if sz > self.content_len: - sz = self.content_len - data = self.f.read(sz) - sz = len(data) - self.content_len -= sz - return data - - def readinto(self, buf): - if self.content_len == 0: - return 0 - if len(buf) > self.content_len: - buf = memoryview(buf)[: self.content_len] - sz = self.f.readinto(buf) - self.content_len -= sz - return sz - - def skip(self): - sz = self.content_len + self.align - if sz: - buf = bytearray(16) - while sz: - s = min(sz, 16) - self.f.readinto(buf, s) - sz -= s - - -class TarInfo: - def __init__(self, name=""): - self.name = name - self.type = DIRTYPE if self.name[-1] == "/" else REGTYPE - - def __str__(self): - return "TarInfo(%r, %s, %d)" % (self.name, self.type, self.size) - - -class TarFile: - def __init__(self, name=None, mode="r", fileobj=None): - self.subf = None - self.mode = mode - self.offset = 0 - if mode == "r": - if fileobj: - self.f = fileobj - else: - self.f = open(name, "rb") - else: - try: - self._open_write(name=name, mode=mode, fileobj=fileobj) - except NameError: - raise NameError("Install utarfile-write") - - def next(self): - if self.subf: - self.subf.skip() - buf = self.f.read(BLOCKSIZE) - if not buf: - return None - - h = uctypes.struct(uctypes.addressof(buf), TAR_HEADER, uctypes.LITTLE_ENDIAN) - - # Empty block means end of archive - if h.name[0] == 0: - return None - - # Update the offset once we're sure it's not the run-out. 
- self.offset += len(buf) - d = TarInfo(str(h.name, "utf-8").rstrip("\0")) - d.size = int(bytes(h.size), 8) - self.subf = d.subf = FileSection(self.f, d.size, roundup(d.size, BLOCKSIZE)) - self.offset += roundup(d.size, BLOCKSIZE) - return d - - def __iter__(self): - return self - - def __next__(self): - v = self.next() - if v is None: - raise StopIteration - return v - - def extractfile(self, tarinfo): - return tarinfo.subf - - def TarInfo(self, name): - """Allow the TarFileCreate methods to reach TarInfo.""" - return TarInfo(name)