Skip to content

Commit

Permalink
Fix performance issues with large binary files
Browse files Browse the repository at this point in the history
Specifically, pack and unpack PropWords properties with list
comprehensions rather than appending each word to a bytes object
(which creates a new object on every append) or to an array.

Also use struct.Struct to avoid having to compile the format millions of
times.

The changes to writing out the dtb give a speedup of at least 1000x when
writing out a 20 MB file ("at least" because I lost patience timing the
old code). The changes to reading in are much less dramatic - but it's
still at least 3x. I can now read in and write out the 20 MB file in
about 1.5 s.
  • Loading branch information
Martin Bonner committed Feb 17, 2021
1 parent 84bca0f commit 1ca1040
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions fdt/items.py
Expand Up @@ -12,12 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from struct import unpack, pack
from struct import pack, Struct
from string import printable

from .header import Header, DTB_PROP, DTB_BEGIN_NODE, DTB_END_NODE
from .misc import is_string, line_offset

BIGENDIAN_WORD = Struct(">I")

########################################################################################################################
# Helper methods
Expand All @@ -41,11 +42,10 @@ def new_property(name: str, raw_value: bytes) -> object:
elif len(raw_value) and len(raw_value) % 4 == 0:
obj = PropWords(name)
# Extract words from raw value
for i in range(0, len(raw_value), 4):
obj.append(unpack(">I", raw_value[i:i + 4])[0])
obj.data = [BIGENDIAN_WORD.unpack(raw_value[i:i + 4])[0] for i in range(0, len(raw_value), 4)]
return obj

elif len(raw_value) and len(raw_value):
elif len(raw_value):
return PropBytes(name, data=raw_value)

else:
Expand Down Expand Up @@ -342,8 +342,7 @@ def to_dtb(self, strings: str, pos: int = 0, version: int = Header.MAX_VERSION):
strpos = len(strings)
strings += self.name + '\0'
blob = pack('>III', DTB_PROP, len(self.data) * 4, strpos)
for word in self.data:
blob += pack('>I', word)
blob += bytes().join([BIGENDIAN_WORD.pack(word) for word in self.data])
pos += len(blob)
return blob, strings, pos

Expand Down

0 comments on commit 1ca1040

Please sign in to comment.