Skip to content

Commit

Permalink
odb: Avoid quadratic string building when rendering bodies
Browse files Browse the repository at this point in the history
  • Loading branch information
rwe committed Jan 10, 2022
1 parent 7dc58f9 commit df076c0
Showing 1 changed file with 43 additions and 21 deletions.
64 changes: 43 additions & 21 deletions gitrevise/odb.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import re
import os
from typing import (
Iterator,
TypeVar,
Type,
Dict,
Expand Down Expand Up @@ -73,7 +74,9 @@ def short(self) -> str:
def for_object(cls, tag: str, body: bytes) -> Oid:
    """Hash an object with the given type tag and body to determine its Oid.

    The digest is SHA-1 over the header ``"<tag> <len(body)>"``, a single
    NUL byte, and then ``body``.

    Args:
        tag: Object type tag placed at the start of the header.
        body: Raw object contents.

    Returns:
        An instance of ``cls`` constructed from the 20-byte SHA-1 digest.
    """
    hasher = hashlib.sha1()
    # Feed header, separator and body to the hasher exactly once each.
    # Successive update() calls are equivalent to hashing the concatenation,
    # so this avoids building a temporary copy of the (possibly large) body.
    hasher.update(f"{tag} {len(body)}".encode())
    hasher.update(b"\0")
    hasher.update(body)
    return cls(hasher.digest())

def __repr__(self) -> str:
Expand Down Expand Up @@ -298,24 +301,41 @@ def new_commit(
"""Directly create an in-memory commit object, without persisting it.
If a commit object with these properties already exists, it will be
returned instead."""
if author is None:
author = self.default_author
if committer is None:
committer = self.default_committer

body = b"tree " + tree.oid.hex().encode() + b"\n"
for parent in parents:
body += b"parent " + parent.oid.hex().encode() + b"\n"
body += b"author " + author.replace(b"\n", b"\n ") + b"\n"
body += b"committer " + committer.replace(b"\n", b"\n ") + b"\n"

body_tail = b"\n" + message
if self.sign_commits:
gpgsig = self.get_gpgsig(body + body_tail)
body += b"gpgsig " + gpgsig.replace(b"\n", b"\n ") + b"\n"
body += body_tail

return Commit(self, body)
def header_kvs(gpgsig: Optional[bytes]) -> Iterator[Tuple[bytes, bytes]]:
    """Generate the commit's (header name, header value) pairs in order.

    Emits ``tree``, one ``parent`` per entry in ``parents``, ``author`` and
    ``committer`` (falling back to the defaults on ``self`` when the given
    value is falsy), and finally ``gpgsig`` when a non-empty signature is
    supplied.
    """
    tree_hex = tree.oid.hex().encode()
    yield b"tree", tree_hex
    for parent in parents:
        yield b"parent", parent.oid.hex().encode()
    yield b"author", author if author else self.default_author
    yield b"committer", committer if committer else self.default_committer
    if not gpgsig:
        # No signature (None or empty): no gpgsig header is emitted.
        return
    yield b"gpgsig", gpgsig

def body_parts(gpgsig: Optional[bytes]) -> Iterator[bytes]:
    """Generate the byte chunks that, joined in order, form the commit body."""
    for name, raw_value in header_kvs(gpgsig=gpgsig):
        # One header line: name, space, value, newline. Newlines embedded in
        # the value are followed by a space so continuation lines are indented.
        yield name
        yield b" "
        yield raw_value.replace(b"\n", b"\n ")
        yield b"\n"
    # A blank line separates the headers from the message.
    yield from (b"\n", message)

def build(gpgsig: Optional[bytes] = None) -> bytes:
    """Join the body chunks into one buffer, passing ``gpgsig`` through."""
    chunks = body_parts(gpgsig=gpgsig)
    return b"".join(chunks)

def get_body() -> bytes:
    """Return the commit body, signed when ``self.sign_commits`` is set."""
    unsigned = build()
    if self.sign_commits:
        # The signature is computed over the body rendered without a gpgsig
        # header, then the body is rendered again with it included.
        return build(gpgsig=self.get_gpgsig(unsigned))
    return unsigned

return Commit(self, body=get_body())

def get_gpgsig(self, buffer: bytes) -> bytes:
"""Return the text of the signed commit object."""
Expand Down Expand Up @@ -358,9 +378,11 @@ def entry_key(pair: Tuple[bytes, Entry]) -> bytes:
return name + b"/"
return name

body = b""
for name, entry in sorted(entries.items(), key=entry_key):
body += cast(bytes, entry.mode.value) + b" " + name + b"\0" + entry.oid
body = b"".join(
field
for name, entry in sorted(entries.items(), key=entry_key)
for field in (cast(bytes, entry.mode.value), b" ", name, b"\0", entry.oid)
)
return Tree(self, body)

def get_obj(self, ref: Union[Oid, str]) -> GitObj:
Expand Down

0 comments on commit df076c0

Please sign in to comment.