Permalink
Browse files

index.write_tree: initial version implemented, although it's not yet w…

…orking correctly; a test to explicitly compare the git version with the python implementation is still missing

Tree and Index internally use 20-byte shas, converting them only as needed, to reduce memory footprint and processing time
objects: started a separate 'fun' module containing the most important tree functions; more are likely to be added soon
  • Loading branch information...
Byron committed Jun 21, 2010
1 parent 1044116 commit 69dd8750be1fbf55010a738dc1ced4655e727f23
View
@@ -5,6 +5,13 @@ CHANGES
===
* ConcurrentWriteOperation was removed, and replaced by LockedFD
* IndexFile.get_entries_key was renamed to entry_key
+ * IndexEntry instances contained in IndexFile.entries now use binary shas. Use
+ the .hexsha property to obtain the hexadecimal version
+ * IndexFile.write_tree: removed missing_ok keyword, it's always True now.
+ Instead of raising GitCommandError, it raises UnmergedEntriesError
+ * diff.Diff.null_hex_sha renamed to NULL_HEX_SHA, to conform with
+ the naming in the Object base class
+
0.2 Beta 2
===========
View
@@ -28,7 +28,7 @@ def _init_externals():
from git.objects import *
from git.refs import *
from git.diff import *
-from git.errors import InvalidGitRepositoryError, NoSuchPathError, GitCommandError
+from git.errors import *
from git.cmd import Git
from git.repo import Repo
from git.remote import *
View
@@ -4,9 +4,12 @@
OStream
)
+from gitdb.util import to_hex_sha
+
from gitdb.db import GitDB
from gitdb.db import LooseObjectDB
+
__all__ = ('GitCmdObjectDB', 'GitDB' )
#class GitCmdObjectDB(CompoundDB, ObjectDBW):
@@ -24,11 +27,11 @@ def __init__(self, root_path, git):
self._git = git
def info(self, sha):
- t = self._git.get_object_header(sha)
+ t = self._git.get_object_header(to_hex_sha(sha))
return OInfo(*t)
def stream(self, sha):
"""For now, all lookup is done by git itself"""
- t = self._git.stream_object_data(sha)
+ t = self._git.stream_object_data(to_hex_sha(sha))
return OStream(*t)
View
@@ -196,7 +196,7 @@ class Diff(object):
\.\.(?P<b_blob_id>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
""", re.VERBOSE | re.MULTILINE)
# can be used for comparisons
- null_hex_sha = "0"*40
+ NULL_HEX_SHA = "0"*40
__slots__ = ("a_blob", "b_blob", "a_mode", "b_mode", "new_file", "deleted_file",
"rename_from", "rename_to", "diff")
View
@@ -6,43 +6,51 @@
""" Module containing all exceptions thrown througout the git package, """
class InvalidGitRepositoryError(Exception):
- """ Thrown if the given repository appears to have an invalid format. """
+ """ Thrown if the given repository appears to have an invalid format. """
class NoSuchPathError(OSError):
- """ Thrown if a path could not be access by the system. """
+ """ Thrown if a path could not be access by the system. """
class GitCommandError(Exception):
- """ Thrown if execution of the git command fails with non-zero status code. """
- def __init__(self, command, status, stderr=None):
- self.stderr = stderr
- self.status = status
- self.command = command
-
- def __str__(self):
- return ("'%s' returned exit status %i: %s" %
- (' '.join(str(i) for i in self.command), self.status, self.stderr))
+ """ Thrown if execution of the git command fails with non-zero status code. """
+ def __init__(self, command, status, stderr=None):
+ self.stderr = stderr
+ self.status = status
+ self.command = command
+
+ def __str__(self):
+ return ("'%s' returned exit status %i: %s" %
+ (' '.join(str(i) for i in self.command), self.status, self.stderr))
class CheckoutError( Exception ):
- """Thrown if a file could not be checked out from the index as it contained
- changes.
-
- The .failed_files attribute contains a list of relative paths that failed
- to be checked out as they contained changes that did not exist in the index.
-
- The .failed_reasons attribute contains a string informing about the actual
- cause of the issue.
-
- The .valid_files attribute contains a list of relative paths to files that
- were checked out successfully and hence match the version stored in the
- index"""
- def __init__(self, message, failed_files, valid_files, failed_reasons):
- Exception.__init__(self, message)
- self.failed_files = failed_files
- self.failed_reasons = failed_reasons
- self.valid_files = valid_files
-
- def __str__(self):
- return Exception.__str__(self) + ":%s" % self.failed_files
+ """Thrown if a file could not be checked out from the index as it contained
+ changes.
+
+ The .failed_files attribute contains a list of relative paths that failed
+ to be checked out as they contained changes that did not exist in the index.
+
+ The .failed_reasons attribute contains a string informing about the actual
+ cause of the issue.
+
+ The .valid_files attribute contains a list of relative paths to files that
+ were checked out successfully and hence match the version stored in the
+ index"""
+ def __init__(self, message, failed_files, valid_files, failed_reasons):
+ Exception.__init__(self, message)
+ self.failed_files = failed_files
+ self.failed_reasons = failed_reasons
+ self.valid_files = valid_files
+
+ def __str__(self):
+ return Exception.__str__(self) + ":%s" % self.failed_files
+
+
+class CacheError(Exception):
+ """Base for all errors related to the git index, which is called cache internally"""
+
+class UnmergedEntriesError(CacheError):
+ """Thrown if an operation cannot proceed as there are still unmerged
+ entries in the cache"""
View
@@ -5,13 +5,13 @@
# the BSD License: http://www.opensource.org/licenses/bsd-license.php
"""Module containing Index implementation, allowing to perform all kinds of index
manipulations such as querying and merging. """
-import binascii
import tempfile
import os
import sys
import subprocess
import glob
from cStringIO import StringIO
+from binascii import b2a_hex
from stat import (
S_ISLNK,
@@ -25,16 +25,12 @@
from typ import (
BaseIndexEntry,
IndexEntry,
- CE_NAMEMASK,
- CE_STAGESHIFT
)
from util import (
TemporaryFileSwap,
post_clear_cache,
default_index,
- pack,
- unpack
)
import git.objects
@@ -60,20 +56,17 @@
LockedFD,
join_path_native,
file_contents_ro,
- LockFile
- )
-
-
-from gitdb.base import (
- IStream
)
from fun import (
write_cache,
read_cache,
+ write_tree_from_cache,
entry_key
)
+from gitdb.base import IStream
+
__all__ = ( 'IndexFile', 'CheckoutError' )
@@ -161,10 +154,15 @@ def _deserialize(self, stream):
self.version, self.entries, self._extension_data, conten_sha = read_cache(stream)
return self
- def _serialize(self, stream, ignore_tree_extension_data=False):
+ def _entries_sorted(self):
+ """:return: list of entries, in a sorted fashion, first by path, then by stage"""
entries_sorted = self.entries.values()
- entries_sorted.sort(key=lambda e: (e[3], e.stage)) # use path/stage as sort key
- write_cache(entries_sorted,
+ entries_sorted.sort(key=lambda e: (e.path, e.stage)) # use path/stage as sort key
+ return entries_sorted
+
+ def _serialize(self, stream, ignore_tree_extension_data=False):
+ entries = self._entries_sorted()
+ write_cache(entries,
stream,
(ignore_tree_extension_data and None) or self._extension_data)
return self
@@ -403,7 +401,7 @@ def iter_blobs(self, predicate = lambda t: True):
# TODO: is it necessary to convert the mode ? We did that when adding
# it to the index, right ?
mode = self._stat_mode_to_index_mode(entry.mode)
- blob = Blob(self.repo, entry.sha, mode, entry.path)
+ blob = Blob(self.repo, entry.hexsha, mode, entry.path)
blob.size = entry.size
output = (entry.stage, blob)
if predicate(output):
@@ -490,33 +488,31 @@ def update(self):
# allows to lazily reread on demand
return self
- def _write_tree(self, missing_ok=False):
+ def write_tree(self):
"""Writes this index to a corresponding Tree object into the repository's
object database and return it.
-
- :param missing_ok:
- If True, missing objects referenced by this index will not result
- in an error.
-
- :return: Tree object representing this index"""
+
+ :return: Tree object representing this index
+ :note: The tree will be written even if one or more objects the tree refers to
+ do not yet exist in the object database. This could happen if you added
+ entries to the index directly.
+ :raise ValueError: if there are no entries in the cache
+ :raise UnmergedEntriesError: if there are still unmerged entries in the cache"""
# we obtain no lock as we just flush our contents to disk as tree
if not self.entries:
raise ValueError("Cannot write empty index")
+ # TODO: use memory db, this helps to prevent IO if the resulting tree
+ # already exists
+ entries = self._entries_sorted()
+ binsha, tree_items = write_tree_from_cache(entries, self.repo.odb, slice(0, len(entries)))
+ # note: additional deserialization could be saved if write_tree_from_cache
+ # would return sorted tree entries
+ root_tree = Tree(self.repo, b2a_hex(binsha), path='')
+ root_tree._cache = tree_items
+ return root_tree
- return Tree(self.repo, tree_sha, 0, '')
-
- def write_tree(self, missing_ok = False):
- index_path = self._index_path()
- tmp_index_mover = TemporaryFileSwap(index_path)
-
- self.write(index_path, ignore_tree_extension_data=True)
- tree_sha = self.repo.git.write_tree(missing_ok=missing_ok)
-
- del(tmp_index_mover) # as soon as possible
- return Tree(self.repo, tree_sha, 0, '')
-
def _process_diff_args(self, args):
try:
args.pop(args.index(self))
@@ -525,7 +521,6 @@ def _process_diff_args(self, args):
# END remove self
return args
-
def _to_relative_path(self, path):
""":return: Version of path relative to our git directory or raise ValueError
if it is not within our git direcotory"""
@@ -599,7 +594,7 @@ def add(self, items, force=True, fprogress=lambda *args: None, path_rewriter=Non
- BaseIndexEntry or type
Handling equals the one of Blob objects, but the stage may be
- explicitly set.
+ explicitly set. Please note that Index Entries require binary sha's.
:param force:
If True, otherwise ignored or excluded files will be
@@ -666,7 +661,7 @@ def store_path(filepath):
fprogress(filepath, True, filepath)
return BaseIndexEntry((self._stat_mode_to_index_mode(st.st_mode),
- istream.sha, 0, filepath))
+ istream.binsha, 0, filepath))
# END utility method
@@ -691,14 +686,14 @@ def store_path(filepath):
# HANDLE ENTRY OBJECT CREATION
# create objects if required, otherwise go with the existing shas
- null_entries_indices = [ i for i,e in enumerate(entries) if e.sha == Object.NULL_HEX_SHA ]
+ null_entries_indices = [ i for i,e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA ]
if null_entries_indices:
for ei in null_entries_indices:
null_entry = entries[ei]
new_entry = store_path(null_entry.path)
# update null entry
- entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.sha, null_entry.stage, null_entry.path))
+ entries[ei] = BaseIndexEntry((null_entry.mode, new_entry.binsha, null_entry.stage, null_entry.path))
# END for each entry index
# END null_entry handling
@@ -707,7 +702,7 @@ def store_path(filepath):
# all object sha's
if path_rewriter:
for i,e in enumerate(entries):
- entries[i] = BaseIndexEntry((e.mode, e.sha, e.stage, path_rewriter(e)))
+ entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e)))
# END for each entry
# END handle path rewriting
Oops, something went wrong.

0 comments on commit 69dd875

Please sign in to comment.