In [8]:
from hashlib import sha1
from Git import Blob, Tree, Commit, Reference

class Git(object):
    """
    A toy, in-memory model of a git repository.

    Every object is kept in a plain Python container on the instance; nothing
    is written to disk.

    attributes
    ----------
    index_file : dict
        maps each tracked fname to the id of the Blob created by the latest
        ``add`` of that file
    dir_blobs : list
        list of Blobs. every Blob ever created by ``add`` is kept, including
        out of date ones
    dir_trees : list
        list of Trees, one appended per ``commit``
    dir_commits : list
        list of Commits, in the order they were created
    master : Reference
        represents the master ref; ``commit`` points it at the newest Commit
    head : Reference
        initially points at the master ref but could point at Commits
    """
    def __init__(self):
        # which files should git track? only tracks fnames and corresponding blob id
        self.index_file = {}
        # blob representations of tracked files. all blobs are kept, even out of date blobs.
        self.dir_blobs = []
        self.dir_trees = []
        self.dir_commits = []
        # think of master as a label that points to the latest commit
        self.master = Reference("master")
        self.head = Reference("head", ref=self.master)

    def add(self, fname):
        """
        first convert the file to a Blob object, then record fname and the
        corresponding blob id in git's index.

        Blob is defined outside this class; presumably its id is a sha1 hash
        of the file content (a hash, not a compression) -- confirm against Blob.

        parameters
        ----------
        fname : str
            fname of file to be tracked.
        """
        blob = self._create_blob(fname)
        self.dir_blobs.append(blob)  # TODO: write to disk
        # tell git to track this file going forward; re-adding a file
        # overwrites its entry, while the older Blob stays in dir_blobs
        self.index_file[fname] = blob.id

    def _create_blob(self, fname):
        """create new blob object from the text content of ``fname``.

        parameters
        ----------
        fname : str
            path of the file to read.

        returns
        -------
        Blob
            built from the full content of the file.
        """
        # the with-block closes the handle even if read() raises
        with open(fname, "r") as fh:
            content = fh.read()
        return Blob(content)

    def commit(self, msg):
        """
        (1) create a Tree object from the current index. this is important because the index
        will change (eg, because there is a new file to be tracked or a tracked file has been
        edited).

        (2) create a Commit object whose parent is the Tree (1st commit) or the
        Commit that head currently resolves to.

        (3) point the master branch at the new Commit object. note that master is
        always the ref that moves, regardless of what head points at.

        parameters
        ----------
        msg : str
            commit message stored on the Commit.
        """
        # step 1: create a Tree object, the "official snapshot" of current index
        tree = Tree(self.index_file)
        self.dir_trees.append(tree)

        # step 2: create Commit object
        if self.master.reference is None:
            # if this is the 1st commit, then Commit's parent is the newly created Tree
            parent_obj = tree
        else:
            # chase pointers from head until we reach something that is not a
            # Reference. the exact-type check is deliberate: Tree/Commit are
            # defined elsewhere and may share a base class with Reference.
            parent_obj = self.head
            while type(parent_obj) is Reference:
                parent_obj = parent_obj.reference

        # now we can create a new Commit object
        new_commit_obj = Commit(tree.id, msg, parent_obj)
        self.dir_commits.append(new_commit_obj)

        # step 3: master branch points to latest commit object
        self.master.reference = new_commit_obj

In [9]:
# start from an empty repository: empty index, no blobs, trees or commits yet
g = Git()

In [10]:
# create a new file called "letter.txt" in the working directory whose
# content is the single character 'a', then show it
!printf 'a' > letter.txt
!cat letter.txt

a

In [11]:
# add letter.txt to the index, the fname -> blob id mapping of tracked files
g.add("letter.txt")
# single-argument print() behaves the same on python 2 and 3; the two spaces
# after the colon reproduce the output of the original print statement
print("blob id for letter.txt:  {}".format(g.dir_blobs[0].id))

blob id for letter.txt:  86f7e437faa5a7fce15d1ddcb9eaeaea377667b8


In [12]:
# index also remembers blob id.
# single-argument print() behaves the same on python 2 and 3; the two spaces
# after the colon reproduce the output of the original print statement
print("git is tracking:  {}".format(g.index_file))

git is tracking:  {'letter.txt': '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'}


### Let's add another file to the working directory

In [13]:
# create a second file, "number.txt", with content '1234', then show it
!printf '1234' > number.txt
!cat number.txt

1234

In [14]:
# start tracking number.txt: a second blob is stored and the index gains an entry
g.add("number.txt")

In [15]:
# one blob per add() call so far; print(x) with a single argument gives the
# same output on python 2 and 3
for blob in g.dir_blobs:
    print(blob.id)

86f7e437faa5a7fce15d1ddcb9eaeaea377667b8
7110eda4d09e062aa5e4a390b0a572ac0d2c0220


In [16]:
# which files am i tracking? the index maps each tracked fname to the id of
# the blob created by its latest add
g.index_file

{'letter.txt': '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
 'number.txt': '7110eda4d09e062aa5e4a390b0a572ac0d2c0220'}

### Update an existing file

In [17]:
# overwrite number.txt: its content changes from '1234' to '1'
!printf '1' > number.txt
!cat number.txt

1

In [18]:
# re-add the edited file: a new blob is created and the index entry is overwritten
g.add("number.txt")

In [19]:
# new, up to date blob reflected in index: the id for number.txt has changed,
# the entry for letter.txt has not
g.index_file

{'letter.txt': '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
 'number.txt': '356a192b7913b04c54574d18c28d46e6395428ab'}

In [20]:
# but now we have 3 blobs: add() never removes the out of date blob.
# print(x) with a single argument gives the same output on python 2 and 3
for blob in g.dir_blobs:
    print(blob.id)

86f7e437faa5a7fce15d1ddcb9eaeaea377667b8
7110eda4d09e062aa5e4a390b0a572ac0d2c0220
356a192b7913b04c54574d18c28d46e6395428ab


### 1st commit

In [21]:
# commit: snapshot the current index as a Tree, wrap it in a Commit with
# message 'a1', and point master at that Commit
g.commit('a1')

In [22]:
# commit creates a tree. note from the quoted output that the Tree holds the
# index as a string snapshot rather than as the live dict
g.dir_trees[0].index

'{"letter.txt": "86f7e437faa5a7fce15d1ddcb9eaeaea377667b8", "number.txt": "356a192b7913b04c54574d18c28d46e6395428ab"}'

In [23]:
# commit also creates a Commit object
A1 = g.dir_commits[0]
# single-argument print() behaves the same on python 2 and 3; the two spaces
# after the colon reproduce the output of the original print statement
print("commit id:  {}".format(A1.id))

commit id:  e1f5fb5935a89acbfad9ed3558d7860b861e39f1


In [24]:
# since this is the first commit (master pointed at nothing yet), the Commit
# object's parent is the tree it was created from
A1.parent_obj.ref_type, A1.parent_obj.id

('Tree', '17b9b5c5da3011c16b730263948e79390fc50b2b')

In [25]:
# the parent Tree carries the index snapshot taken at commit time.
# single-argument print() behaves the same on python 2 and 3; the two spaces
# after the colon reproduce the output of the original print statement
print("A1 parent:  {}".format(A1.parent_obj.index))

A1 parent:  {"letter.txt": "86f7e437faa5a7fce15d1ddcb9eaeaea377667b8", "number.txt": "356a192b7913b04c54574d18c28d46e6395428ab"}


In [26]:
# master branch now points at latest commit object a1 (commit() sets
# master.reference to the Commit it just created)
g.master.reference.msg

'a1'

In [27]:
# head points at the master Reference (not directly at a Commit), so the
# ref_type of its target is 'master'
g.head.reference.ref_type

'master'

### 2nd commit

In [28]:
# modify "number.txt", a tracked file: its content goes from '1' to '2'.
# the index is only updated once add is called again
!printf '2' > number.txt

In [29]:
# stage the edit, then display the index to confirm the new blob id
g.add("number.txt")  # add will update index
g.index_file  # blob id for number.txt should change

{'letter.txt': '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
 'number.txt': 'da4b9237bacccdf19c0760cab7aec4a8359010b0'}

In [30]:
# now we should have 4 blob objects, one per add() call.
# print(x) with a single argument gives the same output on python 2 and 3
for blob in g.dir_blobs:
    print(blob.id)

86f7e437faa5a7fce15d1ddcb9eaeaea377667b8
7110eda4d09e062aa5e4a390b0a572ac0d2c0220
356a192b7913b04c54574d18c28d46e6395428ab
da4b9237bacccdf19c0760cab7aec4a8359010b0


In [31]:
# 2nd commit: snapshots the updated index; since master already points at a
# Commit, the parent is found by following head
g.commit('a2')

In [32]:
# (1) committing creates a new Tree from the updated index; the Tree of the
# 1st commit is kept unchanged.
# print(x) with a single argument gives the same output on python 2 and 3
for tree in g.dir_trees:
    print(tree.index)

{"letter.txt": "86f7e437faa5a7fce15d1ddcb9eaeaea377667b8", "number.txt": "356a192b7913b04c54574d18c28d46e6395428ab"}
{"letter.txt": "86f7e437faa5a7fce15d1ddcb9eaeaea377667b8", "number.txt": "da4b9237bacccdf19c0760cab7aec4a8359010b0"}


In [33]:
# (2) a new Commit object is created and appended after the 1st one.
# print(x) with a single argument gives the same output on python 2 and 3
A2 = g.dir_commits[1]
print(A2.msg)

a2


In [34]:
# commit object tracks the latest tree: the id stored on the Commit should
# equal the id of the Tree created by the same commit() call.
# single-argument print() behaves the same on python 2 and 3; the two spaces
# after each colon reproduce the output of the original print statements
print("actual tree hash:  {}".format(g.dir_trees[1].id))
print("tree hash, according to Commit:  {}".format(A2.tree_id))

actual tree hash:  bced68531359468d07445ba401750f965697fb90
tree hash, according to Commit:  bced68531359468d07445ba401750f965697fb90


In [35]:
# Commit A2's parent is the previous Commit A1: head -> master -> A1 was
# followed when A2 was created
A2.parent_obj.msg

'a1'

In [36]:
# master now points at A2: commit() always moves master to the newest Commit
g.master.reference.msg

'a2'

In [37]:
# head still points at master: commit() never reassigns head itself
g.head.reference.ref_type

'master'