In [45]:
from hashlib import sha1
import Blob
from Tree import Tree
from Commit import Commit
from Reference import Reference

class Git(object):
    """
    Minimal in-memory model of a git repository: an index (staging area),
    object stores for blobs/trees/commits, and two references.

    attributes
    ----------
    index_file : dict
        staging area; maps a tracked file name to the id of its latest Blob
    dir_blobs : list
        list of Blobs
    dir_trees : list
        list of Trees
    dir_commits : list
        list of Commits
    master : Reference
        represents master ref; commit() treats a `reference` of None as
        "no commit yet"
    head : Reference
        initially points at master ref but could point at Commits
    """
    def __init__(self):
        self.index_file = {}
        self.dir_blobs = []
        self.dir_trees = []
        self.dir_commits = []
        self.master = Reference("master")
        self.head = Reference("head", ref=self.master)

    def add(self, fname):
        """
        creates blob object, and then adds fname/hash to index.

        Every call appends a new blob to `dir_blobs`, even when `fname` is
        already tracked; the index entry is overwritten, so it only ever
        holds the id of the most recently added blob for that file.

        parameters
        ----------
        fname : str
            fname of file to be tracked.
        """
        b = self._create_blob(fname)
        self.dir_blobs.append(b)  # TODO: write to disk
        self.index_file[fname] = b.id

    def _create_blob(self, fname):
        """create new blob object from the current content of `fname`.

        parameters
        ----------
        fname : str
            path of the file to read (opened in text mode).

        returns
        -------
        Blob.Blob built from the file's content.
        """
        # context manager guarantees the handle is closed, even if read() raises
        with open(fname, "r") as fh:
            content = fh.read()
        return Blob.Blob(content)

    def commit(self, msg):
        """
        (1) create tree graph from index: represent the content of the version of the project being committed
        (2) create commit object
        (3) points branch to commit. points the current branch at the new commit object.

        parameters
        ----------
        msg : str
            commit message stored on the new Commit.
        """
        # step 1: create Tree, which is just an official snapshot of index
        t = Tree(self.index_file)
        self.dir_trees.append(t)

        # step 2: create Commit
        if self.master.reference is None:
            # if this is the 1st commit, then commit's parent is the tree
            parent_obj = t
        else:
            # chase pointer until we find a Commit or Tree object
            # NOTE(review): exact type match kept on purpose -- isinstance()
            # would also follow any subclass of Reference; confirm whether
            # Tree/Commit share a base class with it before relaxing this.
            parent_obj = self.head
            while type(parent_obj) is Reference:
                parent_obj = parent_obj.reference

        # now we can create a new Commit object
        new_commit_obj = Commit(t.id, msg, parent_obj)
        self.dir_commits.append(new_commit_obj)

        # step 3: master branch points to latest commit object
        self.master.reference = new_commit_obj

In [46]:
g = Git()

In [47]:
!printf 'a' > letter.txt
!cat letter.txt

a

In [48]:
# stage the file: add() stores a new blob and records fname -> blob id in the index (a dict)
g.add("letter.txt")
# single-argument print(...) emits the same text on Python 2 and Python 3
print("blob id:  %s" % (g.dir_blobs[0].id,))

blob id:  86f7e437faa5a7fce15d1ddcb9eaeaea377667b8


In [49]:
g.index_file

{'letter.txt': '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8'}

### Let's add another file to the working directory

In [50]:
!printf '1234' > number.txt
!cat number.txt

1234

In [51]:
g.add("number.txt")

In [52]:
# list every stored blob id, in insertion order
for b in g.dir_blobs:
    print(b.id)  # call form works on both Python 2 and Python 3

86f7e437faa5a7fce15d1ddcb9eaeaea377667b8
7110eda4d09e062aa5e4a390b0a572ac0d2c0220


In [53]:
g.index_file

{'letter.txt': '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
 'number.txt': '7110eda4d09e062aa5e4a390b0a572ac0d2c0220'}

### update existing file

In [54]:
!printf '1' > number.txt
!cat number.txt

1

In [55]:
g.add("number.txt")

In [56]:
# new file reflected in index
g.index_file

{'letter.txt': '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
 'number.txt': '356a192b7913b04c54574d18c28d46e6395428ab'}

In [57]:
# but now we have 3 blobs: add() appends a new blob each time, so the
# superseded number.txt blob ('1234') is still in the store
for b in g.dir_blobs:
    print(b.id)  # call form works on both Python 2 and Python 3

86f7e437faa5a7fce15d1ddcb9eaeaea377667b8
7110eda4d09e062aa5e4a390b0a572ac0d2c0220
356a192b7913b04c54574d18c28d46e6395428ab


### 1st commit

In [58]:
# commit
g.commit('a1')

In [59]:
# since this is the first commit, the Commit object's parent is the tree
A1 = g.dir_commits[0]
A1.parent_obj.ref_type

'Tree'

In [60]:
# single-argument print(...) emits the same text on Python 2 and Python 3
print("A1 parent:  %s" % (A1.parent_obj.index,))

A1 parent:  {"letter.txt": "86f7e437faa5a7fce15d1ddcb9eaeaea377667b8", "number.txt": "356a192b7913b04c54574d18c28d46e6395428ab"}


In [61]:
# master branch now points at latest commit object a1
g.master.reference.msg

'a1'

In [62]:
# head is a ref that points at the master ref (not directly at a commit)
g.head.reference.ref_type

'master'

### 2nd commit

In [63]:
!printf '2' > number.txt

In [64]:
g.add("number.txt")

In [65]:
# again, index file reflects working copy
g.index_file

{'letter.txt': '86f7e437faa5a7fce15d1ddcb9eaeaea377667b8',
 'number.txt': 'da4b9237bacccdf19c0760cab7aec4a8359010b0'}

In [66]:
# but now we should have 4 blob objects (one per add() call so far)
for b in g.dir_blobs:
    print(b.id)  # call form works on both Python 2 and Python 3

86f7e437faa5a7fce15d1ddcb9eaeaea377667b8
7110eda4d09e062aa5e4a390b0a572ac0d2c0220
356a192b7913b04c54574d18c28d46e6395428ab
da4b9237bacccdf19c0760cab7aec4a8359010b0


In [67]:
g.commit('a2')

In [68]:
# first, on commit, a new tree is created to snapshot the current content of the index
g.dir_trees[1].index

'{"letter.txt": "86f7e437faa5a7fce15d1ddcb9eaeaea377667b8", "number.txt": "da4b9237bacccdf19c0760cab7aec4a8359010b0"}'

In [69]:
# second, a new commit object is created
A2 = g.dir_commits[1]
print(A2.msg)  # call form works on both Python 2 and Python 3

a2


In [71]:
# commit object tracks the latest tree: both ids below must match
# (single-argument print(...) emits the same text on Python 2 and Python 3)
print("actual tree hash:  %s" % (g.dir_trees[1].id,))
print("tree hash, according to Commit:  %s" % (A2.tree_id,))

actual tree hash:  bced68531359468d07445ba401750f965697fb90
tree hash, according to Commit:  bced68531359468d07445ba401750f965697fb90


In [75]:
# Commit A2's parent is the previous Commit_A1
A2.parent_obj.msg

'a1'

In [76]:
# Master points at A2
g.master.reference.msg

'a2'

In [77]:
# head still points at master
g.head.reference.ref_type

'master'