From 9dcbdf93f93c29aac95349232d946510456a8e53 Mon Sep 17 00:00:00 2001 From: Matthias Georgi Date: Sun, 4 Jan 2009 13:04:57 +0100 Subject: [PATCH] better concurrency. better scaling. 20000 entries in 1000 folders still performant. --- git_store.gemspec | 6 +- lib/git_store.rb | 305 +++++++++++-------------------- lib/git_store/blob.rb | 84 +++++++++ lib/git_store/handlers.rb | 57 ++++++ lib/git_store/tree.rb | 151 +++++++++++++++ test/benchmark.rb | 24 +++ {spec => test}/git_store_spec.rb | 72 +++++--- 7 files changed, 477 insertions(+), 222 deletions(-) create mode 100644 lib/git_store/blob.rb create mode 100644 lib/git_store/handlers.rb create mode 100644 lib/git_store/tree.rb create mode 100644 test/benchmark.rb rename {spec => test}/git_store_spec.rb (62%) diff --git a/git_store.gemspec b/git_store.gemspec index dfcefad..683edbe 100644 --- a/git_store.gemspec +++ b/git_store.gemspec @@ -16,7 +16,11 @@ LICENSE README.md git_store.gemspec lib/git_store.rb -spec/git_store_spec.rb +lib/git_store/blob.rb +lib/git_store/tree.rb +lib/git_store/handlers.rb +test/git_store_spec.rb +test/benchmark.rb } end diff --git a/lib/git_store.rb b/lib/git_store.rb index d8b62d1..f7423c2 100644 --- a/lib/git_store.rb +++ b/lib/git_store.rb @@ -1,240 +1,157 @@ +require 'rubygems' require 'grit' +require 'yaml' -# This fix ensures sorted yaml maps. -class Hash - def to_yaml( opts = {} ) - YAML::quick_emit( object_id, opts ) do |out| - out.map( taguri, to_yaml_style ) do |map| - sort_by { |k, v| k.to_s }.each do |k, v| - map.add( k, v ) - end - end - end - end -end +require 'git_store/blob' +require 'git_store/tree' +require 'git_store/handlers' class GitStore + include Enumerable - class DefaultHandler - def read(name, data) - data - end + attr_reader :repo, :index, :root, :last_commit - def write(data) - data - end + def initialize(path = '.') + @repo = Grit::Repo.new(path) + @root = Tree.new('') + load_last_commit end - - class YAMLHandler - def read(name, data) - YAML.load(data) - end - def write(data) - data.to_yaml - end + def load_last_commit + @last_commit = @repo.commits('master', 1)[0] end - - class RubyHandler - def read(name, data) - Object.module_eval(data) - end + + def commit(message="") + head = repo.heads.first + commit_index(message, head ? head.commit.id : nil) end - - class ERBHandler - def read(name, data) - ERB.new(data) - end + + def [](*args) + root[*args] end - Handler = { - 'yml' => YAMLHandler.new, - 'rhtml' => ERBHandler.new, - 'rxml' => ERBHandler.new, - 'rb' => RubyHandler.new - } + def []=(*args) + value = args.pop + root[*args] = value + end - Handler.default = DefaultHandler.new + def delete(path) + root.delete(path) + end - class Blob + def load + root.load(repo.tree) + end - attr_reader :id - attr_accessor :name + def each(&block) + root.each(&block) + end - def initialize(*args) - if args.first.is_a?(Grit::Blob) - @blob = args.first - @name = @blob.name - else - @name = args[0] - self.data = args[1] - end - end + def changed? + commit = repo.commits('master', 1)[0] + commit and (last_commit.nil? or last_commit.id != commit.id) + end - def extname - File.extname(name)[1..-1] - end + def refresh! + load if changed? + end - def load(data) - @data = handler.read(name, data) - end + def start_transaction(head = 'master') + @lock = open("#{repo.path}/refs/heads/#{head}.lock", "w") + @lock.flock(File::LOCK_EX) + end - def handler - Handler[extname] - end + def commit_index(message, parents = nil, actor = nil, head = 'master') + start_transaction(head) unless @lock + + tree_sha = write_tree(root) + + contents = [] + contents << ['tree', tree_sha].join(' ') - def data - @data or (@blob and load(@blob.data)) + if parents + parents.each do |p| + contents << ['parent', p].join(' ') if p + end end - def data=(data) - @data = data + if actor + name = actor.name + email = actor.email + else + config = Grit::Config.new(self.repo) + name = config['user.name'] + email = config['user.email'] end - - def to_s - if handler.respond_to?(:write) - handler.write(data) - else - @blob.data - end + + author_string = "#{name} <#{email}> #{Time.now.to_i} -0700" + contents << ['author', author_string].join(' ') + contents << ['committer', author_string].join(' ') + contents << '' + contents << message + + commit_sha = put_raw_object(contents.join("\n"), 'commit') + + open("#{repo.path}/refs/heads/#{head}", "w") do |file| + file.write(commit_sha) end + commit_sha + ensure + @lock.close if @lock + @lock = nil + File.unlink("#{repo.path}/refs/heads/#{head}.lock") rescue nil end - class Tree - include Enumerable + def put_raw_object(data, type) + repo.git.ruby_git.put_raw_object(data, type) + end - attr_reader :data - attr_accessor :name + def write_blob(blob) + return if not blob.modified? - def initialize(name = nil) - @data = {} - @name = name - end - - def load(tree) - @name = tree.name - @data = tree.contents.inject({}) do |hash, file| - if file.is_a?(Grit::Tree) - hash[file.name] = (@data[file.name] || Tree.new).load(file) - else - hash[file.name] = Blob.new(file) - end - hash - end - self - end - - def inspect - "#" - end - - def fetch(name) - name = name.to_s - entry = @data[name] + blob.sha1 = put_raw_object(blob.serialize, 'blob') + blob.modified = false + end + + def write_tree(tree) + return if not tree.modified? + + contents = tree.data.map do |name, entry| case entry - when Blob then entry.data - when Tree then entry - end - end - - def store(name, value) - name = name.to_s - if value.is_a?(Tree) - value.name = name - @data[name] = value - else - @data[name] = Blob.new(name, value) + when Blob; write_blob(entry) + when Tree; write_tree(entry) end + "%s %s\0%s" % [entry.mode, name, [entry.sha1].pack("H*")] end - def has_key?(name) - @data.has_key?(name) - end + tree.modified = false + tree.sha1 = put_raw_object(contents.join, 'tree') + end + + class FileStore < GitStore - def [](*args) - args = args.first.to_s.split('/') if args.size == 1 - args.inject(self) { |tree, key| tree.fetch(key) or return nil } + attr_reader :path + + def initialize(path = '.') + @path = path + @root = Tree.new('') end - def []=(*args) - value = args.pop - args = args.first.to_s.split('/') if args.size == 1 - tree = args[0..-2].to_a.inject(self) do |tree, key| - tree.has_key?(key) ? tree.fetch(key) : tree.store(key, Tree.new(key)) - end - tree.store(args.last, value) + def load + root.load_from_disk end - def delete(name) - @data.delete(name) - end - - def each(&block) - @data.values.each do |entry| - case entry - when Blob then yield entry.data - when Tree then entry.each(&block) - end + def refresh! + root.each_blob do |blob| + end end - def each_with_path(path = [], &block) - @data.each do |name, entry| - child_path = path + [name] - case entry - when Blob then yield entry, child_path.join('/') - when Tree then entry.each_with_path(child_path, &block) - end - end - end - - def to_hash - @data.inject({}) do |hash, (name, entry)| - hash[name] = entry.is_a?(Tree) ? entry.to_hash : entry.to_s - hash - end + def commit(message="") + root.write_to_disk end end - attr_reader :repo, :index, :tree - - def initialize(path, &block) - @repo = Grit::Repo.new(path) - @index = Grit::Index.new(@repo) - @tree = Tree.new - end - - def commit(message="") - index.tree = tree.to_hash - head = repo.heads.first - index.commit(message, head ? head.commit.id : nil) - end - - def [](*args) - tree[*args] - end - - def []=(*args) - value = args.pop - tree[*args] = value - end - - def delete(path) - tree.delete(path) - end - - def load - tree.load(repo.tree) - end - - def each(&block) - tree.each(&block) - end - - def each_with_path(&block) - tree.each_with_path(&block) - end - end diff --git a/lib/git_store/blob.rb b/lib/git_store/blob.rb new file mode 100644 index 0000000..2142e4e --- /dev/null +++ b/lib/git_store/blob.rb @@ -0,0 +1,84 @@ +class GitStore + + class Blob + + attr_accessor :sha1, :mode, :path, :blob, :file, :modified + alias_method :modified?, :modified + + def initialize(data, path, modified = false) + @path = path + @modified = modified + + case data + when Grit::Blob + @blob = data + @sha1 = blob.id + @mode = blob.mode + when File + @file = data + @sha1 = Digest::SHA1.hexdigest(file.read) + @mode = '%o' % file.stat.mode + else + @data = data + @sha1 = Digest::SHA1.hexdigest(serialize) + @mode = '100644' + end + end + + def name + File.basename(path) + end + + def extname + File.extname(name)[1..-1] + end + + def load(data) + handler.read(path, data) + end + + def reload + load raw_data + end + + def raw_data + if @blob + @blob.data + elsif @file + @file.rewind + @file.read + end + end + + def handler + Handler[extname] + end + + def data + @data ||= load(raw_data) + end + + def data=(data) + @data = data + end + + def write_to_disk + if handler.respond_to?(:write) + FileUtils.mkpath(File.dirname(path)) + open(path, "w") do |io| + io << handler.write(path, data) + end + end + end + + def serialize + if handler.respond_to?(:write) + handler.write(path, data) + else + raw_data + end + end + + end + +end diff --git a/lib/git_store/handlers.rb b/lib/git_store/handlers.rb new file mode 100644 index 0000000..8956847 --- /dev/null +++ b/lib/git_store/handlers.rb @@ -0,0 +1,57 @@ + +# This fix ensures sorted yaml maps. +class Hash + def to_yaml( opts = {} ) + YAML::quick_emit( object_id, opts ) do |out| + out.map( taguri, to_yaml_style ) do |map| + sort_by { |k, v| k.to_s }.each do |k, v| + map.add( k, v ) + end + end + end + end +end + +class GitStore + + class DefaultHandler + def read(path, data) + data + end + + def write(path, data) + data.to_s + end + end + + class YAMLHandler + def read(path, data) + YAML.load(data) + end + + def write(path, data) + data.to_yaml + end + end + + class RubyHandler + def read(path, data) + Object.module_eval(data) + end + end + + class ERBHandler + def read(path, data) + ERB.new(data) + end + end + + Handler = { + 'yml' => YAMLHandler.new, + 'rhtml' => ERBHandler.new, + 'rxml' => ERBHandler.new, + 'rb' => RubyHandler.new + } + + Handler.default = DefaultHandler.new +end diff --git a/lib/git_store/tree.rb b/lib/git_store/tree.rb new file mode 100644 index 0000000..b5eff65 --- /dev/null +++ b/lib/git_store/tree.rb @@ -0,0 +1,151 @@ +class GitStore + + class Tree + include Enumerable + + attr_reader :data + attr_accessor :path, :sha1, :mode, :modified + + def initialize(path = nil, modified = false) + @data = {} + @path = path + @mode = '040000' + @modified = modified + end + + def name + File.basename(path) + end + + def modified? + @modified || @data.values.any? { |child| child.modified? } + end + + def write_to_disk + @data.each do |name, entry| + entry.write_to_disk + end + end + + def load_from_disk(path = '') + @path = path + @mode = '%o' % File.stat("./#{path}").mode + + pattern = path.empty? ? "./*" : "./#{path}/*" + + @data = Dir[pattern].inject({}) do |hash, file| + file = file[2..-1] + if file[-1, 1] != '~' + name = File.basename(file) + if File.directory?(file) + hash[name] = (@data[name] || Tree.new).load_from_disk(file) + else + hash[name] = Blob.new(File.open(file), file) + end + end + hash + end + + self + end + + def load(tree, path = '') + @path = path + @mode = tree.mode + + @data = tree.contents.inject({}) do |hash, file| + name = file.name + if file.is_a?(Grit::Tree) + hash[name] = (@data[name] || Tree.new).load(file, child_path(name)) + else + hash[name] = Blob.new(file, child_path(name)) + end + hash + end + + self + end + + def inspect + "#" + end + + def fetch(name) + name = name.to_s + entry = @data[name] + case entry + when Blob; entry.data + when Tree; entry + end + end + + def child_path(name) + path.empty? ? name : "#{path}/#{name}" + end + + def create_tree(name) + store(name, Tree.new(child_path(name))) + end + + def store(name, value) + @modified = true + name = name.to_s + + if value.is_a?(Tree) + value.path = child_path(name) + @data[name] = value + else + @data[name] = Blob.new(value, child_path(name), true) + end + end + + def has_key?(name) + @data.has_key?(name) + end + + def [](*args) + args = args.first.to_s.split('/') if args.size == 1 + args.inject(self) { |tree, key| tree.fetch(key) or return nil } + end + + def []=(*args) + value = args.pop + args = args.first.to_s.split('/') if args.size == 1 + tree = args[0..-2].to_a.inject(self) do |tree, key| + tree.has_key?(key) ? tree.fetch(key) : tree.create_tree(key) + end + tree.store(args.last, value) + end + + def delete(name) + @data.delete(name) + end + + def each(&block) + @data.sort.each do |name, entry| + case entry + when Blob; yield entry.data + when Tree; entry.each(&block) + end + end + end + + def each_blob(&block) + @data.sort.each do |name, entry| + case entry + when Blob; yield entry + when Tree; entry.each_blob(&block) + end + end + end + + def to_hash + @data.inject({}) do |hash, (name, entry)| + hash[name] = entry.is_a?(Tree) ? entry.to_hash : entry.serialize + hash + end + end + + end + +end diff --git a/test/benchmark.rb b/test/benchmark.rb new file mode 100644 index 0000000..7dcf74a --- /dev/null +++ b/test/benchmark.rb @@ -0,0 +1,24 @@ +require 'git_store' +require 'benchmark' +require 'fileutils' + +FileUtils.rm_rf 'repo' +FileUtils.mkpath 'repo' +Dir.chdir 'repo' + +`git init` + +store = GitStore.new + +'a'.upto('z') do |tree| + 'aa'.upto('zz') do |key| + store[tree, key] = (1..10).map { rand.to_s } + end +end + +store.commit + +Benchmark.bm do |x| + x.report { store['a', 'bb'] = "x" * 100; store.commit } +end + diff --git a/spec/git_store_spec.rb b/test/git_store_spec.rb similarity index 62% rename from spec/git_store_spec.rb rename to test/git_store_spec.rb index f229dc1..c9b5edc 100644 --- a/spec/git_store_spec.rb +++ b/test/git_store_spec.rb @@ -1,40 +1,50 @@ -$:.unshift(File.expand_path(File.dirname(__FILE__) + '/../lib')) - require 'git_store' require 'yaml' describe GitStore do - REPO = File.expand_path(File.dirname(__FILE__) + '/test_repo') + REPO = File.expand_path(File.dirname(__FILE__) + '/repo') before do FileUtils.rm_rf REPO Dir.mkdir REPO Dir.chdir REPO - `git init` end def store - @store or - begin - @store = GitStore.new(REPO) - @store.load - @store - end + @store end - + def file(file, data) FileUtils.mkpath(File.dirname(file)) open(file, 'w') { |io| io << data } - `git add #{file}` - `git commit -m 'added #{file}'` - File.unlink(file) + if @use_git + `git add #{file}` + `git commit -m 'added #{file}'` + File.unlink(file) + end + end + + def self.it(text, &block) + super "#{text} with git" do + `git init` + @use_git = true + @store = GitStore.new + instance_eval(&block) + end + + super "#{text} without git" do + @use_git = false + @store = GitStore::FileStore.new + instance_eval(&block) + end end it 'should load a repo' do file 'a', 'Hello' file 'b', 'World' - + + store.load store['a'].should == 'Hello' store['b'].should == 'World' end @@ -43,6 +53,7 @@ def file(file, data) file 'x/a', 'Hello' file 'y/b', 'World' + store.load store['x'].should be_kind_of(GitStore::Tree) store['y'].should be_kind_of(GitStore::Tree) @@ -51,29 +62,35 @@ def file(file, data) end it 'should commit added files' do - store['c'] = 'Hello' - store['d'] = 'World' - store.commit + if @use_git + store.load + store['c'] = 'Hello' + store['d'] = 'World' + store.commit - `git checkout` + `git checkout` - File.should be_exist('c') - File.should be_exist('d') + File.should be_exist('c') + File.should be_exist('d') - File.read('c').should == 'Hello' - File.read('d').should == 'World' + File.read('c').should == 'Hello' + File.read('d').should == 'World' + end end it 'should load yaml' do file 'x/a.yml', '[1, 2, 3, 4]' - store['x']['a.yml'].should == [1,2,3,4] + store.load + store['x']['a.yml'].should == [1,2,3,4] store['x']['a.yml'] = [1,2,3,4,5] + store.root.to_hash.should == { "x" => { "a.yml" => "--- \n- 1\n- 2\n- 3\n- 4\n- 5\n"} } + store.commit store.load - + store['x']['a.yml'].should == [1,2,3,4,5] end @@ -81,6 +98,7 @@ def file(file, data) file 'x/a', 'Hello' file 'y/b', 'World' + store.load store['x/a'].should == 'Hello' store['y/b'].should == 'World' @@ -94,6 +112,7 @@ def file(file, data) end it 'should create new trees' do + store.load store['new/tree'] = 'This tree' store['this', 'tree'] = 'Another' store.commit @@ -104,6 +123,7 @@ def file(file, data) end it 'should preserve loaded trees' do + store.load tree = store['tree'] = GitStore::Tree.new store['tree']['example'] = 'Example' store.commit @@ -113,5 +133,3 @@ def file(file, data) end end - -