Skip to content

Commit

Permalink
Unify all gems into one, generalize resources.
Browse files Browse the repository at this point in the history
Separate scraper into two distinct phases: retrieval and scraping.
Retrieval fetches the files to local disk while scraping analyzes
the repository content. It's now possible to have scrapers that
scrape different kinds of resources (cookbooks and workflows so far).
  • Loading branch information
Raphael Simon committed Sep 9, 2011
1 parent c17a406 commit 3a12a6b
Show file tree
Hide file tree
Showing 128 changed files with 2,681 additions and 4,978 deletions.
5 changes: 2 additions & 3 deletions Gemfile
Expand Up @@ -2,7 +2,7 @@ source "http://rubygems.org"

gem "json", "~> 1.4.5"
gem "blackwinter-git", "~> 1.2.7"
gem "libarchive", "~> 0.1.1"
gem "libarchive", "~> 0.1.2"
gem "curb", "~> 0.7.7.1"
gem "right_aws", "~> 2.0"
gem "process_watcher", "~> 0.3"
Expand All @@ -11,6 +11,5 @@ group :development do
gem "rspec", "~> 2.3"
gem "rake"
gem "flexmock"
gem "rtags"
gem "ruby-debug"
gem "ruby-debug19"
end
55 changes: 30 additions & 25 deletions Gemfile.lock
@@ -1,33 +1,39 @@
GEM
remote: http://rubygems.org/
specs:
archive-tar-minitar (0.5.2)
blackwinter-git (1.2.7)
columnize (0.3.2)
columnize (0.3.4)
curb (0.7.7.1)
diff-lcs (1.1.2)
flexmock (0.8.8)
flexmock (0.9.0)
json (1.4.6)
libarchive (0.1.1)
linecache (0.43)
process_watcher (0.3)
rake (0.8.7)
right_aws (2.0.0)
right_http_connection (>= 1.2.1)
right_http_connection (1.2.4)
rspec (2.3.0)
rspec-core (~> 2.3.0)
rspec-expectations (~> 2.3.0)
rspec-mocks (~> 2.3.0)
rspec-core (2.3.1)
rspec-expectations (2.3.0)
libarchive (0.1.2)
linecache19 (0.5.12)
ruby_core_source (>= 0.1.4)
process_watcher (0.4)
rake (0.9.2)
right_aws (2.1.0)
right_http_connection (>= 1.2.5)
right_http_connection (1.3.0)
rspec (2.6.0)
rspec-core (~> 2.6.0)
rspec-expectations (~> 2.6.0)
rspec-mocks (~> 2.6.0)
rspec-core (2.6.4)
rspec-expectations (2.6.0)
diff-lcs (~> 1.1.2)
rspec-mocks (2.3.0)
rtags (0.97)
ruby-debug (0.10.4)
columnize (>= 0.1)
ruby-debug-base (~> 0.10.4.0)
ruby-debug-base (0.10.4)
linecache (>= 0.3)
rspec-mocks (2.6.0)
ruby-debug-base19 (0.11.25)
columnize (>= 0.3.1)
linecache19 (>= 0.5.11)
ruby_core_source (>= 0.1.4)
ruby-debug19 (0.11.6)
columnize (>= 0.3.1)
linecache19 (>= 0.5.11)
ruby-debug-base19 (>= 0.11.19)
ruby_core_source (0.1.5)
archive-tar-minitar (>= 0.5.2)

PLATFORMS
ruby
Expand All @@ -37,10 +43,9 @@ DEPENDENCIES
curb (~> 0.7.7.1)
flexmock
json (~> 1.4.5)
libarchive (~> 0.1.1)
libarchive (~> 0.1.2)
process_watcher (~> 0.3)
rake
right_aws (~> 2.0)
rspec (~> 2.3)
rtags
ruby-debug
ruby-debug19
2 changes: 1 addition & 1 deletion README.rdoc
Expand Up @@ -17,7 +17,7 @@ cost of requiring some systems administration work external to Ruby.
require 'rubygems'
require 'right_scraper'

scraper = RightScale::Scraper.new('/tmp')
scraper = RightScale::Scraper.new(:basedir => '/tmp', :kind => :cookbook)
scraper.scrape(:type => :git, :url => 'git://github.com/rightscale/right_scraper.git')

== INSTALLATION
Expand Down
17 changes: 7 additions & 10 deletions Rakefile
@@ -1,5 +1,5 @@
#-- -*-ruby-*-
# Copyright: Copyright (c) 2010 RightScale, Inc.
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
Expand Down Expand Up @@ -27,8 +27,8 @@ require 'bundler/setup'
require 'fileutils'
require 'rake'
require 'rspec/core/rake_task'
require 'rake/rdoctask'
require 'rake/gempackagetask'
require 'rdoc/task'
require 'rubygems/package_task'
require 'rake/clean'

task :default => 'spec'
Expand All @@ -44,7 +44,6 @@ task :gem => 'pkg' do
end

CLEAN.include('pkg')
CLEAN.include('right_scraper_all/lib')

# == Unit Tests == #

Expand All @@ -54,7 +53,8 @@ task :specs => :spec

desc 'Run unit tests'
RSpec::Core::RakeTask.new do |t|
t.pattern = '*/spec/**/*_spec.rb'
t.pattern = 'spec/**/*_spec.rb'
t.rspec_opts = ["--color", "--format", "nested"]
end

namespace :spec do
Expand All @@ -75,15 +75,12 @@ end
# == Documentation == #

desc "Generate API documentation to doc/rdocs/index.html"
Rake::RDocTask.new do |rd|
RDoc::Task.new do |rd|
rd.rdoc_dir = 'doc/rdocs'
rd.main = 'README.rdoc'
rd.rdoc_files.include 'README.rdoc', '*/README.rdoc', "*/lib/**/*.rb"
rd.rdoc_files.include 'README.rdoc', 'lib/**/*.rb'

rd.options << '--inline-source'
rd.options << '--line-numbers'
rd.options << '--all'
rd.options << '--fileboxes'
rd.options << '--diagram'
end

Expand Down
@@ -1,5 +1,5 @@
#--
# Copyright: Copyright (c) 2010 RightScale, Inc.
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
Expand All @@ -23,20 +23,34 @@

# Explicitly list required files to make IDEs happy
require 'fileutils'
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'builders', 'base'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'builders', 'archive'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'builders', 'filesystem'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'builders', 'union'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'cookbook'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'logger'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'repository'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'repositories', 'download'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'scanners', 'base'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'scanners', 'manifest'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'scanners', 'metadata'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'scanners', 'union'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'scraper'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'scrapers', 'base'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'scrapers', 'checkout'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'scrapers', 'filesystem'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper_base', 'scrapers', 'command_line_download'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'builders', 'base'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'builders', 'filesystem'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'builders', 'union'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'logger'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'processes', 'ssh'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'repositories', 'base'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'repositories', 'download'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'repositories', 'git'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'repositories', 'svn'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'resources', 'base'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'resources', 'cookbook'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'resources', 'workflow'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'retrievers', 'base'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'retrievers', 'checkout'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'retrievers', 'download'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'retrievers', 'git'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'retrievers', 'svn'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scanners', 'base'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scanners', 'cookbook_manifest'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scanners', 'cookbook_metadata'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scanners', 'cookbook_s3_upload'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scanners', 'union'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scanners', 'workflow_manifest'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scanners', 'workflow_metadata'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scraper'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scraper_logger'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scrapers', 'base'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scrapers', 'cookbook'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'scrapers', 'workflow'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'svn_client'))
require File.expand_path(File.join(File.dirname(__FILE__), 'right_scraper', 'version'))
@@ -1,5 +1,5 @@
#--
# Copyright: Copyright (c) 2010 RightScale, Inc.
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
Expand Down Expand Up @@ -31,7 +31,7 @@ module Builders
#
# The lifecycle for a builder is as follows:
# - builder = Builder.new (once)
# - builder.go(dir, cookbook) (many times)
# - builder.go(dir, resource) (many times)
# - builder.finish (once)
class Builder
# Create a new Builder. Recognizes options as given. Some
Expand All @@ -47,12 +47,12 @@ def initialize(options={})
@logger = options.fetch(:logger, Logger.new)
end

# Run builder for this cookbook.
# Run builder for this resource.
#
# === Parameters
# dir(String):: directory cookbook exists at
# cookbook(RightScraper::Cookbook):: cookbook instance being built
def go(dir, cookbook)
# dir(String):: directory resource exists at
# resource(Object):: resource instance being built
def go(dir, resource)
end

# Notification that all scans for this repository have
Expand Down
@@ -1,5 +1,5 @@
#--
# Copyright: Copyright (c) 2010 RightScale, Inc.
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
Expand Down Expand Up @@ -27,20 +27,21 @@ module RightScraper
module Builders
# Build metadata by scanning the filesystem.
class Filesystem < Builder

# Create a new filesystem scanner. In addition to the options
# recognized by Builder, this class recognizes <tt>:scraper</tt> and
# recognized by Builder, this class recognizes <tt>:retriever</tt> and
# <tt>:scanner</tt>.
#
# === Options
# <tt>:scraper</tt>:: Required. FilesystemBasedScraper currently being used
# <tt>:scanner</tt>:: Required. Scanner currently being used
# <tt>:ignorable_paths</tt>:: Ignore directories whose names belong to this list
#
# === Parameters
# options(Hash):: scraper options
def initialize(options={})
super
@scraper = options.fetch(:scraper)
@scanner = options.fetch(:scanner)
@ignorable_paths = options[:ignorable_paths]
end

# Tell the scanner we're done.
Expand All @@ -49,16 +50,16 @@ def finish
@scanner.finish
end

# Run builder for this cookbook.
# Run builder for this resource.
#
# === Parameters
# dir(String):: directory cookbook exists at
# cookbook(RightScraper::Cookbook):: cookbook instance being built
def go(dir, cookbook)
# dir(String):: directory resource exists at
# resource(Object):: resource instance being built
def go(dir, resource)
@logger.operation(:scanning_filesystem, "rooted at #{dir}") do
@scanner.begin(cookbook)
@scanner.begin(resource)
maybe_scan(Dir.new(dir), nil)
@scanner.end(cookbook)
@scanner.end(resource)
end
end

Expand All @@ -72,11 +73,11 @@ def maybe_scan(directory, position)
#
# === Parameters
# directory(Dir):: directory to scan
# position(String):: relative pathname for _directory_ from root of cookbook
# position(String):: relative pathname for _directory_ from root of resource
def scan(directory, position)
directory.each do |entry|
next if entry == '.' || entry == '..'
next if @scraper.ignorable?(entry)
next if @ignorable_paths && @ignorable_paths.include?(entry)

fullpath = File.join(directory.path, entry)
relative_position = position ? File.join(position, entry) : entry
Expand Down
@@ -1,5 +1,5 @@
#--
# Copyright: Copyright (c) 2010 RightScale, Inc.
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
Expand Down Expand Up @@ -38,13 +38,13 @@ def initialize(classes, options={})
@subbuilders = classes.map {|klass| klass.new(options)}
end

# Run each builder for this cookbook.
# Run each builder for this resource.
#
# === Parameters
# dir(String):: directory cookbook exists at
# cookbook(RightScraper::Cookbook):: cookbook instance being built
def go(dir, cookbook)
@subbuilders.each {|builder| builder.go(dir, cookbook)}
# dir(String):: directory resource exists at
# resource(RightScraper::Resources::Base):: resource instance being built
def go(dir, resource)
@subbuilders.each {|builder| builder.go(dir, resource)}
end

# Notify subbuilders that all scans for this repository have
Expand Down
@@ -1,5 +1,5 @@
#--
# Copyright: Copyright (c) 2010 RightScale, Inc.
# Copyright: Copyright (c) 2010-2011 RightScale, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
Expand Down Expand Up @@ -38,7 +38,7 @@ def initialize(*args)
@exceptional = false
end

# (RightScraper::Repository) Repository currently being examined.
# (RightScraper::Repositories::Base) Repository currently being examined.
attr_writer :repository

# Begin an operation that merits logging. Will call #note_error
Expand Down
File renamed without changes.
File renamed without changes.

0 comments on commit 3a12a6b

Please sign in to comment.