Skip to content

Commit

Permalink
Added base scraper logic
Browse files Browse the repository at this point in the history
  • Loading branch information
k-rudy committed Apr 7, 2014
1 parent 428b967 commit d7d3e3f
Show file tree
Hide file tree
Showing 15 changed files with 268 additions and 62 deletions.
1 change: 1 addition & 0 deletions .ruby-gemset
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bible
1 change: 1 addition & 0 deletions .ruby-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2.1.1
1 change: 0 additions & 1 deletion .rvmrc

This file was deleted.

3 changes: 3 additions & 0 deletions .simplecov
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Start coverage tracking with SimpleCov's 'rails' profile.
SimpleCov.start('rails') do
  # Central place for custom configuration such as groups and filters.
end
6 changes: 4 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
source 'https://rubygems.org'

# Bundle edge Rails instead: gem 'rails', github: 'rails/rails'
gem 'rails', '~> 4.0.2'
gem 'rails', '~> 4.0.3'

# Mongo object mapper
gem "mongoid", github: 'mongoid/mongoid'

# Use SCSS for stylesheets
gem 'sass-rails', '~> 4.0.0'
gem 'sass-rails'

# Use Uglifier as compressor for JavaScript assets
# gem 'uglifier', '>= 1.3.0'
Expand Down Expand Up @@ -35,4 +35,6 @@ group :test do
gem 'rspec-rails'
gem 'mongoid-rspec'
gem 'coveralls', require: false
gem 'simplecov'
gem 'factory_girl_rails'
end
95 changes: 50 additions & 45 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
GIT
remote: git://github.com/mongoid/mongoid.git
revision: 5b0f031992cbec66d68c6cb288a4edb952ed5336
revision: 026e32109178eef2a50b31924f45eee2b7e05c82
specs:
mongoid (4.0.0.beta1)
activemodel (>= 4.0.0)
Expand All @@ -11,33 +11,32 @@ GIT
GEM
remote: https://rubygems.org/
specs:
actionmailer (4.0.2)
actionpack (= 4.0.2)
actionmailer (4.0.4)
actionpack (= 4.0.4)
mail (~> 2.5.4)
actionpack (4.0.2)
activesupport (= 4.0.2)
actionpack (4.0.4)
activesupport (= 4.0.4)
builder (~> 3.1.0)
erubis (~> 2.7.0)
rack (~> 1.5.2)
rack-test (~> 0.6.2)
activemodel (4.0.2)
activesupport (= 4.0.2)
activemodel (4.0.4)
activesupport (= 4.0.4)
builder (~> 3.1.0)
activerecord (4.0.2)
activemodel (= 4.0.2)
activerecord (4.0.4)
activemodel (= 4.0.4)
activerecord-deprecated_finders (~> 1.0.2)
activesupport (= 4.0.2)
activesupport (= 4.0.4)
arel (~> 4.0.0)
activerecord-deprecated_finders (1.0.3)
activesupport (4.0.2)
i18n (~> 0.6, >= 0.6.4)
activesupport (4.0.4)
i18n (~> 0.6, >= 0.6.9)
minitest (~> 4.2)
multi_json (~> 1.3)
thread_safe (~> 0.1)
tzinfo (~> 0.3.37)
arel (4.0.2)
atomic (1.1.14)
bson (2.2.0)
bson (2.2.2)
builder (3.1.4)
coderay (1.1.0)
coffee-rails (4.0.1)
Expand All @@ -47,7 +46,7 @@ GEM
coffee-script-source
execjs
coffee-script-source (1.7.0)
connection_pool (1.2.0)
connection_pool (2.0.0)
coveralls (0.7.0)
multi_json (~> 1.3)
rest-client
Expand All @@ -58,6 +57,11 @@ GEM
docile (1.1.3)
erubis (2.7.0)
execjs (2.0.2)
factory_girl (4.4.0)
activesupport (>= 3.0.0)
factory_girl_rails (4.4.1)
factory_girl (~> 4.4.0)
railties (>= 3.0.0)
hike (1.2.3)
i18n (0.6.9)
jquery-rails (3.1.0)
Expand All @@ -68,62 +72,62 @@ GEM
treetop (~> 1.4.8)
method_source (0.8.2)
mime-types (1.25.1)
mini_portile (0.5.2)
mini_portile (0.5.3)
minitest (4.7.5)
mongoid-rspec (1.10.0)
mongoid (>= 3.0.1)
rake
rspec (>= 2.14)
moped (2.0.0.beta6)
moped (2.0.0.rc1)
bson (~> 2.2)
connection_pool (~> 1.2)
connection_pool (~> 2.0)
optionable (~> 0.2.0)
multi_json (1.8.4)
multi_json (1.9.2)
nokogiri (1.6.1)
mini_portile (~> 0.5.0)
optionable (0.2.0)
origin (2.1.0)
polyglot (0.3.3)
origin (2.1.1)
polyglot (0.3.4)
pry (0.9.12.6)
coderay (~> 1.0)
method_source (~> 0.8)
slop (~> 3.4)
rack (1.5.2)
rack-test (0.6.2)
rack (>= 1.0)
rails (4.0.2)
actionmailer (= 4.0.2)
actionpack (= 4.0.2)
activerecord (= 4.0.2)
activesupport (= 4.0.2)
rails (4.0.4)
actionmailer (= 4.0.4)
actionpack (= 4.0.4)
activerecord (= 4.0.4)
activesupport (= 4.0.4)
bundler (>= 1.3.0, < 2.0)
railties (= 4.0.2)
railties (= 4.0.4)
sprockets-rails (~> 2.0.0)
railties (4.0.2)
actionpack (= 4.0.2)
activesupport (= 4.0.2)
railties (4.0.4)
actionpack (= 4.0.4)
activesupport (= 4.0.4)
rake (>= 0.8.7)
thor (>= 0.18.1, < 2.0)
rake (10.1.1)
rake (10.2.2)
rest-client (1.6.7)
mime-types (>= 1.16)
rspec (2.14.1)
rspec-core (~> 2.14.0)
rspec-expectations (~> 2.14.0)
rspec-mocks (~> 2.14.0)
rspec-core (2.14.7)
rspec-core (2.14.8)
rspec-expectations (2.14.5)
diff-lcs (>= 1.1.3, < 2.0)
rspec-mocks (2.14.5)
rspec-rails (2.14.1)
rspec-mocks (2.14.6)
rspec-rails (2.14.2)
actionpack (>= 3.0)
activemodel (>= 3.0)
activesupport (>= 3.0)
railties (>= 3.0)
rspec-core (~> 2.14.0)
rspec-expectations (~> 2.14.0)
rspec-mocks (~> 2.14.0)
sass (3.2.14)
sass (3.3.4)
sass-rails (4.0.1)
railties (>= 4.0.0, < 5.0)
sass (>= 3.1.10)
Expand All @@ -133,8 +137,8 @@ GEM
multi_json
simplecov-html (~> 0.8.0)
simplecov-html (0.8.0)
slop (3.4.7)
sprockets (2.10.1)
slop (3.5.0)
sprockets (2.12.0)
hike (~> 1.2)
multi_json (~> 1.0)
rack (~> 1.0)
Expand All @@ -145,29 +149,30 @@ GEM
sprockets (~> 2.8)
term-ansicolor (1.3.0)
tins (~> 1.0)
thor (0.18.1)
thread_safe (0.1.3)
atomic
thor (0.19.1)
thread_safe (0.3.3)
tilt (1.4.1)
tins (1.0.0)
tins (1.1.0)
treetop (1.4.15)
polyglot
polyglot (>= 0.3.1)
turbolinks (2.2.1)
turbolinks (2.2.2)
coffee-rails
tzinfo (0.3.38)
tzinfo (0.3.39)

PLATFORMS
ruby

DEPENDENCIES
coveralls
factory_girl_rails
jquery-rails
mongoid!
mongoid-rspec
nokogiri
pry
rails (~> 4.0.2)
rails (~> 4.0.3)
rspec-rails
sass-rails (~> 4.0.0)
sass-rails
simplecov
turbolinks
6 changes: 6 additions & 0 deletions app/models/bible/book.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,10 @@ class Bible::Book

index name: 1
index order: 1

default_scope ->{ asc(:order) }

# Placeholder for scraping this book with the given scraper.
#
# The body is still empty: the argument is ignored and the method returns nil.
#
# @param [ Object ] scraper scraper to use (currently unused)
# @return [ nil ]
def scrape(scraper)
end
end
5 changes: 2 additions & 3 deletions app/models/bible/verse.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@ class Bible::Verse

belongs_to :book

field :book_name
field :chapter, type: Integer
field :text, localize: true
field :order, type: Integer

index book_name: 1

index order: 1
end
67 changes: 67 additions & 0 deletions lib/bible/scrapers/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,73 @@ module Scrapers
#
class Base
class <<self
# Scrapes every book returned by Bible::Book.all into the db.
#
# @return [ Array ] the results of #scrape_book, one entry per book
def scrape
  books = Bible::Book.all
  books.map { |current_book| scrape_book(current_book) }
end

private

# Scrapes a single book translation, chapter by chapter.
#
# Chapters are numbered from 1 up to the book's chapters_count.
#
# @param [ Bible::Book ] book
# @return [ Integer ] the book's chapters_count (the receiver of Integer#times)
def scrape_book(book)
  total_chapters = book.chapters_count
  total_chapters.times { |offset| scrape_chapter(book, offset.succ) }
end

# Scrapes one chapter by delegating to #scrape_verses, which starts from
# the first verse by default.
#
# @param [ Bible::Book ] book
# @param [ Integer ] chapter number of the chapter being imported
def scrape_chapter(book, chapter)
scrape_verses(book, chapter)
end

# Scrapes consecutive verses of a chapter, creating a Bible::Verse for each.
# Starts at verse_number and stops at the first verse for which
# #process_verse returns a falsy value (i.e. no verse was created).
#
# Written as a loop instead of recursion so the call stack does not grow
# with the number of verses in the chapter.
#
# @param [ Bible::Book ] book
# @param [ Integer ] chapter number being imported
# @param [ Integer ] verse_number number of the first verse to import
# @return [ nil, false ] the falsy #process_verse result that ended the scan
def scrape_verses(book, chapter, verse_number = 1)
  while (outcome = process_verse(book, chapter, verse_number))
    verse_number += 1
  end
  outcome
end

# Scrapes the text of one verse and, if there is any, creates the verse.
#
# @param [ Bible::Book ] book
# @param [ Integer ] chapter number being imported
# @param [ Integer ] verse_number number being imported
# @return [ Verse, nil ] the verse if it was created, otherwise nil
def process_verse(book, chapter, verse_number)
  verse_text = scrape_verse(book, chapter, verse_number)
  return unless verse_text
  # An empty or whitespace-only string is truthy in Ruby. Accepting it would
  # store blank verses and #scrape_verses would never see a stop signal from
  # a scraper that returns "" for a missing verse. [[:space:]] also covers
  # non-breaking spaces, which are common in scraped HTML.
  return if verse_text.to_s =~ /\A[[:space:]]*\z/

  create_verse(book, chapter, verse_number, verse_text)
end

# Hook that returns the text of a single verse. Every concrete scraper
# class has to override it; #process_verse treats a falsy result as
# "no such verse".
#
# @param [ Bible::Book ] book
# @param [ Integer ] chapter number being imported
# @param [ Integer ] verse_number number being imported
# @raise [ NotImplementedError ] whenever the Base implementation is called
def scrape_verse(book, chapter, verse_number)
  raise NotImplementedError, 'Scraper class must implement #scrape_verse method'
end

# Persists one verse of the given book via Bible::Verse.create!.
#
# The text is stored through text_translations, keyed by the value of
# `translation` (defined outside this section - presumably the scraper's
# locale key; confirm against its definition).
#
# @param [ Bible::Book ] book
# @param [ Integer ] chapter number being imported
# @param [ Integer ] verse_number number being imported, stored as the verse's order
# @param [ String ] verse_text
# @return [ Object ] whatever Bible::Verse.create! returns
def create_verse(book, chapter, verse_number, verse_text)
  localized_text = { translation => verse_text }
  attributes = {
    book: book,
    chapter: chapter,
    order: verse_number,
    text_translations: localized_text
  }
  Bible::Verse.create!(attributes)
end

# Gets the bible source url
#
# @return [ String ] source url
Expand Down
9 changes: 9 additions & 0 deletions lib/bible/scrapers/ru.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
module Bible
module Scrapers
# Scraper for the Russian version.
#
# Currently an empty subclass of Base: it defines no methods of its own yet.
class Ru < Base

end
end
end
7 changes: 7 additions & 0 deletions spec/factories/bible/books.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
require 'factory_girl'

# Factory definitions for Bible::Book.
FactoryGirl.define do
  # :book builds a Bible::Book whose name is 'Genesis'.
  factory(:book, class: Bible::Book) do
    name('Genesis')
  end
end
Loading

0 comments on commit d7d3e3f

Please sign in to comment.