Skip to content

Commit

Permalink
Added russian scraping logic
Browse files Browse the repository at this point in the history
  • Loading branch information
k-rudy committed Apr 17, 2014
1 parent 3e64be2 commit 59e7001
Show file tree
Hide file tree
Showing 8 changed files with 132 additions and 48 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@
# Ignore all logfiles and tempfiles.
/log/*.log
/tmp

# OSx
.DS_Store
4 changes: 2 additions & 2 deletions Gemfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
source 'https://rubygems.org'

# Bundle edge Rails instead: gem 'rails', github: 'rails/rails'
gem 'rails', '~> 4.0.3'
gem 'rails', '~> 4.1.0'

# Mongo object mapper
gem "mongoid", github: 'mongoid/mongoid'

# Use SCSS for stylesheets
gem 'sass-rails'
gem 'sass-rails', '~> 4.0.3'

# Use Uglifier as compressor for JavaScript assets
# gem 'uglifier', '>= 1.3.0'
Expand Down
93 changes: 50 additions & 43 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
GIT
remote: git://github.com/mongoid/mongoid.git
revision: 026e32109178eef2a50b31924f45eee2b7e05c82
revision: 229315900cc8bb196a2d5eafac0b6ffd57477172
specs:
mongoid (4.0.0.beta1)
activemodel (>= 4.0.0)
Expand All @@ -11,33 +11,35 @@ GIT
GEM
remote: https://rubygems.org/
specs:
actionmailer (4.0.4)
actionpack (= 4.0.4)
actionmailer (4.1.0)
actionpack (= 4.1.0)
actionview (= 4.1.0)
mail (~> 2.5.4)
actionpack (4.0.4)
activesupport (= 4.0.4)
builder (~> 3.1.0)
erubis (~> 2.7.0)
actionpack (4.1.0)
actionview (= 4.1.0)
activesupport (= 4.1.0)
rack (~> 1.5.2)
rack-test (~> 0.6.2)
activemodel (4.0.4)
activesupport (= 4.0.4)
builder (~> 3.1.0)
activerecord (4.0.4)
activemodel (= 4.0.4)
activerecord-deprecated_finders (~> 1.0.2)
activesupport (= 4.0.4)
arel (~> 4.0.0)
activerecord-deprecated_finders (1.0.3)
activesupport (4.0.4)
actionview (4.1.0)
activesupport (= 4.1.0)
builder (~> 3.1)
erubis (~> 2.7.0)
activemodel (4.1.0)
activesupport (= 4.1.0)
builder (~> 3.1)
activerecord (4.1.0)
activemodel (= 4.1.0)
activesupport (= 4.1.0)
arel (~> 5.0.0)
activesupport (4.1.0)
i18n (~> 0.6, >= 0.6.9)
minitest (~> 4.2)
multi_json (~> 1.3)
json (~> 1.7, >= 1.7.7)
minitest (~> 5.1)
thread_safe (~> 0.1)
tzinfo (~> 0.3.37)
arel (4.0.2)
tzinfo (~> 1.1)
arel (5.0.1.20140414130214)
bson (2.2.2)
builder (3.1.4)
builder (3.2.2)
coderay (1.1.0)
coffee-rails (4.0.1)
coffee-script (>= 2.2.0)
Expand Down Expand Up @@ -67,13 +69,14 @@ GEM
jquery-rails (3.1.0)
railties (>= 3.0, < 5.0)
thor (>= 0.14, < 2.0)
json (1.8.1)
mail (2.5.4)
mime-types (~> 1.16)
treetop (~> 1.4.8)
method_source (0.8.2)
mime-types (1.25.1)
mini_portile (0.5.3)
minitest (4.7.5)
minitest (5.3.3)
mongoid-rspec (1.10.0)
mongoid (>= 3.0.1)
rake
Expand All @@ -95,20 +98,22 @@ GEM
rack (1.5.2)
rack-test (0.6.2)
rack (>= 1.0)
rails (4.0.4)
actionmailer (= 4.0.4)
actionpack (= 4.0.4)
activerecord (= 4.0.4)
activesupport (= 4.0.4)
rails (4.1.0)
actionmailer (= 4.1.0)
actionpack (= 4.1.0)
actionview (= 4.1.0)
activemodel (= 4.1.0)
activerecord (= 4.1.0)
activesupport (= 4.1.0)
bundler (>= 1.3.0, < 2.0)
railties (= 4.0.4)
sprockets-rails (~> 2.0.0)
railties (4.0.4)
actionpack (= 4.0.4)
activesupport (= 4.0.4)
railties (= 4.1.0)
sprockets-rails (~> 2.0)
railties (4.1.0)
actionpack (= 4.1.0)
activesupport (= 4.1.0)
rake (>= 0.8.7)
thor (>= 0.18.1, < 2.0)
rake (10.2.2)
rake (10.3.0)
rest-client (1.6.7)
mime-types (>= 1.16)
rspec (2.14.1)
Expand All @@ -127,23 +132,24 @@ GEM
rspec-core (~> 2.14.0)
rspec-expectations (~> 2.14.0)
rspec-mocks (~> 2.14.0)
sass (3.3.4)
sass-rails (4.0.1)
sass (3.2.19)
sass-rails (4.0.3)
railties (>= 4.0.0, < 5.0)
sass (>= 3.1.10)
sprockets-rails (~> 2.0.0)
sass (~> 3.2.0)
sprockets (~> 2.8, <= 2.11.0)
sprockets-rails (~> 2.0)
simplecov (0.8.2)
docile (~> 1.1.0)
multi_json
simplecov-html (~> 0.8.0)
simplecov-html (0.8.0)
slop (3.5.0)
sprockets (2.12.0)
sprockets (2.11.0)
hike (~> 1.2)
multi_json (~> 1.0)
rack (~> 1.0)
tilt (~> 1.1, != 1.3.0)
sprockets-rails (2.0.1)
sprockets-rails (2.1.3)
actionpack (>= 3.0)
activesupport (>= 3.0)
sprockets (~> 2.8)
Expand All @@ -158,7 +164,8 @@ GEM
polyglot (>= 0.3.1)
turbolinks (2.2.2)
coffee-rails
tzinfo (0.3.39)
tzinfo (1.1.0)
thread_safe (~> 0.1)

PLATFORMS
ruby
Expand All @@ -171,8 +178,8 @@ DEPENDENCIES
mongoid-rspec
nokogiri
pry
rails (~> 4.0.3)
rails (~> 4.1.0)
rspec-rails
sass-rails
sass-rails (~> 4.0.3)
simplecov
turbolinks
4 changes: 3 additions & 1 deletion config/application.yml.example
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
sources:
ru:
url: http://google.com
url: http://google.com
mappings:
gn: ge
15 changes: 13 additions & 2 deletions lib/bible/scrapers/base.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
require 'open-uri'

module Bible
module Scrapers
# Contains base scraping logic that is common for all translations
Expand Down Expand Up @@ -43,14 +45,23 @@ def scrape_verses(book, chapter, verse_number = 1)
#
# @return [ Verse, nil ] the verse if it was created, otherwise nil
def process_verse(book, chapter, verse_number)
# NOTE(review): this view shows both sides of the diff; the next line looks
# like the removed (pre-change) call - confirm against the committed file.
verse_text = scrape_verse(book, chapter, verse_number)
# Resolve the name the scraped source uses for this book, then scrape with
# that name instead of the book object itself.
mapping = book_mapping(book)
verse_text = scrape_verse(mapping, chapter, verse_number)
# A verse is only created when the scraper returned text; when it did not,
# the modifier `if` makes this method evaluate to nil.
create_verse(book, chapter, verse_number, verse_text) if verse_text
end

# Gets the book mapping name for scraping purposes.
#
# The downcased book title is looked up in the :mappings section configured
# for the current translation. When no entry exists for the title, or the
# source has no :mappings section at all, the downcased title itself is used.
#
# @param [ #title ] book the book whose source-specific name is needed
#
# @return [ String ] mapping value
def book_mapping(book)
  title = book.title.downcase
  # :mappings is optional in the source configuration; treat a missing
  # section as "no overrides" instead of failing on nil.
  mappings = CONFIG[:sources][translation][:mappings] || {}
  mappings[title] || title
end

# This method should be implemented in particular Scraper class
#
# @raise [ NotImplementedError ] on attempt to call the method on the Base class
def scrape_verse(book, chapter, verse_number)
def scrape_verse(book_mapping, chapter, verse_number)
# NOTE(review): two signatures appear because this view shows both sides of
# the diff; the `book` one looks like the removed line and the `book_mapping`
# one like its replacement - confirm against the committed file.
#
# Base carries no scraping logic of its own, so a call that reaches this
# implementation always raises.
raise NotImplementedError.new('Scraper class must implement #scrape_verse method')
end

Expand Down
13 changes: 13 additions & 0 deletions lib/bible/scrapers/ru.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,19 @@ module Scrapers
#
class Ru < Base

  class << self
    # Scrapes verse text.
    #
    # Downloads the page at "<url>/<book_mapping>/<chapter>:<verse_number>",
    # parses it as UTF-8 HTML and returns the stripped text of the last child
    # node matched by the verse selector.
    #
    # NOTE(review): `url` is not defined in this class; presumably it is
    # provided by Base from the source configuration - confirm.
    #
    # @param [ String ] book_mapping book name as used in the source URL
    # @param [ Integer ] chapter number of the chapter being imported
    # @param [ Integer ] verse_number number of the verse being imported
    #
    # @return [ String, nil ] string when the verse with the number exists, nil - otherwise
    def scrape_verse(book_mapping, chapter, verse_number)
      verse_url = "#{url}/#{book_mapping}/#{chapter}:#{verse_number}"
      # The block form closes the downloaded stream as soon as Nokogiri has
      # parsed it, instead of leaving the handle open until it is collected.
      doc = open(verse_url) { |page| Nokogiri::HTML(page, nil, 'UTF-8') }
      # When the selector matches nothing, `last` is nil and `try` keeps the
      # result nil rather than raising.
      doc.css('#editionDependentData .bibletextblock table .bibletext .bibletext').children.last.try(:text).try(:strip)
    end
  end
end
end
end
25 changes: 25 additions & 0 deletions spec/libs/bible/scrapers/base_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@

describe '#process_verse' do

before { subject.stub(:book_mapping) }

it 'scrapes verse text' do
expect(subject).to receive(:scrape_verse)
subject.send(:process_verse, book, 1, 1)
Expand Down Expand Up @@ -121,4 +123,27 @@
expect(verse.text_translations[:base]).to eq('text')
end
end

# Covers the title-to-source-name lookup: a configured mapping wins, otherwise
# the downcased title is returned.
describe '#book_mapping' do

# Pin the translation so the lookup goes through the 'ru' source settings.
before { subject.stub(translation: 'ru') }

context 'when there is a mapping for the book' do

# The title is downcased before the lookup, so 'Gn' is searched as 'gn'.
# NOTE(review): relies on the loaded config mapping gn to ge, as in
# config/application.yml.example - confirm the test config matches.
before { book.stub(title: 'Gn') }

it 'returns mapping value' do
expect(subject.send(:book_mapping, book)).to eq('ge')
end
end

context 'when there is no mapping defined' do

# NOTE(review): assumes the loaded config has no 'le' entry - confirm.
before { book.stub(title: 'Le') }

it 'returns downcased book title' do
expect(subject.send(:book_mapping, book)).to eq('le')
end
end
end
end
23 changes: 23 additions & 0 deletions spec/libs/bible/scrapers/ru_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
require 'spec_helper'

# Exercises the Russian scraper's verse lookup through the class itself.
# NOTE(review): no HTTP stubbing is visible in this spec, so these examples
# appear to depend on the configured source site being reachable - confirm.
describe Bible::Scrapers::Ru do

# scrape_verse is defined on the class (inside `class << self`), so the class
# is the subject rather than an instance.
subject { Bible::Scrapers::Ru }

describe '#scrape_verse' do

context 'when the verse number exists' do

# 'ge' is the source-side book name passed straight to the scraper.
it 'returns verse text' do
expect(subject.send(:scrape_verse, 'ge', 1, 1)).to eq("В начале сотворил Бог небо и землю.")
end
end

context 'when the verse number doesnt exist' do

# Verse 100 is used as a number expected to be absent from the chapter.
it 'returns nil' do
expect(subject.send(:scrape_verse, 'ge', 1, 100)).to be_nil
end
end
end
end

0 comments on commit 59e7001

Please sign in to comment.