-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.rb
63 lines (49 loc) · 1.2 KB
/
scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/bin/env ruby
# encoding: utf-8
# frozen_string_literal: true
require 'pry'
require 'scraped'
require 'scraperwiki'
require 'open-uri/cached'
OpenURI::Cache.cache_path = '.cache'
# require 'scraped_page_archive/open-uri'
# Page holding the table of members; every member is one row of the
# `.MsoTableGrid` table.
class MembersPage < Scraped::HTML
  # One hash per table row, each produced by wrapping the row in a MemberRow.
  field :members do
    member_rows.map { |row| fragment(row => MemberRow).to_h }
  end

  private

  # Every row of the members table except the first one
  # (presumably a header row -- confirm against the live page).
  def member_rows
    noko.css('.MsoTableGrid tr').drop(1)
  end
end
# One row of the members table. Values are read from the row's <td> cells by
# position: 0 => id, 1 => name, 2 => area, 3 => cell containing the photo.
class MemberRow < Scraped::HTML
  field :id do
    cell_text(0)
  end

  field :name do
    cell_text(1)
  end

  field :area do
    cell_text(2)
  end

  # The `src` of any image in the fourth cell whose URL contains
  # "assembleenationale"; an empty string when no such image is present.
  field :photo do
    cells[3].css('img[src*="assembleenationale"]/@src').text
  end

  # Hard-coded: no party value is read from the row.
  field :party do
    'unknown'
  end

  # Hard-coded term identifier.
  field :term do
    12
  end

  private

  # All <td> nodes of this row, in document order.
  def cells
    noko.css('td')
  end

  # Tidied text content of the cell at the given zero-based position.
  def cell_text(index)
    cells[index].text.tidy
  end
end
# Requests a URL and wraps the response in a page object.
#
# @param h [Hash] mapping of URL => page class; only the first pair is used
# @return [Object] `klass.new(response: ...)` for the class in that pair
def scraper(h)
  page_url, page_class = h.first
  response = Scraped::Request.new(url: page_url).response
  page_class.new(response: response)
end
# Scrape the members page, optionally print each record, then rebuild the
# `data` table with the scraped rows.
members_url = 'http://www.assembleenationale.mr/index.php?option=com_content&view=article&id=352&Itemid=164&lang=en'
data = scraper(members_url => MembersPage).members

# When MORPH_DEBUG is set, print every record with blank values removed and
# the remaining keys in sorted order.
if ENV['MORPH_DEBUG']
  data.each do |record|
    populated = record.reject { |_, value| value.to_s.empty? }
    puts populated.sort_by { |key, _| key }.to_h
  end
end

# Best-effort removal of the existing table; any StandardError raised by the
# drop is deliberately ignored.
begin
  ScraperWiki.sqliteexecute('DROP TABLE data')
rescue StandardError
  nil
end
# NOTE(review): the first argument is presumably the list of unique key
# columns -- confirm against the scraperwiki gem documentation.
ScraperWiki.save_sqlite(%i[name term], data)