Skip to content

Commit

Permalink
Initial scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
tmtmtmtm committed Oct 5, 2015
1 parent 172cd8f commit 2087070
Showing 1 changed file with 28 additions and 0 deletions.
28 changes: 28 additions & 0 deletions scraper.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/env ruby
# encoding: utf-8

require 'scraperwiki'
require 'wikidata/fetcher'
require 'nokogiri'
require 'open-uri'
require 'pry'

# Downloads +url+ and parses the response body as an HTML document.
#
# The fetch goes through URI#read (mixed in by open-uri) rather than
# Kernel#open: Kernel#open stopped accepting URLs in Ruby 3.0, and it
# treats a string that starts with "|" as a command to execute.
#
# @param url [String] absolute URL of the page to fetch
# @return the document produced by Nokogiri::HTML for the downloaded body
def noko_for(url)
  Nokogiri::HTML(URI.parse(url).read)
end

# Extracts link titles from the page at +url+.
#
# Looks at the first table that follows the h2 heading containing
# "Deputātu", takes the first cell of every row that has cells, and collects
# the title attribute of each link whose class is not "new" (presumably
# Wikipedia's marker for links to missing pages -- confirm).
#
# @param url [String] address of the page to scrape
# @return [Array<String>] text of every matching title attribute
def wikinames(url)
  title_attrs = noko_for(url).xpath(
    '//h2[span[contains(.,"Deputātu")]]/following-sibling::table[1]//tr[td]/td[1]//a[not(@class="new")]/@title'
  )
  title_attrs.map { |attr| attr.text }
end

# Page titles scraped from the Latvian Wikipedia list page.
names = wikinames('https://lv.wikipedia.org/wiki/12._Saeimas_deput%C4%81ti')

# Look up an id for each page title, fetch that item's data, and store it in
# SQLite keyed on :id. A failure on one item is reported and skipped so the
# rest of the run still completes.
WikiData.ids_from_pages('lv', names).each do |page|
  # The last element of each entry is used as the id to fetch.
  data = begin
    WikiData::Fetcher.new(id: page.last).data('lv')
  rescue StandardError => e
    # Report the cause instead of silently discarding it, then carry on.
    warn "Error fetching #{page}: #{e.class}: #{e.message}"
    nil
  end

  unless data
    warn "No data for #{page}"
    next
  end

  ScraperWiki.save_sqlite([:id], data)
end

0 comments on commit 2087070

Please sign in to comment.