Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Modernise scraper
  • Loading branch information
tmtmtmtm committed Feb 19, 2016
1 parent 565af96 commit b4c012a
Showing 1 changed file with 3 additions and 27 deletions.
30 changes: 3 additions & 27 deletions scraper.rb
@@ -1,32 +1,8 @@
#!/bin/env ruby
# encoding: utf-8

require 'json'
require 'pry'
require 'rest-client'
require 'scraperwiki'
require 'wikidata/fetcher'
require 'mediawiki_api'

# Queries the morph.io data API for the distinct `wikiname` values stored by
# the jordan-house-of-representatives-wikipedia scraper.
#
# The API key is read from the MORPH_API_KEY environment variable.
#
# Returns the parsed JSON response with keys symbolized; the caller in this
# script maps over the result and reads `:wikiname` from each row.
def members
  query_params = {
    key: ENV["MORPH_API_KEY"],
    query: "select DISTINCT(wikiname) AS wikiname from data"
  }
  response = RestClient.get(
    'https://api.morph.io/tmtmtmtm/jordan-house-of-representatives-wikipedia/data.json',
    params: query_params
  )
  JSON.parse(response, symbolize_names: true)
end

# Resolve each English wikiname to a Wikidata entry, fetch its data with 'ar'
# as the language argument, and store it in the scraper's SQLite table keyed
# on :id.
# NOTE(review): each yielded `page` is presumably a [wikiname, wikidata_id]
# pair (only `page.last` is used as the id) — confirm against
# WikiData.ids_from_pages.
WikiData.ids_from_pages('en', members.map { |c| c[:wikiname] }).each do |page|
  begin
    data = WikiData::Fetcher.new(id: page.last).data('ar')
  rescue StandardError => e
    # Best-effort: a failed fetch must not abort the whole run, but the
    # reason is logged instead of being silently discarded.
    warn "No data for #{page}: #{e.class}: #{e.message}"
    next
  end

  unless data
    warn "No data for #{page}"
    next
  end

  puts data
  ScraperWiki.save_sqlite([:id], data)
end

# If MORPH_REBUILDER_URL is set, POST an empty payload to it and echo the
# response to stderr; otherwise do nothing.
rebuilder_url = ENV['MORPH_REBUILDER_URL']
warn RestClient.post(rebuilder_url, {}) if rebuilder_url

# Look up the wikinames recorded by the source morph.io scraper.
# NOTE(review): presumably returns the values of the `wikiname` column from
# that scraper's data — confirm against the EveryPolitician::Wikidata docs.
wikinames = EveryPolitician::Wikidata.morph_wikinames(
  source: 'tmtmtmtm/jordan-house-of-representatives-wikipedia',
  column: 'wikiname'
)

# Hand the names over as English ('en') page names, with output disabled.
EveryPolitician::Wikidata.scrape_wikidata(
  names: { en: wikinames },
  output: false
)

# Echo whatever the rebuilder notification returns to stderr.
warn EveryPolitician::Wikidata.notify_rebuilder

0 comments on commit b4c012a

Please sign in to comment.