Skip to content

Commit

Permalink
Use latest wikidata-fetcher
Browse files Browse the repository at this point in the history
  • Loading branch information
tmtmtmtm committed Dec 31, 2015
1 parent b7567c3 commit 6f404fc
Showing 1 changed file with 1 addition and 48 deletions.
49 changes: 1 addition & 48 deletions scraper.rb
@@ -1,55 +1,8 @@
#!/bin/env ruby
# encoding: utf-8

module EveryPolitician

module Wikidata
require 'wikidata/fetcher'

require 'json'
require 'rest-client'

def self.morph_wikinames(h)
morph_api_url = 'https://api.morph.io/%s/data.json' % h[:source]
morph_api_key = ENV["MORPH_API_KEY"]
result = RestClient.get morph_api_url, params: {
key: morph_api_key,
query: "SELECT DISTINCT(#{h[:column]}) AS wikiname FROM data"
}
JSON.parse(result, symbolize_names: true).map { |h| h[:wikiname] }.compact
end

#-------------------------------------------------------------------

require 'wikidata/fetcher'
require 'scraperwiki'

def self.scrape_wikidata(h)
langs = ((h[:lang] || h[:names].keys) + [:en]).flatten.uniq
langpairs = h[:names].map { |lang, names| WikiData.ids_from_pages(lang.to_s, names) }
combined = langpairs.reduce({}) { |h, people| h.merge(people.invert) }

combined.each do |id, name|
data = WikiData::Fetcher.new(id: id).data(langs) rescue nil
unless data
warn "No data for #{id}"
next
end
data[:original_wikiname] = name
ScraperWiki.save_sqlite([:id], data)
end
end

#-------------------------------------------------------------------

require 'rest-client'

def self.notify_rebuilder
RestClient.post ENV['MORPH_REBUILDER_URL'], {} if ENV['MORPH_REBUILDER_URL']
end
end
end

# require 'pry'
names = EveryPolitician::Wikidata.morph_wikinames(source: 'tmtmtmtm/malaysian_parliament-wp', column: 'wikipedia__en')
EveryPolitician::Wikidata.scrape_wikidata(names: { en: names })
warn EveryPolitician::Wikidata.notify_rebuilder
Expand Down

0 comments on commit 6f404fc

Please sign in to comment.