Skip to content

Commit

Permalink
Source now contains Wikidata IDs
Browse files Browse the repository at this point in the history
  • Loading branch information
tmtmtmtm committed Mar 23, 2016
1 parent 7eb676c commit a83480d
Showing 1 changed file with 2 additions and 15 deletions.
17 changes: 2 additions & 15 deletions scraper.rb
Original file line number Diff line number Diff line change
@@ -1,20 +1,7 @@
#!/bin/env ruby
# encoding: utf-8

require 'pry'
require 'wikidata/fetcher'

# Read the 'wikipedia' column of the tmtmtmtm/cyprus-openpatata source.
# NOTE(review): the values are handled further down as Wikipedia article
# URLs (matched against "//<lang>.wikipedia.org") — confirm against the
# fetcher's documentation.
links = EveryPolitician::Wikidata.morph_wikinames(
  source: 'tmtmtmtm/cyprus-openpatata',
  column: 'wikipedia'
)

# Turn article URLs into article names.
#
# Each name is the final path segment of the URL with percent-escapes decoded.
#
# @param a [Array<String>, nil] article URLs; nil is treated as an empty list
# @return [Array<String>] decoded final path segments, in input order
def urls_to_names(a)
  # URI.decode was removed in Ruby 3.0. DEFAULT_PARSER.unescape is the decoder
  # it delegated to, so '+' is still left untouched (unlike CGI.unescape).
  Array(a).map { |url| URI::DEFAULT_PARSER.unescape(File.basename(url)) }
end

# Bucket the links by Wikipedia language subdomain (the "en" in
# "//en.wikipedia.org"); links that do not match are grouped under nil.
# The dots are escaped so that only a literal ".wikipedia.org" host matches.
by_site = links.group_by { |link| link[%r{//(\w+)\.wikipedia\.org}, 1] }

# Pass the English (en) and Greek (el) article names to scrape_wikidata.
# A language with no links contributes an empty list instead of nil, so the
# name conversion never receives nil.
EveryPolitician::Wikidata.scrape_wikidata(names: {
  en: urls_to_names(by_site.fetch('en', [])),
  el: urls_to_names(by_site.fetch('el', [])),
})

# Write whatever notify_rebuilder returns to stderr.
# NOTE(review): presumably this triggers a downstream rebuild — confirm
# against the fetcher's documentation.
warn(EveryPolitician::Wikidata.notify_rebuilder)

# Read the 'identifier__wikidata' column of the same source and hand the
# values to scrape_wikidata as ids, with the output flag set.
ids = EveryPolitician::Wikidata.morph_wikinames(
  source: 'tmtmtmtm/cyprus-openpatata',
  column: 'identifier__wikidata'
)
EveryPolitician::Wikidata.scrape_wikidata(ids: ids, output: true)

0 comments on commit a83480d

Please sign in to comment.