Skip to content

Commit

Permalink
Rewrite in modern style
Browse files Browse the repository at this point in the history
  • Loading branch information
tmtmtmtm committed Apr 2, 2019
1 parent 24abcac commit 0bc5ee2
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 96 deletions.
20 changes: 14 additions & 6 deletions Gemfile
@@ -1,15 +1,23 @@
# frozen_string_literal: true

# It's easy to add more libraries or choose different versions. Any libraries
# specified here will be installed and made available to your morph.io scraper.
# Find out more: https://morph.io/documentation/ruby

ruby '2.4.4'

source 'https://rubygems.org'
git_source(:github) { |repo_name| "https://github.com/#{repo_name}.git" }

ruby '2.3.3'

gem 'everypolitician', github: 'everypolitician/everypolitician-ruby'
gem 'pry'
gem 'rubocop'
gem 'rest-client'
gem 'scraped', github: 'everypolitician/scraped', branch: 'scraper-class'
gem 'scraperwiki', github: 'openaustralia/scraperwiki-ruby', branch: 'morph_defaults'
gem 'wikisnakker', github: 'everypolitician/wikisnakker'
gem 'sqlite_magic', github: 'openc/sqlite_magic'

group :quality do
gem 'rubocop'
end

group :development do
gem 'pry'
end
102 changes: 61 additions & 41 deletions Gemfile.lock
@@ -1,19 +1,13 @@
GIT
remote: https://github.com/everypolitician/everypolitician-ruby.git
revision: 20582b1512358a0e8789ea289201e37ab5f085c2
remote: https://github.com/everypolitician/scraped.git
revision: ecb23adeca95fba5356509d6445d528e212b3905
branch: scraper-class
specs:
everypolitician (0.20.0)
everypolitician-popolo (>= 0.8.0)
scraped (0.6.2)
field_serializer (>= 0.3.0)
nokogiri
require_all

GIT
remote: https://github.com/everypolitician/wikisnakker.git
revision: 4e091cdc9619b6c12db8903075effef361071132
specs:
wikisnakker (0.9.1)
require_all
yajl-ruby

GIT
remote: https://github.com/openaustralia/scraperwiki-ruby.git
revision: fc50176812505e463077d5c673d504a6a234aa78
@@ -23,50 +17,76 @@ GIT
httpclient
sqlite_magic

GIT
remote: https://github.com/openc/sqlite_magic.git
revision: 4df975eb4e9891de54f870077c83f63762af9bf9
specs:
sqlite_magic (0.0.6)
sqlite3

GEM
remote: https://rubygems.org/
specs:
ast (2.3.0)
coderay (1.1.0)
everypolitician-popolo (0.8.0)
require_all
httpclient (2.6.0.1)
method_source (0.8.2)
parser (2.3.1.2)
ast (~> 2.2)
powerpack (0.1.1)
pry (0.10.1)
ast (2.4.0)
coderay (1.1.2)
domain_name (0.5.20180417)
unf (>= 0.0.5, < 1.0.0)
field_serializer (0.3.0)
http-cookie (1.0.3)
domain_name (~> 0.5)
httpclient (2.8.3)
jaro_winkler (1.5.2)
method_source (0.9.2)
mime-types (3.2.2)
mime-types-data (~> 3.2015)
mime-types-data (3.2018.0812)
mini_portile2 (2.4.0)
netrc (0.11.0)
nokogiri (1.10.1)
mini_portile2 (~> 2.4.0)
parallel (1.14.0)
parser (2.6.0.0)
ast (~> 2.4.0)
powerpack (0.1.2)
pry (0.12.2)
coderay (~> 1.1.0)
method_source (~> 0.8.1)
slop (~> 3.4)
rainbow (2.1.0)
require_all (1.4.0)
rubocop (0.42.0)
parser (>= 2.3.1.1, < 3.0)
method_source (~> 0.9.0)
psych (3.1.0)
rainbow (3.0.0)
require_all (2.0.0)
rest-client (2.0.2)
http-cookie (>= 1.0.2, < 2.0)
mime-types (>= 1.16, < 4.0)
netrc (~> 0.8)
rubocop (0.65.0)
jaro_winkler (~> 1.5.1)
parallel (~> 1.10)
parser (>= 2.5, != 2.5.1.1)
powerpack (~> 0.1)
rainbow (>= 1.99.1, < 3.0)
psych (>= 3.1.0)
rainbow (>= 2.2.2, < 4.0)
ruby-progressbar (~> 1.7)
unicode-display_width (~> 1.0, >= 1.0.1)
ruby-progressbar (1.8.1)
slop (3.6.0)
sqlite3 (1.3.10)
sqlite_magic (0.0.3)
sqlite3
unicode-display_width (1.1.1)
yajl-ruby (1.3.0)
unicode-display_width (~> 1.4.0)
ruby-progressbar (1.10.0)
sqlite3 (1.4.0)
unf (0.1.4)
unf_ext
unf_ext (0.0.7.5)
unicode-display_width (1.4.1)

PLATFORMS
ruby

DEPENDENCIES
everypolitician!
pry
rest-client
rubocop
scraped!
scraperwiki!
wikisnakker!
sqlite_magic!

RUBY VERSION
ruby 2.3.3p222
ruby 2.4.4p296

BUNDLED WITH
1.13.6
1.16.5
109 changes: 109 additions & 0 deletions lib/cabinet.rb
@@ -0,0 +1,109 @@
# frozen_string_literal: true

# TODO: extend Scraped::Scraper with ability to add Strategies
#
# Reopens Scraped's live-request strategy so that #url returns a Wikidata
# query-service URL: the value held in @url is substituted into the SPARQL
# template below as an item ID, and the resulting query is URL-escaped into
# SPARQL_URL.
# NOTE(review): presumably @url is populated by Scraped's own strategy
# initializer and the overridden #url is what Scraped then fetches; neither is
# visible in this file — confirm against the scraped gem.
class Scraped::Request::Strategy::LiveRequest
  # CGI.escape is used by #url; load it explicitly rather than relying on some
  # other library having required it first.
  require 'cgi'
  require 'rest-client'

  # Constants are declared ahead of `private` because `private` has no effect
  # on constants; they were, and remain, publicly reachable.

  # Query-service endpoint; %s receives the URL-escaped SPARQL text.
  SPARQL_URL = 'https://query.wikidata.org/sparql?format=json&query=%s'

  # SPARQL template with two %s placeholders, both filled with the same item
  # ID. It selects P39 statements (?ps) on items that have a P39 value at or
  # below the given item in the P279 hierarchy, keeps those whose ?minister
  # value is at or below wd:Q83307, and optionally picks up the P1545 ordinal
  # and the P580 / P582 time values together with their precisions.
  QUERY = <<~SPARQL
    SELECT DISTINCT ?ps ?item ?itemLabel ?minister ?ministerLabel ?ordinal ?start ?startprecision ?end ?endprecision ?cabinet ?cabinetLabel {
    {
    SELECT DISTINCT ?ps ?item ?minister ?ordinal ?start ?startprecision ?end ?endprecision ?cabinet {
    ?item p:P39/ps:P39/wdt:P279* wd:%s .
    ?item p:P39 ?ps .
    ?ps ps:P39 ?minister .
    ?minister wdt:P279* wd:Q83307 .
    OPTIONAL { ?ps pq:P1545 ?ordinal }
    OPTIONAL { ?ps pqv:P580 [wikibase:timeValue ?start ; wikibase:timePrecision ?startprecision ] }
    OPTIONAL { ?ps pqv:P582 [wikibase:timeValue ?end ; wikibase:timePrecision ?endprecision ] }
    # Ignore anything with a different jurisdiction
    OPTIONAL { wd:%s wdt:P1001 ?legislative_jurisdiction }
    OPTIONAL { ?minister wdt:P1001 ?executive_jurisdiction }
    FILTER (!BOUND(?legislative_jurisdiction) || !BOUND(?executive_jurisdiction) || (?legislative_jurisdiction = ?executive_jurisdiction))
    }
    }
    SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
    }
  SPARQL

  # @return [String] SPARQL_URL with the escaped, filled-in query substituted
  def url
    SPARQL_URL % CGI.escape(raw_query)
  end

  private

  # @return [String] QUERY with @url substituted into both placeholders
  def raw_query
    QUERY % [@url, @url]
  end
end

class CabinetScraper < Scraped::JSON
# One hash per entry of the response's results/bindings list, each produced
# by wrapping the entry in a Membership fragment and calling to_h on it.
field :memberships do
  rows = json[:results][:bindings]
  rows.map do |row|
    fragment(row => Membership).to_h
  end
end

# A timestamp string paired with a precision code, renderable as the
# timestamp truncated to the length that precision calls for.
class Wikidate
  # Number of leading characters of the timestamp to keep, keyed by
  # precision code (as a String).
  PRECISION_LENGTH = {
    '9'  => 4,  # year
    '10' => 7,  # month
    '11' => 10, # day
  }.freeze

  # @param date [String, nil] timestamp text
  # @param precision [String, nil] precision code, looked up in PRECISION_LENGTH
  def initialize(date, precision)
    @date = date
    @precision = precision
  end

  # Deliberately not named to_s, because the result may be nil.
  #
  # @return [String, nil] the leading portion of the timestamp, or nil when
  #   either input is missing or the precision code is not in the table
  def as_string
    keep = PRECISION_LENGTH[precision] if date && precision
    date[0, keep] if keep
  end

  private

  attr_reader :date, :precision
end

# A single result binding, exposed as a flat set of fields. Each field
# reads the :value entry stored under one of the binding's keys.
class Membership < Scraped::JSON
  # Last '/'-separated segment of the :item value.
  field(:id) { trailing_segment(:item) }

  field(:name) { binding_value(:itemLabel) }

  # Last '/'-separated segment of the :ps value.
  field(:position_id) { trailing_segment(:ps) }

  # Last '/'-separated segment of the :minister value.
  field(:position) { trailing_segment(:minister) }

  field(:label) { binding_value(:ministerLabel) }

  field(:start_date) { truncated_date(:start, :startprecision) }

  field(:end_date) { truncated_date(:end, :endprecision) }

  # nil.to_i is 0, so an absent ordinal comes out as 0.
  field(:ordinal) { binding_value(:ordinal).to_i }

  private

  # The :value stored under +key+ in this binding, or nil when absent.
  def binding_value(key)
    json.dig(key, :value)
  end

  # Final path segment of the value under +key+ (nil when the value is
  # missing, since ''.split('/') is empty).
  def trailing_segment(key)
    binding_value(key).to_s.split('/').last
  end

  # Timestamp under +date_key+ cut down to the precision under
  # +precision_key+, via Wikidate.
  def truncated_date(date_key, precision_key)
    Wikidate.new(binding_value(date_key), binding_value(precision_key)).as_string
  end
end
end
38 changes: 0 additions & 38 deletions lib/politician.rb

This file was deleted.

14 changes: 3 additions & 11 deletions scraper.rb
@@ -1,16 +1,8 @@
#!/bin/env ruby
# encoding: utf-8
# frozen_string_literal: true

require 'everypolitician'
require 'pry'
require 'scraped'
require 'scraperwiki'
require_relative 'lib/cabinet'

require_relative 'lib/politician'

ScraperWiki.sqliteexecute('DROP TABLE data') rescue nil
house = EveryPolitician::Index.new.country('United-States-of-America').upper_house
house.popolo.persons.map(&:wikidata).compact.each_slice(100) do |wanted|
data = Wikisnakker::Politician.find(wanted).flat_map(&:positions).compact
ScraperWiki.save_sqlite(%i(id position start_date), data)
end
Scraped::Scraper.new('Q13217683' => CabinetScraper).store(:memberships, index: %i[position_id])

0 comments on commit 0bc5ee2

Please sign in to comment.