Skip to content

Commit

Permalink
Merge pull request #2 from everypolitician-scrapers/language-code
Browse files (browse the repository at this point in the history)
Language code
  • Loading branch information
tmtmtmtm committed Nov 28, 2016
2 parents fb8679b + 75d9735 commit 0854382
Showing 1 changed file with 14 additions and 15 deletions.
29 changes: 14 additions & 15 deletions scraper.rb
Expand Up @@ -32,7 +32,7 @@ def by_area
tds = tr.css('td')
data = {
name: tds[0].css('a').first.text.tidy,
wikipedia__tk: tds[0].xpath('a[not(@class="new")]/@title').text.strip,
wikipedia__tr: tds[0].xpath('a[not(@class="new")]/@title').text.strip,
area: tr.xpath('preceding::h2/span[@class="mw-headline"]').last.text,
party: tds[2].xpath('.//text()').first.text.tidy,
}
Expand All @@ -45,7 +45,7 @@ def by_area_twocol
party = tds[1].css('a').first.text.tidy rescue 'Bağımsız'
data = {
name: tds[0].css('a').first.text.tidy,
wikipedia__tk: tds[0].xpath('a[not(@class="new")]/@title').text.strip,
wikipedia__tr: tds[0].xpath('a[not(@class="new")]/@title').text.strip,
area: tr.xpath('preceding::h2/span[@class="mw-headline"]').last.text,
party: party,
}
Expand All @@ -62,9 +62,9 @@ def four_column
next if tr.text.to_s.empty?
tds = tr.css('td')
if tds.count == 4
area = tds[0].text
area = tds[0].text
namecol = 1
party = tds[3].text
party = tds[3].text
elsif tds.count == 3
namecol = 0
party = tds[2].text
Expand All @@ -77,7 +77,7 @@ def four_column
title = ->(col) { tds[col].xpath('a[not(@class="new")]/@title').text.strip }
{
name: name.(namecol),
wikipedia__tk: title.(namecol),
wikipedia__tr: title.(namecol),
area: area,
party: party,
}
Expand All @@ -90,7 +90,7 @@ def three_column
noko.xpath(".//table[.//th[3][contains(.,'Siyasi parti')]][1]/tr[td]").map do |tr|
tds = tr.css('td')
if tds.count == 3
area = tds[0].text
area = tds[0].text
namecol = 1
party = tds[2].xpath('.//text()').first.text.tidy
elsif tds.count == 2
Expand All @@ -101,7 +101,7 @@ def three_column
title = ->(col) { tds[col].xpath('a[not(@class="new")]/@title').text.strip }
{
name: name.(namecol),
wikipedia__tk: title.(namecol),
wikipedia__tr: title.(namecol),
area: area,
party: party,
}
Expand All @@ -114,7 +114,7 @@ def single_party
noko.xpath(".//table[.//th[1][contains(.,'Seçim Bölgesi')]][1]/tr[td]").map do |tr|
tds = tr.css('td')
if tds.count == 2
area = tds[0].text
area = tds[0].text
namecol = 1
else
namecol = 0
Expand All @@ -123,17 +123,16 @@ def single_party
title = ->(col) { tds[col].xpath('a[not(@class="new")]/@title').text.strip }
{
name: name.(namecol),
wikipedia__tk: title.(namecol),
wikipedia__tr: title.(namecol),
area: area,
party: party,
}
end
end

end

# Builds the identifier string for a scraped member record.
#
# The first non-empty value among the record's +:wikipedia__tr+ title and
# its +:name+ is lower-cased and every whitespace character in it is replaced
# with an underscore.
#
# Only +:wikipedia__tr+ is consulted. The obsolete +:wikipedia__tk+ lookup is
# deliberately absent: its result was never used, and evaluating it raised
# NoMethodError for records that had a title but an empty name.
#
# @param m [Hash] member record; read via +m[:wikipedia__tr]+ and +m[:name]+
# @return [String] lower-cased, underscore-separated identifier
# @raise [NoMethodError] if both values are nil or empty (nothing to build on)
def id_for(m)
  # to_s makes nil count as empty, so a missing key falls through to the next.
  source = [m[:wikipedia__tr], m[:name]].find { |n| !n.to_s.empty? }
  source.downcase.gsub(/[[:space:]]/, '_')
end

terms = {
Expand All @@ -144,7 +143,7 @@ def id_for(m)
single_party: [ 7, 6, 5, 4, 3, 2, 1 ],
}

PARTY = {
PARTY = {
AKP: ['Adalet ve Kalkınma Partisi'],
ANAP: ['Anavatan Partisi'],
AP: ['Adalet Partisi'],
Expand All @@ -158,7 +157,7 @@ def id_for(m)
DP46: ['Demokrat Parti'],
DP70: ['Demokratik Parti'],
DSP: ['Demokratik Sol Parti'],
DTP: ['Demokratik Türkiye Partisi'],
DTP: ['Demokratik Türkiye Partisi'],
DYP: ['Doğru Yol Partisi'],
FP: ['Fazilet Partisi'],
HDP: ['Halkların Demokratik Partisi'],
Expand All @@ -182,7 +181,7 @@ def id_for(m)
def party_from(party)
party = party.split(/\s*→\s*/).first if party.include? '→'
found = PARTY.find { |id, ns| ns.include? party } or binding.pry
{
{
party_id: found.first.to_s,
party: found.last.first,
}
Expand All @@ -194,7 +193,7 @@ def party_from(party)
url = "https://tr.wikipedia.org/wiki/TBMM_#{t}._d%C3%B6nem_milletvekilleri_listesi"
data = Parser.new(url: url).send(meth).map { |m|
binding.pry if m[:party].to_s.empty?
m.merge(party_from(m[:party])).merge(term: t, source: url, id: id_for(m))
m.merge(party_from(m[:party])).merge(term: t, source: url, id: id_for(m))
}
warn "#{t}: #{data.count}"
data.find_all { |m| m[:party][/[0-9]/] }.each { |m| puts m.to_s.magenta }
Expand Down

0 comments on commit 0854382

Please sign in to comment.