Skip to content
This repository has been archived by the owner on Jul 29, 2021. It is now read-only.
/ dbpedialite Public archive

Commit

Permalink
Hacked in support for redirecting from Wikidata identifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
njh committed Nov 24, 2012
1 parent 4f9813a commit ce1d35a
Show file tree
Hide file tree
Showing 3 changed files with 158 additions and 0 deletions.
19 changes: 19 additions & 0 deletions dbpedialite.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

require 'wikipedia_thing'
require 'wikipedia_category'
require 'wikidata_api'
require 'formats'


Expand Down Expand Up @@ -79,6 +80,18 @@ def redirect_from_title(title)
end
end

# Look up the sitelink for a Wikidata identifier and hand its title to
# redirect_from_title.
#
# id - identifier passed unchanged to WikidataApi.get_sitelink.
#
# A MediaWikiApi::NotFound from the lookup is reported through not_found;
# any other MediaWikiApi::Exception is reported as a 500 error.
def redirect_from_wikidata(id)
  sitelink = WikidataApi.get_sitelink(id)
  redirect_from_title(sitelink['title'])
rescue MediaWikiApi::NotFound => e
  # NotFound is a subclass of MediaWikiApi::Exception, so it must be rescued first.
  not_found(e.to_s)
rescue MediaWikiApi::Exception => e
  error(500, "Wikidata API exception: #{e}")
end

helpers do
include Sinatra::ContentFor
include Sinatra::UrlForHelper
Expand Down Expand Up @@ -176,6 +189,10 @@ def format_iso8061(datetime)
redirect_from_title(title)
end

# Route for /wikidata/Q<digits>: the captured digits are handed to
# redirect_from_wikidata.
get(%r{^/wikidata/Q(\d+)$}) { |id| redirect_from_wikidata(id) }

get %r{^/things/(\d+)\.?([a-z0-9]*)$} do |pageid,format|
begin
@thing = WikipediaThing.load(pageid)
Expand Down Expand Up @@ -222,6 +239,8 @@ def format_iso8061(datetime)
redirect_from_title($2)
elsif params[:url] =~ %r{^http://dbpedia.org/(page|resource|data)/(.+)$}
redirect_from_title($2)
elsif params[:url] =~ %r{^http://(www\.)?wikidata.org/wiki/Q(\d+)$}
redirect_from_wikidata($2)
elsif params[:url] =~ %r{^http://www.freebase.com/(view|inspect|edit/topic)(/.+)$}
begin
data = FreebaseApi.lookup_by_id($2)
Expand Down
111 changes: 111 additions & 0 deletions lib/mediawiki_api.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
require 'json'
require 'net/http'
require 'uri'

# Minimal client for a MediaWiki web API endpoint.
#
# This class does not define +api_uri+ itself: +get+ calls +self.api_uri+, so
# a subclass has to provide it and return the URI object of the endpoint.
class MediaWikiApi

  # Base class for every error raised by this client. It derives from
  # StandardError so that a plain +rescue+ catches it.
  class Exception < StandardError
  end

  # Raised when the API reports that the requested page does not exist.
  class NotFound < MediaWikiApi::Exception
  end

  USER_AGENT = 'DbpediaLite/1'
  # Applied to both the open and the read timeout of Net::HTTP (seconds).
  HTTP_TIMEOUT = 5
  # U+00A0 NO-BREAK SPACE as a UTF-8 string. Built directly so that loading
  # this class does not require parsing an HTML document.
  NBSP = [0xA0].pack('U')
  # Characters matching these patterns are percent-encoded. The legacy
  # three-argument Regexp.new form is not accepted by current Ruby versions,
  # and because every matched character is encoded byte by byte the result
  # is the same without the 'N' flag.
  UNSAFE_REGEXP = Regexp.new('[^-_\.!~*\'()a-zA-Z0-9;/:@&=$,]').freeze
  DBPEDIA_UNSAFE_REGEXP = Regexp.new('[^a-zA-Z0-9\.\-*/:_,&]').freeze
  TITLE_UNSAFE_REGEXP = Regexp.new('[' + Regexp.quote(' ?#%"+=') + ']').freeze

  # Percent-encode every character of +str+ that matches +unsafe+, one
  # upper-case %XX triplet per byte. Replaces URI::escape, which no longer
  # exists in current Ruby versions.
  def self.percent_escape(str, unsafe)
    str.gsub(unsafe) do |match|
      match.unpack('C*').map { |byte| '%%%02X' % byte }.join
    end
  end
  private_class_method :percent_escape

  # Escape a query-string key or value.
  # NOTE(review): UNSAFE_REGEXP leaves '&' and '=' unescaped, so a value
  # containing them would alter the query string -- confirm this is intended.
  def self.escape_query(str)
    percent_escape(str, UNSAFE_REGEXP)
  end

  # Turn spaces into underscores, then escape the characters ? # % " + =
  def self.escape_title(title)
    percent_escape(title.gsub(' ', '_'), TITLE_UNSAFE_REGEXP)
  end

  # Convert a page title into a DBpedia key: spaces become underscores, runs
  # of underscores collapse to one, and the remainder is percent-encoded.
  def self.title_to_dbpedia_key(title)
    # From http://dbpedia.org/URIencoding
    percent_escape(title.gsub(' ', '_').squeeze('_'), DBPEDIA_UNSAFE_REGEXP)
  end

  # Replace hash['displaytitle'], when present, with its text content
  # (markup removed by Nokogiri). The hash is modified in place.
  def self.clean_displaytitle(hash)
    if hash['displaytitle']
      hash['displaytitle'] = Nokogiri::HTML(hash['displaytitle']).text
    end
  end

  # Perform a GET request against +api_uri+ and return the parsed JSON.
  #
  # action - value of the 'action' request parameter.
  # args   - Hash of further request parameters; it is not modified.
  #
  # Raises MediaWikiApi::NotFound when the response carries the error code
  # 'nosuchpageid', and MediaWikiApi::Exception for any other error in the
  # response, a non-JSON content type or an empty response. HTTP failures
  # are raised by Net::HTTPResponse#value.
  def self.get(action, args={})
    # merge (not merge!) so that the caller's hash stays untouched
    params = args.merge(:action => action, :format => 'json')

    # Emit the parameters in a stable, sorted order
    pairs = params.keys.sort_by { |key| key.to_s }.map do |key|
      escape_query(key.to_s) + '=' + escape_query(params[key].to_s)
    end

    uri = self.api_uri
    uri.query = pairs.join('&')
    res = Net::HTTP.start(uri.host, uri.port) do |http|
      http.read_timeout = HTTP_TIMEOUT
      http.open_timeout = HTTP_TIMEOUT
      http.get(uri.request_uri, {'User-Agent' => USER_AGENT})
    end

    # Throw exception if unsuccessful
    res.value

    # Only a JSON response can be parsed
    unless res.content_type == 'application/json'
      raise MediaWikiApi::Exception,
        "Response from MediaWiki API was not of type application/json."
    end
    data = JSON.parse(res.body)

    # Check for errors in the response
    raise MediaWikiApi::Exception, 'Empty response' if data.nil?
    if data.has_key?('error')
      failure = data['error']
      if failure['code'] == 'nosuchpageid'
        raise MediaWikiApi::NotFound, failure['info']
      else
        raise MediaWikiApi::Exception, failure['info']
      end
    end

    data
  end

  # Fetch the 'info' properties (including the display title) of one page,
  # following redirects.
  #
  # args - Hash of extra request parameters that select the page.
  #
  # Returns the Hash for the first page in the response, with its
  # 'displaytitle' cleaned by clean_displaytitle.
  # Raises MediaWikiApi::NotFound when that page is flagged 'missing' and
  # MediaWikiApi::Exception when the response contains no pages.
  def self.page_info(args)
    data = self.get('query', {
      :prop => 'info',
      :inprop => 'displaytitle',
      :redirects => 1
    }.merge(args))

    pages = data['query'] && data['query']['pages']
    if pages.nil? || pages.empty?
      raise MediaWikiApi::Exception, 'Empty response'
    end

    info = pages.values.first
    raise MediaWikiApi::NotFound, 'Page does not exist' if info.has_key?('missing')

    clean_displaytitle(info)
    info
  end

  # Run a search for +query+ and return the 'search' list of the response.
  #
  # args - Hash of extra request parameters; they override the defaults.
  def self.search(query, args={})
    data = self.get('query', {
      :list => 'search',
      :srprop => 'snippet|titlesnippet',
      :srsearch => query
    }.merge(args))

    data['query']['search']
  end

end
28 changes: 28 additions & 0 deletions lib/wikidata_api.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
require 'mediawiki_api'

# MediaWikiApi client bound to the Wikidata endpoint.
class WikidataApi < MediaWikiApi

  # URI of the endpoint that MediaWikiApi.get sends its requests to.
  def self.api_uri
    URI.parse('http://wikidata.org/w/api.php')
  end

  # Fetch the sitelink of a Wikidata item for one site.
  #
  # id   - item identifier; also used as the key into the 'items' hash of
  #        the response.
  # site - site key of the wanted sitelink (default 'enwiki').
  #
  # Returns the sitelink entry stored under +site+.
  # Raises MediaWikiApi::Exception when the response has no 'items', and
  # MediaWikiApi::NotFound when the item or its sitelink is absent.
  def self.get_sitelink(id, site='enwiki')
    data = self.get('wbgetitems', {
      :ids => id,
      :props => 'sitelinks',
      :languages => 'en',
      :sites => site
    })

    items = data['items']
    raise MediaWikiApi::Exception, 'Empty response' if items.nil?

    item = items[id]
    raise MediaWikiApi::NotFound, 'Wikidata identifier does not exist' if item.nil?

    # An item without any 'sitelinks' entry is treated like a missing sitelink
    sitelink = (item['sitelinks'] || {})[site]
    if sitelink.nil?
      raise MediaWikiApi::NotFound, 'Sitelink does not exist for Wikidata identifier'
    end

    sitelink
  end

end

0 comments on commit ce1d35a

Please sign in to comment.