Permalink
Browse files

Hacked in support for redirecting from Wikidata identifiers

  • Loading branch information...
1 parent 4f9813a commit ce1d35a9239274e6608195160a237c55ef8bae2c @njh committed Nov 24, 2012
Showing with 158 additions and 0 deletions.
  1. +19 −0 dbpedialite.rb
  2. +111 −0 lib/mediawiki_api.rb
  3. +28 −0 lib/wikidata_api.rb
View
@@ -2,6 +2,7 @@
require 'wikipedia_thing'
require 'wikipedia_category'
+require 'wikidata_api'
require 'formats'
@@ -79,6 +80,18 @@ def redirect_from_title(title)
end
end
# Redirect to the page for the Wikipedia article linked from a Wikidata item.
#
# Looks up the item's sitelink via WikidataApi.get_sitelink and hands the
# sitelink's 'title' to redirect_from_title.
#
# id - the Wikidata identifier exactly as captured by the caller's route or
#      URL pattern (the digits following the "Q").
#
# A missing item or sitelink is reported through not_found; any other
# MediaWiki API failure is reported as a 500 through error.
def redirect_from_wikidata(id)
  sitelink = WikidataApi.get_sitelink(id)
  redirect_from_title(sitelink['title'])
rescue MediaWikiApi::NotFound => e
  # The exception classes live under MediaWikiApi; there is no MediaWiki
  # namespace, so rescuing MediaWiki::* raised NameError instead of handling
  # the failure.
  not_found(e.to_s)
rescue MediaWikiApi::Exception => e
  error(500, "Wikidata API exception: #{e}")
  # No trailing redirect_from_title(title) here: `title` is not defined in
  # this method, so that call could only ever raise NameError.
end
+
helpers do
include Sinatra::ContentFor
include Sinatra::UrlForHelper
@@ -176,6 +189,10 @@ def format_iso8061(datetime)
redirect_from_title(title)
end
# Route for /wikidata/Q<digits>: passes the captured digits (without the
# leading "Q") to redirect_from_wikidata.
get(%r{^/wikidata/Q(\d+)$}) { |wikidata_id| redirect_from_wikidata(wikidata_id) }
+
get %r{^/things/(\d+)\.?([a-z0-9]*)$} do |pageid,format|
begin
@thing = WikipediaThing.load(pageid)
@@ -222,6 +239,8 @@ def format_iso8061(datetime)
redirect_from_title($2)
elsif params[:url] =~ %r{^http://dbpedia.org/(page|resource|data)/(.+)$}
redirect_from_title($2)
+ elsif params[:url] =~ %r{^http://(www\.)?wikidata.org/wiki/Q(\d+)$}
+ redirect_from_wikidata($2)
elsif params[:url] =~ %r{^http://www.freebase.com/(view|inspect|edit/topic)(/.+)$}
begin
data = FreebaseApi.lookup_by_id($2)
View
@@ -0,0 +1,111 @@
+require 'net/http'
+require 'uri'
+
# Shared client code for talking to a MediaWiki web API (api.php).
#
# This class does not define +api_uri+ itself; +get+ calls it on the receiver,
# so a subclass must provide a class method +api_uri+ returning a URI that
# +get+ is free to modify.
#
# NOTE(review): JSON and Nokogiri are referenced below but this file only
# requires net/http and uri; presumably the application loads them first.
class MediaWikiApi

  # Base class for every error raised by this client. Derives from
  # StandardError (not ::Exception) so that a plain `rescue` catches it.
  class Exception < StandardError
  end

  # Raised when the requested page or item does not exist.
  class NotFound < MediaWikiApi::Exception
  end

  USER_AGENT = 'DbpediaLite/1'.freeze
  # Seconds; used for both the connect and the read timeout.
  HTTP_TIMEOUT = 5
  # A non-breaking space (U+00A0) as a UTF-8 string. Built directly rather
  # than by parsing "&nbsp;" with Nokogiri, so loading this file does not
  # depend on Nokogiri being present.
  NBSP = [0xA0].pack('U').freeze
  # Characters matching these patterns are percent-encoded. Written as /n
  # literals because the three-argument Regexp.new(str, false, 'N') form is
  # rejected by newer Ruby versions.
  UNSAFE_REGEXP = %r{[^-_\.!~*'()a-zA-Z0-9;/:@&=$,]}n.freeze
  DBPEDIA_UNSAFE_REGEXP = %r{[^a-zA-Z0-9\.\-*/:_,&]}n.freeze

  # Percent-encode every character of +str+ that matches UNSAFE_REGEXP.
  def self.escape_query(str)
    percent_escape(str, UNSAFE_REGEXP)
  end

  # Turn a page title into its URL form: spaces become underscores and the
  # characters ' ?#%"+=' are percent-encoded.
  def self.escape_title(title)
    percent_escape(title.gsub(' ', '_'), ' ?#%"+=')
  end

  # Turn a page title into a DBpedia resource key: spaces become underscores,
  # runs of underscores are collapsed, and anything matching
  # DBPEDIA_UNSAFE_REGEXP is percent-encoded.
  def self.title_to_dbpedia_key(title)
    # From http://dbpedia.org/URIencoding
    percent_escape(title.gsub(' ', '_').squeeze('_'), DBPEDIA_UNSAFE_REGEXP)
  end

  # Replace hash['displaytitle'] (HTML) with its plain-text content, in place.
  # Does nothing when the key is absent or falsy.
  def self.clean_displaytitle(hash)
    if hash['displaytitle']
      hash['displaytitle'] = Nokogiri::HTML(hash['displaytitle']).text
    end
  end

  # Perform a GET request against the API and return the parsed JSON body.
  #
  # action - value of the API's "action" parameter.
  # args   - further query parameters; the hash is not modified.
  #
  # Raises MediaWikiApi::NotFound when the API reports error code
  # 'nosuchpageid', MediaWikiApi::Exception for any other API error, for a
  # non-JSON content type or for an empty body. Non-2xx HTTP responses raise
  # whatever Net::HTTPResponse#value raises.
  def self.get(action, args={})
    # merge (not merge!) so the caller's hash is left untouched.
    params = args.merge(:action => action, :format => 'json')
    # Parameters are emitted in a stable, sorted order.
    pairs = params.keys.sort_by { |key| key.to_s }.map do |key|
      "#{escape_query(key.to_s)}=#{escape_query(params[key].to_s)}"
    end

    uri = api_uri
    uri.query = pairs.join('&')

    # Timeouts must be set before the connection is opened; assigning
    # open_timeout inside a Net::HTTP.start block is too late to take effect.
    http = Net::HTTP.new(uri.host, uri.port)
    http.open_timeout = HTTP_TIMEOUT
    http.read_timeout = HTTP_TIMEOUT
    res = http.start do |conn|
      conn.get(uri.request_uri, 'User-Agent' => USER_AGENT)
    end

    # Throw exception if unsuccessful
    res.value

    # Only JSON responses are understood
    unless res.content_type == 'application/json'
      raise MediaWikiApi::Exception,
        'Response from MediaWiki API was not of type application/json.'
    end
    data = JSON.parse(res.body)

    # Check for errors in the response
    raise MediaWikiApi::Exception, 'Empty response' if data.nil?
    if data.has_key?('error')
      failure = data['error']
      error_class = failure['code'] == 'nosuchpageid' ? MediaWikiApi::NotFound : MediaWikiApi::Exception
      raise error_class, failure['info']
    end

    data
  end

  # Fetch the "info" record (including displaytitle, following redirects) for
  # the single page selected by +args+.
  #
  # Returns the first entry of the response's query/pages hash, with
  # 'displaytitle' converted to plain text.
  #
  # Raises MediaWikiApi::Exception when the response has no pages and
  # MediaWikiApi::NotFound when the page is flagged 'missing'. (These are this
  # class's own error classes, matching what +get+ raises, rather than
  # constants borrowed from WikipediaApi.)
  def self.page_info(args)
    data = get('query', {
      :prop => 'info',
      :inprop => 'displaytitle',
      :redirects => 1
    }.merge(args))

    pages = data['query'] && data['query']['pages']
    if pages.nil? || pages.empty?
      raise MediaWikiApi::Exception, 'Empty response'
    end

    info = pages.values.first
    raise MediaWikiApi::NotFound, 'Page not found' if info.has_key?('missing')

    clean_displaytitle(info)
    info
  end

  # Run a search and return the 'search' list from the response's 'query'
  # section. +args+ may add or override query parameters.
  def self.search(query, args={})
    data = get('query', {
      :list => 'search',
      :srprop => 'snippet|titlesnippet',
      :srsearch => query
    }.merge(args))

    data['query']['search']
  end

  # Percent-encode each byte of every character in +str+ that matches
  # +unsafe+ (a Regexp, or a String listing the characters to encode).
  # Stands in for URI.escape, which newer Ruby versions no longer provide.
  def self.percent_escape(str, unsafe)
    unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]") unless unsafe.is_a?(Regexp)
    str.gsub(unsafe) do |chunk|
      chunk.unpack('C*').map { |byte| format('%%%02X', byte) }.join
    end
  end
  private_class_method :percent_escape

end
View
@@ -0,0 +1,28 @@
+require 'mediawiki_api'
+
# MediaWikiApi client pointed at the Wikidata API endpoint.
class WikidataApi < MediaWikiApi

  # Endpoint used by MediaWikiApi.get. A new URI object is built on every
  # call because +get+ assigns the query string on the object it receives.
  def self.api_uri
    URI.parse('http://wikidata.org/w/api.php')
  end

  # Look up the sitelink of a Wikidata item for one site.
  #
  # id   - the item identifier, sent as the "ids" parameter and also used
  #        verbatim as the key into the response's 'items' hash.
  #        NOTE(review): confirm the API keys its response by exactly the
  #        value passed in.
  # site - site identifier to fetch the sitelink for (default 'enwiki').
  #
  # Returns the value stored under 'sitelinks' => site for the item.
  #
  # Raises MediaWikiApi::Exception when the response has no 'items' section,
  # and MediaWikiApi::NotFound when the item or its sitelink is absent.
  # (The error classes are defined under MediaWikiApi; there is no MediaWiki
  # namespace.)
  def self.get_sitelink(id, site='enwiki')
    data = get('wbgetitems', {
      :ids => id,
      :props => 'sitelinks',
      :languages => 'en',
      :sites => site
    })

    items = data['items']
    raise MediaWikiApi::Exception, 'Empty response' if items.nil?

    item = items[id]
    raise MediaWikiApi::NotFound, 'Wikidata identifier does not exist' if item.nil?

    # An item without any 'sitelinks' section is treated like a missing
    # sitelink instead of failing with NoMethodError on nil.
    sitelink = item['sitelinks'] && item['sitelinks'][site]
    if sitelink.nil?
      raise MediaWikiApi::NotFound, 'Sitelink does not exist for Wikidata identifier'
    end

    sitelink
  end

end

0 comments on commit ce1d35a

Please sign in to comment.