Converted WikipediaApi into a subclass of MediaWikiApi

commit 09739408bc0772c97f2ca368278058fe9d3b1a2d (parent 45e8973)
Authored by @njh
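
In outline, the commit moves the generic HTTP/JSON plumbing into a MediaWikiApi base class and leaves WikipediaApi with only the Wikipedia-specific parts: the endpoint, the DBpedia key helper, and the page/category/parse calls. A minimal sketch of the resulting shape, assuming MediaWikiApi.get now resolves its endpoint through the api_uri hook (that call site falls outside the diff context below, so it is an inference rather than something the commit shows):

require 'uri'

class MediaWikiApi
  class Exception < ::Exception; end
  class NotFound < MediaWikiApi::Exception; end

  # Subclasses supply the API endpoint they talk to.
  def self.api_uri
    raise NotImplementedError, 'subclass must define api_uri'
  end

  def self.get(action, args = {})
    uri = api_uri.clone
    query = args.merge(:action => action, :format => 'json')
    # Escaping and key sorting omitted for brevity.
    uri.query = query.map { |k, v| "#{k}=#{v}" }.join('&')
    uri # the real method performs the HTTP GET and parses the JSON body
  end
end

class WikipediaApi < MediaWikiApi
  def self.api_uri
    URI.parse('http://en.wikipedia.org/w/api.php')
  end
end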
dbpedialite.rb
@@ -73,9 +73,9 @@ def redirect_from_title(title)
else
error 500, "Unsupported Wikipedia namespace: #{data['ns']}"
end
- rescue WikipediaApi::PageNotFound
+ rescue MediaWikiApi::NotFound
not_found "Wikipedia page title not found."
- rescue WikipediaApi::Exception => e
+ rescue MediaWikiApi::Exception => e
error 500, "Wikipedia API exception: #{e}"
end
end
@@ -198,7 +198,7 @@ def format_iso8061(datetime)
@thing = WikipediaThing.load(pageid)
rescue WikipediaApi::Redirect => redirect
redirect("/things/#{redirect.pageid}", 301)
- rescue WikipediaApi::PageNotFound
+ rescue MediaWikiApi::NotFound
not_found("Thing not found.")
end
@@ -214,7 +214,7 @@ def format_iso8061(datetime)
@category = WikipediaCategory.load(pageid)
rescue WikipediaApi::Redirect => redirect
redirect("/categories/#{redirect.pageid}", 301)
- rescue WikipediaApi::PageNotFound
+ rescue MediaWikiApi::NotFound
not_found("Category not found.")
end
@@ -253,7 +253,7 @@ def format_iso8061(datetime)
data = WikipediaApi.page_info(:pageids => $3)
escaped = WikipediaApi.escape_title(data['title'])
redirect "http://en.wikipedia.org/wiki/#{escaped}", 301
- rescue WikipediaApi::PageNotFound
+ rescue MediaWikiApi::NotFound
not_found("Wikipedia page id not found")
end
else
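
The handler changes above all follow one pattern: rescue the renamed exceptions, most specific first. A paraphrase of the first hunk (the page_info call is illustrative; not_found and error are the Sinatra helpers already in use there):

begin
  data = WikipediaApi.page_info(:titles => title)
rescue MediaWikiApi::NotFound
  not_found "Wikipedia page title not found."
rescue MediaWikiApi::Exception => e
  error 500, "Wikipedia API exception: #{e}"
end

Because NotFound subclasses MediaWikiApi::Exception, the specific rescue must come first; each updated handler keeps that ordering.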
lib/mediawiki_api.rb
@@ -3,18 +3,17 @@
class MediaWikiApi
+ USER_AGENT = 'DbpediaLite/1'
+ HTTP_TIMEOUT = 5
+ NBSP = Nokogiri::HTML("&nbsp;").text
+ UNSAFE_REGEXP = Regexp.new('[^-_\.!~*\'()a-zA-Z0-9;/:@&=$,]', false, 'N').freeze
+
class Exception < Exception
end
class NotFound < MediaWikiApi::Exception
end
- USER_AGENT = 'DbpediaLite/1'
- HTTP_TIMEOUT = 5
- NBSP = Nokogiri::HTML("&nbsp;").text
- UNSAFE_REGEXP = Regexp.new('[^-_\.!~*\'()a-zA-Z0-9;/:@&=$,]', false, 'N').freeze
- DBPEDIA_UNSAFE_REGEXP = Regexp.new('[^a-zA-Z0-9\.\-*/:_,&]', false, 'N').freeze
-
def self.escape_query(str)
URI::escape(str, UNSAFE_REGEXP)
end
@@ -23,17 +22,6 @@ def self.escape_title(title)
URI::escape(title.gsub(' ','_'), ' ?#%"+=')
end
- def self.title_to_dbpedia_key(title)
- # From http://dbpedia.org/URIencoding
- URI::escape(title.gsub(' ', '_').squeeze('_'), DBPEDIA_UNSAFE_REGEXP)
- end
-
- def self.clean_displaytitle(hash)
- if hash['displaytitle']
- hash['displaytitle'] = Nokogiri::HTML(hash['displaytitle']).text
- end
- end
-
def self.get(action, args={})
items = []
args.merge!(:action => action, :format => 'json')
@@ -81,31 +69,4 @@ def self.get(action, args={})
return data
end
- def self.page_info(args)
- data = self.get('query', {
- :prop => 'info',
- :inprop => 'displaytitle',
- :redirects => 1
- }.merge(args))
-
- if data['query'].nil? or data['query']['pages'].empty?
- raise WikipediaApi::Exception.new('Empty response')
- else
- info = data['query']['pages'].values.first
- if info.has_key?('missing')
- raise WikipediaApi::PageNotFound.new
- else
- clean_displaytitle(info)
- return info
- end
- end
- end
-
- def self.search(query, args={})
- data = self.get('query', {:list => 'search', :srprop => 'snippet|titlesnippet', :srsearch => query}.merge(args))
-
- data['query']['search']
- end
-
-
end
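
One payoff of the split, purely illustrative and not part of this commit: talking to another MediaWiki installation should now just be a matter of subclassing and overriding the endpoint.

require 'uri'

# Hypothetical subclass (not in this commit): the request, escaping,
# and error-handling logic is inherited from MediaWikiApi unchanged.
class CommonsApi < MediaWikiApi
  def self.api_uri
    URI.parse('http://commons.wikimedia.org/w/api.php')
  end
end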
lib/wikipedia_api.rb
@@ -1,15 +1,12 @@
-require 'net/http'
-require 'uri'
+require 'mediawiki_api'
-module WikipediaApi
+class WikipediaApi < MediaWikiApi
- class Exception < Exception
- end
-
- class PageNotFound < WikipediaApi::Exception
- end
+ ABSTRACT_MAX_LENGTH = 500
+ ABSTRACT_TRUNCATE_LENGTH = 700
+ DBPEDIA_UNSAFE_REGEXP = Regexp.new('[^a-zA-Z0-9\.\-*/:_,&]', false, 'N').freeze
- class Redirect < WikipediaApi::Exception
+ class Redirect < MediaWikiApi::Exception
attr_reader :pageid
attr_reader :title
@@ -19,21 +16,8 @@ def initialize(pageid, title)
end
end
- USER_AGENT = 'DbpediaLite/1'
- API_URI = URI.parse('http://en.wikipedia.org/w/api.php')
- ABSTRACT_MAX_LENGTH = 500
- ABSTRACT_TRUNCATE_LENGTH = 700
- HTTP_TIMEOUT = 5
- NBSP = Nokogiri::HTML("&nbsp;").text
- UNSAFE_REGEXP = Regexp.new('[^-_\.!~*\'()a-zA-Z0-9;/:@&=$,]', false, 'N').freeze
- DBPEDIA_UNSAFE_REGEXP = Regexp.new('[^a-zA-Z0-9\.\-*/:_,&]', false, 'N').freeze
-
- def self.escape_query(str)
- URI::escape(str, UNSAFE_REGEXP)
- end
-
- def self.escape_title(title)
- URI::escape(title.gsub(' ','_'), ' ?#%"+=')
+ def self.api_uri
+ URI.parse('http://en.wikipedia.org/w/api.php')
end
def self.title_to_dbpedia_key(title)
@@ -55,11 +39,11 @@ def self.page_info(args)
}.merge(args))
if data['query'].nil? or data['query']['pages'].empty?
- raise WikipediaApi::Exception.new('Empty response')
+ raise MediaWikiApi::Exception.new('Empty response')
else
info = data['query']['pages'].values.first
if info.has_key?('missing')
- raise WikipediaApi::PageNotFound.new
+ raise MediaWikiApi::NotFound.new
else
clean_displaytitle(info)
return info
@@ -73,53 +57,6 @@ def self.search(query, args={})
data['query']['search']
end
- def self.get(action, args={})
- items = []
- args.merge!(:action => action, :format => 'json')
-
- keys = args.keys.sort {|a,b| a.to_s <=> b.to_s}
- keys.each do |key|
- items << escape_query(key.to_s)+'='+escape_query(args[key].to_s)
- end
-
- uri = API_URI.clone
- uri.query = items.join('&')
- res = Net::HTTP.start(uri.host, uri.port) do |http|
- http.read_timeout = HTTP_TIMEOUT
- http.open_timeout = HTTP_TIMEOUT
- http.get(uri.request_uri, {'User-Agent' => USER_AGENT})
- end
-
- # Throw exception if unsuccessful
- res.value
-
- # Parse the response if it is JSON
- if res.content_type == 'application/json'
- data = JSON.parse(res.body)
- else
- raise WikipediaApi::Exception.new(
- "Response from Wikipedia API was not of type application/json."
- )
- end
-
- # Check for errors in the response
- if data.nil?
- raise WikipediaApi::Exception.new('Empty response')
- elsif data.has_key?('error')
- if data['error']['code'] == 'nosuchpageid'
- raise WikipediaApi::PageNotFound.new(
- data['error']['info']
- )
- else
- raise WikipediaApi::Exception.new(
- data['error']['info']
- )
- end
- end
-
- return data
- end
-
def self.category_members(pageid, args={})
data = self.get('query', {
:generator => 'categorymembers',
@@ -147,7 +84,6 @@ def self.page_categories(pageid, args={})
values.each {|v| clean_displaytitle(v) }
end
-
def self.parse(pageid, args={})
data = self.get('parse', {
:prop => 'text|displaytitle',
lib/wikipedia_category.rb
@@ -13,7 +13,7 @@ def load
# Is it actually a category?
unless data['ns'] == 14
- raise WikipediaApi::PageNotFound.new("Page #{pageid} is not a category")
+ raise MediaWikiApi::NotFound.new("Page #{pageid} is not a category")
end
# Update object properties with the data that was loaded
scripts/daily_ping.rb
@@ -93,7 +93,7 @@ def ping_the_semantic_web(ping_url)
ping_the_semantic_web(rdf_url)
rdf_urls << rdf_url
end
- rescue WikipediaApi::PageNotFound
+ rescue MediaWikiApi::NotFound
end
end
spec/wikipedia_api_spec.rb
@@ -412,7 +412,7 @@
it "should raise an exception" do
lambda {WikipediaApi.parse(504825766)}.should raise_error(
- WikipediaApi::PageNotFound,
+ MediaWikiApi::NotFound,
'There is no page with ID 504825766'
)
end
@@ -523,7 +523,7 @@
it "should trow a PageNotFound exception" do
lambda { WikipediaApi.page_info(:titles => 'zsefpfs') }.should raise_error(
- WikipediaApi::PageNotFound
+ MediaWikiApi::NotFound
)
end
end
@@ -539,8 +539,8 @@
it "should raise an exception" do
expect { WikipediaApi.get('query') }.should raise_error(
- WikipediaApi::Exception,
- 'Response from Wikipedia API was not of type application/json.'
+ MediaWikiApi::Exception,
+ 'Response from MediaWiki API was not of type application/json.'
)
end
end
spec/wikipedia_category_spec.rb
@@ -132,7 +132,7 @@
it "should return raise a PageNotFound exception" do
lambda {WikipediaCategory.load(52780)}.should raise_error(
- WikipediaApi::PageNotFound,
+ MediaWikiApi::NotFound,
'Page 52780 is not a category'
)
end
spec/wikipedia_thing_spec.rb
@@ -213,7 +213,7 @@
context "loading a non-existant page from wikipedia" do
before :each do
WikipediaApi.expects(:parse).once.raises(
- WikipediaApi::PageNotFound,
+ MediaWikiApi::NotFound,
'There is no page with ID 999999'
)
FreebaseApi.expects(:lookup_wikipedia_pageid).never
@@ -221,7 +221,7 @@
it "should return raise a PageNotFound exception" do
lambda {WikipediaThing.load(999999)}.should raise_error(
- WikipediaApi::PageNotFound,
+ MediaWikiApi::NotFound,
'There is no page with ID 999999'
)
end