Permalink
Browse files

Merge pull request #62 from bbcarchdev/master

Update the set of unsafe characters used for DBpedia URI encoding.
  • Loading branch information...
2 parents 29455ae + a66e5da commit fd1c0ff4cc9ea3d6ad9c3517deba8da9eaf03857 @njh committed Feb 24, 2016
Showing with 4 additions and 4 deletions.
  1. +2 −2 lib/wikipedia_api.rb
  2. +1 −1 spec/base_model_spec.rb
  3. +1 −1 spec/wikipedia_api_spec.rb
View
4 lib/wikipedia_api.rb
@@ -4,7 +4,7 @@ class WikipediaApi < MediaWikiApi
ABSTRACT_MAX_LENGTH = 500
ABSTRACT_TRUNCATE_LENGTH = 700
- DBPEDIA_UNSAFE_REGEXP = Regexp.new('[^a-zA-Z0-9\.\-*/:_,&]', false, 'N').freeze
+ DBPEDIA_UNSAFE_REGEXP = Regexp.new('[^!\$&\'\(\)*\+,\-\./0-9:;=@A-Z_a-z~]', false, 'N').freeze
class Redirect < MediaWikiApi::Exception
attr_reader :pageid
@@ -21,7 +21,7 @@ def self.api_uri
end
def self.title_to_dbpedia_key(title)
- # From http://dbpedia.org/URIencoding
+ # From http://dbpedia.org/uri-encoding
URI::escape(title.gsub(' ', '_').squeeze('_'), DBPEDIA_UNSAFE_REGEXP)
end
View
2 spec/base_model_spec.rb
@@ -74,7 +74,7 @@
end
it "should respond to 'dbpedia_uri' with the dbpedia URI" do
- @obj.dbpedia_uri.should == RDF::URI('http://dbpedia.org/resource/Keith_Allen_%28actor%29')
+ @obj.dbpedia_uri.should == RDF::URI('http://dbpedia.org/resource/Keith_Allen_(actor)')
end
end
View
2 spec/wikipedia_api_spec.rb
@@ -5,7 +5,7 @@
describe WikipediaApi do
context "escaping a page title to a DBpedia key" do
it "should apply the encoding rules from dbpedia.org" do
- WikipediaApi.title_to_dbpedia_key('Mozambique (Portugal)').should == 'Mozambique_%28Portugal%29'
+ WikipediaApi.title_to_dbpedia_key('Mozambique (Portugal)').should == 'Mozambique_(Portugal)'
WikipediaApi.title_to_dbpedia_key('S/2012_P_1').should == 'S/2012_P_1'
end
end

0 comments on commit fd1c0ff

Please sign in to comment.