Skip to content

Commit

Permalink
Merge branch 'master' into spec_compliance, fixed up the tests
Browse files Browse the repository at this point in the history
Conflicts:
	lib/oai/provider/response.rb
	lib/oai/provider/response/identify.rb
	lib/oai/provider/response/record_response.rb
	test/activerecord_provider/database/oaipmhtest
	test/activerecord_provider/tc_ar_provider.rb
	test/activerecord_provider/tc_caching_paging_provider.rb
	test/activerecord_provider/tc_simple_paging_provider.rb
	test/provider/tc_exceptions.rb
	test/provider/tc_functional_tokens.rb
	test/provider/tc_provider.rb
  • Loading branch information
averell23 committed Jul 9, 2009
2 parents 04748b1 + 0437a6c commit 5ece5a2
Show file tree
Hide file tree
Showing 37 changed files with 371 additions and 247 deletions.
18 changes: 18 additions & 0 deletions Changes
@@ -1,3 +1,21 @@
v0.0.12 Monday Dec. 23
- libxml is deprecating node.property, so added support for the new syntax to xpath.rb

v0.0.11 Monday Sept. 15
- fixed problem in client/response dealing with libxml call that was missed during recent libxml .5+ support corrections.
- Thank you to Bjorn Hjelle for pointing out the above oversight.

v0.0.10 Friday Sept. 12
- Changed provider functionality to support proper date formatting
v0.0.9 Tue Aug 12, 2008
- changed xpath.rb to add support for the libxml-ruby 0.8+ branch.

v0.0.6 Tue Jun 5, 2007
- change to resumption token location in the response, to better conform to the specification.
- oai shell date parsing should now be working correctly.
- ability to list metadata formats for an individual record.
- AR returning Time object bug fixed.

v0.0.5 Fri Feb 16, 2007
- fixed problem with provider supplying metadata for deleted records

Expand Down
3 changes: 2 additions & 1 deletion README
Expand Up @@ -76,5 +76,6 @@ Where x.y.z is the version of the gem that was generated.

== AUTHORS

- Ed Summers <ehs@pobox>
- Ed Summers <ehs@pobox.com>
- William Groppe <will.groppe@gmail.com>
- Terry Reese <terry.reese@oregonstate.edu>
4 changes: 1 addition & 3 deletions Rakefile
@@ -1,4 +1,4 @@
RUBY_OAI_VERSION = '0.0.6'
RUBY_OAI_VERSION = '0.0.12'

require 'rubygems'
require 'rake'
Expand All @@ -25,8 +25,6 @@ spec = Gem::Specification.new do |s|
s.bindir = 'bin'
s.executables = 'oai'

s.add_dependency('activesupport', '>=1.3.1')
s.add_dependency('chronic', '>=0.0.3')
s.add_dependency('builder', '>=2.0.0')

s.files = %w(README Rakefile) +
Expand Down
59 changes: 30 additions & 29 deletions lib/oai/client.rb
Expand Up @@ -32,8 +32,9 @@ module OAI
#
# client = OAI::Client.new 'http://www.pubmedcentral.gov/oai/oai.cgi'
# record = client.get_record :identifier => 'oai:pubmedcentral.gov:13901'
# for identifier in client.list_identifiers :metadata_prefix => 'oai_dc'
# puts identifier.
# for identifier in client.list_identifiers
# puts identifier
# end
#
# It is worth noting that the api uses methods and parameter names with
# underscores in them rather than studly caps. So above list_identifiers
Expand Down Expand Up @@ -155,7 +156,7 @@ def list_sets(opts={})
def do_request(verb, opts = nil)
# fire off the request and return appropriate DOM object
uri = build_uri(verb, opts)
xml = get(uri)
xml = strip_invalid_utf_8_chars(get(uri))
if @parser == 'libxml'
# remove default namespace for oai-pmh since libxml
# isn't able to use our xpaths to get at them
Expand All @@ -176,44 +177,30 @@ def build_uri(verb, opts)

def encode(value)
return CGI.escape(value) unless value.respond_to?(:strftime)
if value.respond_to?(:to_time) # Usually a DateTime or Time
value.to_time.utc.xmlschema
if value.kind_of?(DateTime)
Time.parse(value.asctime).utc.xmlschema
elsif value.kind_of?(Time)
value.utc.xmlschema
else # Assume something date like
value.strftime('%Y-%m-%d')
end
end

def load_document(xml)
retried = false
case @parser
when 'libxml'
begin
parser = XML::Parser.new()
parser.string = xml
return parser.parse
rescue XML::Parser::ParseError => e
if retried
raise OAI::Exception, 'response not well formed XML: '+e, caller
end
ic = Iconv.new('UTF-8//IGNORE', 'UTF-8')
xml2 = ic.iconv(xml << ' ')[0..-2]
puts "equal? #{xml == xml2}"
retried = true
retry
raise OAI::Exception, 'response not well formed XML: '+e, caller
end
when 'rexml'
begin
return REXML::Document.new(xml)
rescue REXML::ParseException => e
if retried
puts xml
raise OAI::Exception, 'response not well formed XML: '+e, caller
end
puts "RETRYING"
ic = Iconv.new('UTF-8//IGNORE', 'UTF-8')
xml = ic.iconv(xml << ' ')[0..-2]
retried = true
retry
raise OAI::Exception, 'response not well formed XML: '+e.message, caller
end
end
end
Expand Down Expand Up @@ -288,13 +275,27 @@ def externalize(value)
def parse_date(value)
return value if value.respond_to?(:strftime)

# Oddly Chronic doesn't parse an UTC encoded datetime.
# Luckily Time does
dt = Chronic.parse(value) || Time.parse(value)
raise OAI::ArgumentError.new unless dt

dt.utc
Date.parse(value) # This will raise an exception for badly formatted dates
Time.parse(value).utc # Sadly, this will not
rescue
raise OAI::ArgumentError.new
end

# Strip out invalid UTF-8 characters. Regex from the W3C, inverted.
# http://www.w3.org/International/questions/qa-forms-utf-8.en.php
#
# Regex is from WebCollab:
# http://webcollab.sourceforge.net/unicode.html
#
# Every byte sequence matched by the patterns below is replaced by a
# single '?'. The patterns are byte-oriented, so they carry the /n flag
# and, on Rubies with encoding-aware strings, are applied to a
# binary-tagged copy of the input; the result is tagged with the input's
# original encoding again before it is returned.
#
# Note that lead bytes 0xF0-0xFF are treated as invalid, so four-byte
# sequences are replaced as well.
# NOTE(review): the control-byte class stops at \x19 (not \x1F) and lists
# \x10 redundantly; kept exactly as in the WebCollab original -- confirm.
#
# xml:: the raw response body (String); it is not modified.
#
# Returns a new String with the offending byte sequences replaced by '?'.
def strip_invalid_utf_8_chars(xml)
  bytes = xml.dup
  source_encoding = nil
  if bytes.respond_to?(:force_encoding)
    source_encoding = bytes.encoding
    bytes.force_encoding('BINARY')
  end

  # First pass: control bytes, stray continuation bytes, bad lead bytes and
  # multi-byte sequences with too few or too many continuation bytes.
  simple_bytes = bytes.gsub(/[\x00-\x08\x10\x0B\x0C\x0E-\x19\x7F]
    | [\x00-\x7F][\x80-\xBF]+
    | ([\xC0\xC1]|[\xF0-\xFF])[\x80-\xBF]*
    | [\xC2-\xDF]((?![\x80-\xBF])|[\x80-\xBF]{2,})
    | [\xE0-\xEF](([\x80-\xBF](?![\x80-\xBF]))
    | (?![\x80-\xBF]{2})|[\x80-\xBF]{3,})/nx, '?')

  # Second pass: overlong three-byte forms (E0 80..9F xx) and UTF-16
  # surrogates (ED A0..BF xx). The pattern spans two lines, so it needs the
  # x flag; without it the line break becomes part of the pattern.
  cleaned = simple_bytes.gsub(/\xE0[\x80-\x9F][\x80-\xBF]
    | \xED[\xA0-\xBF][\x80-\xBF]/nx, '?')

  cleaned.force_encoding(source_encoding) if source_encoding
  cleaned
end

end
end
2 changes: 1 addition & 1 deletion lib/oai/client/header.rb
Expand Up @@ -7,7 +7,7 @@ def initialize(element)
@status = get_attribute(element, 'status')
@identifier = xpath(element, './/identifier')
@datestamp = xpath(element, './/datestamp')
@set_spec = xpath(element, './/setSpec')
@set_spec = xpath_all(element, './/setSpec')
end

def deleted?
Expand Down
2 changes: 1 addition & 1 deletion lib/oai/client/identify.rb
Expand Up @@ -22,7 +22,7 @@ def to_s

# returns REXML::Element nodes for each description section
# if the OAI::Client was configured to use libxml then you will
# instead get a XML::Node object.
# instead get a LibXML::XML::Node object.
def descriptions
  # Collect every <description> node found under <Identify> in the response.
  xpath_all(doc, './/Identify/description')
end
Expand Down
13 changes: 11 additions & 2 deletions lib/oai/client/response.rb
Expand Up @@ -15,9 +15,18 @@ def initialize(doc)
when 'REXML::Element'
message = error.text
code = error.attributes['code']
when 'XML::Node'
when 'LibXML::XML::Node'
message = error.content
code = error.property('code')
code = ""
if defined?(error.property) == nil
code = error.attributes['code']
else
begin
code = error["code"]
rescue
code = error.property('code')
end
end
end
raise OAI::Exception.new(message, code)
end
Expand Down
1 change: 1 addition & 0 deletions lib/oai/harvester.rb
Expand Up @@ -9,6 +9,7 @@
require 'chronic'
require 'socket'

require 'oai/client'
require 'oai/harvester/config'
require 'oai/harvester/harvest'
require 'oai/harvester/logging'
Expand Down
14 changes: 9 additions & 5 deletions lib/oai/harvester/shell.rb
Expand Up @@ -45,10 +45,12 @@ def start
else
self.send(cmd.shift, cmd.join(" "))
end
rescue
puts "Not a recognized command, or bad options. Type 'help' for clues."
#puts $!
#puts $!.backtrace.join("\n")
rescue NoMethodError
puts "Not a recognized command. Type 'help' for clues."
rescue
puts "An error occurred:"
puts $!
puts $!.backtrace.join("\n")
end
end
end
Expand All @@ -73,7 +75,7 @@ def harvest(options)
banner "Harvesting '#{site}'"
if date && !date.empty?
begin
date = Chronic.parse(date.join(' ')).utc
date = Time.parse(date.join(' ')).utc
rescue NoMethodError
puts "Couldn't parse the date supplied"
return
Expand Down Expand Up @@ -110,6 +112,8 @@ def info(args)
print_site(site)
end
puts
rescue
puts args + " doesn't appear to be configured, use list to see configured repositories."
end

def new
Expand Down
8 changes: 4 additions & 4 deletions lib/oai/provider.rb
@@ -1,6 +1,6 @@
require 'active_support'
require 'rexml/document'
require 'singleton'
require 'builder'
require 'chronic'

if not defined?(OAI::Const::VERBS)
require 'oai/exception'
Expand Down Expand Up @@ -266,8 +266,8 @@ def get_record(options = {})
Response::GetRecord.new(self.class, options).to_xml
end

# xml_response = process_verb('ListRecords', :from => 'October',
# :until => 'November') # thanks Chronic!
# xml_response = process_verb('ListRecords', :from => 'October 1, 2005',
# :until => 'November 1, 2005')
#
# If you are implementing a web interface using process_request is the
# preferred way.
Expand Down
66 changes: 59 additions & 7 deletions lib/oai/provider/metadata_format.rb
@@ -1,3 +1,5 @@
require 'singleton'

module OAI::Provider::Metadata
# == Metadata Base Class
#
Expand Down Expand Up @@ -52,13 +54,14 @@ def encode(model, record)
# 2. Try calling the pluralized name method on the model.
# 3. Try calling the singular name method on the model
def value_for(field, record, map)
method = map[field] ? map[field].to_s : field.to_s

methods = record.public_methods(false)
if methods.include?(method.pluralize)
record.send method.pluralize
elsif methods.include?(method)
record.send method
method = map[field] ? map[field].to_s : field.to_s

if record.respond_to?(pluralize(method))
record.send pluralize(method)
elsif record.respond_to?(method)
# At this point the call can trigger a deprecation warning when the method
# is 'type' (a reserved word in Ruby), so warnings are silenced around it.
silence_warnings { record.send method }
else
[]
end
Expand All @@ -68,6 +71,55 @@ def value_for(field, record, map)
# Always raises NotImplementedError in this class.
# NOTE(review): presumably a hook that concrete formats override -- confirm.
def header_specification
  raise NotImplementedError
end

# Shamelessly lifted from ActiveSupport. Thanks Rails community!
#
# Returns the plural form of +word+.
#
# word:: a String (or anything responding to +to_s+).
#
# When +word+ itself responds to +pluralize+ (ActiveSupport is loaded) that
# implementation is used. Otherwise a small built-in rule set is applied to
# a copy of the word; the argument is never modified.
def pluralize(word)
  # Use ActiveSupport's pluralization if it's available.
  return word.pluralize if word.respond_to?(:pluralize)

  # Otherwise use our own simple pluralization rules.
  result = word.to_s.dup

  # Uncountable words
  return result if %w(equipment information rice money species series fish sheep).include?(result)

  # Irregular words
  irregular = {
    'person' => 'people', 'man' => 'men', 'child' => 'children', 'sex' => 'sexes',
    'move' => 'moves', 'cow' => 'kine'
  }
  return irregular[result] if irregular.key?(result)

  # The rule table lists the catch-all rule first and the most specific
  # rules last, so it is scanned from the end; the first rule that changes
  # the word wins. Scanning from the front would always stop at the
  # catch-all /$/ rule and merely append an "s".
  rules.reverse_each { |(rule, replacement)| break if result.gsub!(rule, replacement) }
  result
end

# Pluralization rules as [pattern, replacement] pairs, ordered from the most
# general (append "s") to the most specific. Later entries take precedence,
# so consumers must try them last-to-first.
def rules
  [
    [/$/, 's'],
    [/s$/i, 's'],
    [/(ax|test)is$/i, '\1es'],
    [/(octop|vir)us$/i, '\1i'],
    [/(alias|status)$/i, '\1es'],
    [/(bu)s$/i, '\1ses'],
    [/(buffal|tomat)o$/i, '\1oes'],
    [/([ti])um$/i, '\1a'],
    [/sis$/i, 'ses'],
    [/(?:([^f])fe|([lr])f)$/i, '\1\2ves'],
    [/(hive)$/i, '\1s'],
    [/([^aeiouy]|qu)y$/i, '\1ies'],
    [/(x|ch|ss|sh)$/i, '\1es'],
    [/(matr|vert|ind)(?:ix|ex)$/i, '\1ices'],
    [/([m|l])ouse$/i, '\1ice'],
    [/^(ox)$/i, '\1en'],
    [/(quiz)$/i, '\1zes']
  ]
end

# Runs the given block with Ruby warnings switched off ($VERBOSE set to
# nil) and restores the previous $VERBOSE value afterwards, even when the
# block raises. Returns the block's value.
def silence_warnings
  saved_verbosity = $VERBOSE
  $VERBOSE = nil
  yield
ensure
  $VERBOSE = saved_verbosity
end


end

Expand Down
1 change: 0 additions & 1 deletion lib/oai/provider/model/activerecord_caching_wrapper.rb
Expand Up @@ -103,7 +103,6 @@ def select_partial(token)
end

oaitoken = OaiToken.find_by_token(token.to_s)

raise ResumptionTokenException.new unless oaitoken

PartialResult.new(
Expand Down

0 comments on commit 5ece5a2

Please sign in to comment.