Permalink
Browse files

changes missed in the svn migration

  • Loading branch information...
1 parent 30a623f commit ac1a3cc20e3f6d40efdc3fad0503b9351a45701a @jrochkind jrochkind committed Oct 9, 2007
View
18 app/controllers/resolve_controller.rb
@@ -55,7 +55,9 @@ def init_processing
@user_request ||= Request.new_request(params, session, request )
- @collection = Collection.new(request.remote_ip, session)
+ # The IP may be simulated with req.ip in the context object, or it may be
+ # the actual one, which the request has figured out for us.
+ @collection = Collection.new(@user_request.client_ip_addr, session)
@user_request.save!
# Set 'timed out' background services to dead if necessary.
@@ -396,6 +398,9 @@ def should_skip_menu
# with a lambda config.
helper_method :'known_frame_escaper?'
def known_frame_escaper?(service_type)
+
+ bad_target_regexps = [/^WILSON\_/, 'SAGE_COMPLETE']
+ bad_url_regexps = [/http\:\/\/www.bmj.com/, /http\:\/\/bmj.bmjjournals.com/]
response = service_type.service_response
@@ -404,13 +409,16 @@ def known_frame_escaper?(service_type)
# Can't say it is, nope.
return false;
end
-
+
sfx_target_name = response.service_data[:sfx_target_name]
+ url = response.url
- bad_target_regexps = [/^WILSON\_/, 'SAGE_COMPLETE']
-
# Does our target name match any of our regexps?
- return bad_target_regexps.find_all {|re| re === sfx_target_name }.length > 0
+ bad_target = bad_target_regexps.find_all {|re| re === sfx_target_name }.length > 0
+
+ return bad_target if bad_target
+ # Now check url if necessary
+ return bad_url_regexps.find_all {|re| re === url }.length > 0
end
# Helper method used here in controller for outputting js to
View
22 app/models/collection.rb
@@ -11,15 +11,25 @@ class Collection
def initialize(ip, session)
@institutions= []
@services = {}
- # fill out 0..9 and a..z
@link_out_filters = {}
# fill out 0..9
+
+ # If IP address has changed, then refresh the collection
+ # (IP can change in a session if you take your laptop to a different
+ # wireless net; if you turn on VPN; if you've specified a new
+ # ip in the req.ip context object; various other reasons. )
+ if ( session[:collection] &&
+ session[:collection][:client_ip_addr] != ip)
+ session[:refresh_collection] = true
+ end
+
if session[:refresh_collection] == true
session[:collection] = nil
session[:refresh_collection] = false
end
+
# Data has been created and stored in session already, load it from
# there. Code can set session[:refresh_collection] = true to force
# re-calc on next Collection creation.
@@ -31,20 +41,24 @@ def initialize(ip, session)
else
# No data stored in session, so calculate it, and save it.
self.calculate_collection_data(ip, session)
- self.save_to_session(session)
+ self.save_to_session(ip, session)
end
end
# Right now we only save institutions, not services. Hmm.
- def save_to_session(session)
+ def save_to_session(ip, session)
# Create and blank out our data structure
session[:collection] = {:institutions=>[],
:services => {},
- :service_class_names => []}
+ :service_class_names => [],
+ :client_ip_addr => nil }
+
+ # Save client ip, so we can make sure to uncache if it changes
+ session[:collection][:client_ip_addr] = ip
# Save institution IDs. We'll refetch em from db later.
@institutions.each do | inst |
View
9 app/models/request.rb
@@ -13,11 +13,14 @@ def self.new_request(params, session, a_rails_request )
# Sometimes umlaut puts in a 'umlaut.request_id' parameter.
# first look by that, if we have it, for an existing request.
- begin
+ begin
+
request_id = params['umlaut.request_id']
# Be sure to use session id too to guard against spoofing by guessing
- # request ids from another session.
- req = Request.find(:first, :conditions => ["session_id = ? and id = ?", session.session_id, request_id] ) unless request_id.nil? || @user_request
+ # request ids from another session. If IP has changed, don't re-use the
+ # request!
+ client_ip = params['req.ip'] || a_rails_request.remote_ip()
+ req = Request.find(:first, :conditions => ["session_id = ? and id = ? and client_ip_addr = ?", session.session_id, request_id, client_ip] ) unless request_id.nil? || @user_request
rescue ActiveRecord::RecordNotFound
# Bad request id? Okay, pretend we never had a request_id at all.
request_id = nil
View
12 app/views/alternate/resolve_alternate.rhtml
@@ -106,9 +106,14 @@ unless relevant_links.empty? %>
<div id="rightColumn">
-<div class="sidebar_section"
-
-
+<div class="sidebar_section, smGrayBox">
+<%= render :partial=>"help"
+%>
+</div>
+
+
+
+<div class="sidebar_section">
<%
#{:action=>'web_search', :service_type=>'web_link', :label=>'Web Search Results'}
[{:action=>'toc', :service_type=>'table_of_contents', :label=>'Table of Contents'},
@@ -128,7 +133,6 @@ unless relevant_links.empty? %>
-
<div id="highlighted_links">
<%= render :partial=>"highlighted_links_start" %>
View
9 app/views/layouts/distribution/jhu_resolve.rhtml
@@ -13,12 +13,6 @@
<%= render(:partial => "layout_helpers/resolve_body_open" ) %>
<style type="text/css">
- #help {
- width: 187px;
- font-size: 10px;
- float: right;
- margin: 0;
- }
.centered_block {
width: 768px;
position: relative;
@@ -44,7 +38,6 @@
<div id="header" class="centered_block">
<%= image_tag('/local/header.gif', :style=>"float: left;")
%>
- <%= render :partial=>"help" %>
<div style="clear: both;"></div>
</div>
@@ -64,7 +57,7 @@
</div>
<div class="centered_block" id="footer">
- FindIt service provided by <%= link_to 'Johns Hopkins Libraries', 'http://webapps.jhu.edu/jhuniverse/libraries/' %> | Powered by Umlaut, using services from <%= link_to 'SFX', 'http://www.exlibrisgroup.com/sfx.htm' %>, <%= link_to 'CrossRef', 'http://www.crossref.org' %>, <%= link_to 'Amazon', 'http://www.amazon.com' %>, <%= link_to 'OCLC Worldcat', 'http://www.worldcat.org' %>, and others. <%= link_to '[S]', 'http://sfx.library.jhu.edu:8000/jhu_sfx?' + (hash_to_querystring(@user_request.original_co_params) if @user_request) %>
+ FindIt service provided by <%= link_to 'Johns Hopkins Libraries', 'http://webapps.jhu.edu/jhuniverse/libraries/' %> | Powered by Umlaut, using services from <%= link_to 'SFX', 'http://www.exlibrisgroup.com/sfx.htm' %>, <%= link_to 'CrossRef', 'http://www.crossref.org' %>, <%= link_to 'Amazon', 'http://www.amazon.com' %>, <%= link_to 'OCLC Worldcat', 'http://www.worldcat.org' %>, and others. <%= link_to '[S]', 'http://sfx.library.jhu.edu:8000/jhu_sfx?' + (hash_to_querystring(@user_request.original_co_params) if @user_request), :target=>'_blank' %>
</div>
</body>
View
24 app/views/resolve/_help.rhtml
@@ -1,19 +1,29 @@
+<style>
+.nopadding {
+ margin: 0;
+ padding: 0;
+ vertical-align: top;
+}
+</style>
+
<% help_responses = get_service_type('help')
unless (help_responses.blank?) %>
- <div id="help" class="small_right_box">
- <span id="helpicon">?</span>
- <span class="smallText">Question? Problem? Contact:</span>
- <ul>
+ <div id="help">
+ <table class="nopadding"><tr class="nopadding">
+ <td class="nopadding"><span id="helpicon">?</span></td>
+ <td class="nopadding"><span class="smallText">Question? Problem? Contact:</span>
+ <ul style="text-align: right">
<% help_responses.each do |help|
value_hash = help.view_data %>
<li> <%= link_to value_hash[:display_text], {:controller=>'link_router', :id=>help.id}, 'target'=>"_blank" %>
<% unless value_hash[:note].blank? %>
<br />
<%= value_hash[:note] %>
<% end %>
- </li>
+ </li>
<% end %>
- </ul>
- <span class="clear"></span>
+ </ul></td>
+ </tr></table>
</div>
<% end %>
+
View
15 config/environment.rb
@@ -54,8 +54,19 @@
# For ruby-debug
SCRIPT_LINES__ = {} if ENV['RAILS_ENV'] == 'development'
-
- # Umlaut Configuration below.
+
+ # Umlaut expects session store in active record. You can override
+ # this in umlaut_config/environment.rb if you like, but some
+ # automatic session management might not work.
+ config.action_controller.session_store = :active_record_store
+
+
+ # Umlaut Configuration below.
+
+ # When nightly_maintenance will expire sessions. Default to
+ # 1 day. Over-ride in umlaut_config/environment.rb if desired, but
+ # probably no reason to.
+ config.app_config.session_expire_seconds = 1.day
# Multi-thread action of foreground services.
# Recommend you leave set to true, unless debugging.
View
8 config/umlaut_distribution/services.yml-dist
@@ -1,4 +1,8 @@
# Global services
+
+# If you have your SFX set up to get Metadata from crossref with a DOI,
+# then the SFX service will do that. If that's all this service does,
+# this one is not necessary.
Crossref:
name: Crossref
type: Crossref
@@ -7,7 +11,9 @@ Crossref:
password:
priority: 1
status: active
-
+
+# The SFX service should always enhance with a pmid, the pubmed service
+# probably isn't necessary.
Pubmed:
name: Pubmed
type: Pubmed
View
25 db/migrate/016_add_sessions.rb
@@ -0,0 +1,25 @@
+class AddSessions < ActiveRecord::Migration
+ def self.up
+ # Drop pre-existing weird one for Rails standard one.
+ drop_table :sessions
+
+ create_table :sessions do |t|
+ t.column :session_id, :string
+ t.column :data, :text
+ t.column :updated_at, :datetime
+ end
+
+ add_index :sessions, :session_id
+ add_index :sessions, :updated_at
+ end
+
+ def self.down
+ drop_table :sessions
+
+ # Add original weird one.
+ create_table "sessions", :force => true do |t|
+ t.column "sessid", :string, :limit => 32
+ t.column "data", :text
+ end
+ end
+end
View
11 db/migrate/017_add_service_type_indexes.rb
@@ -0,0 +1,11 @@
+class AddServiceTypeIndexes < ActiveRecord::Migration
+ def self.up
+ # Composite index on request_id and service_response_id does NOT cut it!
+ # We need individual index on service_response_id too.
+ add_index :service_types, :service_response_id
+ end
+
+ def self.down
+ remove_index :service_types, :service_response_id
+ end
+end
View
12 db/schema.rb
@@ -2,7 +2,7 @@
# migrations feature of ActiveRecord to incrementally modify your database, and
# then regenerate this schema definition.
-ActiveRecord::Schema.define(:version => 15) do
+ActiveRecord::Schema.define(:version => 17) do
create_table "categories", :force => true do |t|
t.column "category", :string, :limit => 100, :default => "", :null => false
@@ -195,13 +195,17 @@
add_index "service_types", ["request_id", "service_response_id"], :name => "svc_type_idx"
add_index "service_types", ["service_type_value_id"], :name => "index_service_types_on_service_type_value_id"
+ add_index "service_types", ["request_id"], :name => "index_service_types_on_request_id"
+ add_index "service_types", ["service_response_id"], :name => "index_service_types_on_service_response_id"
create_table "sessions", :force => true do |t|
- t.column "sessid", :string, :limit => 32
- t.column "data", :text
+ t.column "session_id", :string
+ t.column "data", :text
+ t.column "updated_at", :datetime
end
- add_index "sessions", ["sessid"], :name => "sess_sessid_idx"
+ add_index "sessions", ["session_id"], :name => "index_sessions_on_session_id"
+ add_index "sessions", ["updated_at"], :name => "index_sessions_on_updated_at"
create_table "sfx_urls", :force => true do |t|
t.column "url", :string
View
3 distribution/script/rewrite_map.pl
@@ -1,14 +1,11 @@
#!/usr/bin/perl
$| = 1; # Turn off bufferingi
-open LOG ,">>map_log";
while (<STDIN>) {
- print LOG "Orig: " . $_ . " \n";
s/>/%3E/g;
s/</%3C/g;
s/\//%2F/g;
s/\\/%5C/g;
s/ /\+/g;
print $_;
- print LOG "Translated: " . $_ . " \n";
}
View
371 lib/service_adaptors/sfx.rb
@@ -83,9 +83,9 @@ def initialize_client(request)
#context_object.referrer.add_identifier(request.referrer.identifier) if request.referrer
context_object = request.to_context_object
-
transport.add_context_object(context_object)
transport.extra_args["sfx.response_type"]="multi_obj_xml"
+
@get_coverage = false
@@ -101,6 +101,7 @@ def initialize_client(request)
if (context_object.referent.identifiers.find {|i| i =~ /^info:doi\// })
transport.extra_args['sfx.doi_url']='http://dx.doi.org'
end
+
return transport
end
@@ -119,162 +120,228 @@ def parse_response(resolver_response, request)
journal_index_on = false if journal_index_on.nil?
doc = Hpricot(resolver_response)
- # parse perl_data from response
- related_items = []
- # For god's sake, if there is more than one context object in our multi
- # response, just take the perl data from the FIRST one. Otherwise we end up
- # with a mess in enhanced metadata!
- attr_xml = CGI.unescapeHTML((doc/"/ctx_obj_set/ctx_obj/ctx_obj_attributes")[0].inner_html)
- perl_data = Hpricot(attr_xml)
- (perl_data/"//hash/item[@key='@sfx.related_object_ids']").each { | rel |
- (rel/'/array/item').each { | item |
- related_items << item.inner_html
- }
- }
-
- object_id_node = perl_data.at("//hash/item[@key='rft.object_id']")
- object_id = nil
- if object_id_node
- object_id = object_id_node.inner_html
- end
+
+
+ # There can be several context objects in the response.
+ # We need to keep track of which data comes from which, for
+ # SFX click-through generating et alia
+ sfx_objs = doc.search('/ctx_obj_set/ctx_obj')
+
+ # We need to keep track of which ones we find full text in,
+ # for metadata enhancing. We'll do that here:
+ fulltext_seen_in_index = {}
- sfx_target_service_ids = doc.search('//target/target_service_id').collect {|e| e.inner_html}
- enhance_referent(request, perl_data)
- # generate new metadata object, since we have enhanced our metadata
- metadata = request.referent.metadata
+ 0.upto(sfx_objs.length - 1 ) do |sfx_obj_index|
+
+ sfx_obj = sfx_objs[sfx_obj_index]
+
+ # Get out the "perl_data" section, with our actual OpenURL style
+ # context object information. Weird double-escaping, sorry.
+
+ ctx_obj_atts =
+ CGI.unescapeHTML( sfx_obj.at('/ctx_obj_attributes').inner_html)
+ perl_data = Hpricot( ctx_obj_atts )
+
+ # Pull out related items
+ # not currently used for anything.
+ #related_items = []
+ #(perl_data/"//hash/item[@key='@sfx.related_object_ids']").each { | rel |
+ # (rel/'/array/item').each { | item |
+ # related_items << item.inner_html
+ # }
+ #}
- request_id = nil
- request_id_node = (perl_data/"//hash/item[@key='sfx.request_id']")
- if request_id_node
- request_id = request_id_node.inner_html
- end
-
- if ( journal_index_on )
- if object_id
- journal = Journal.find_by_object_id(object_id)
- elsif metadata['issn']
- journal = Journal.find_by_issn_or_eissn(metadata['issn'], metadata['eissn'])
- end
- if journal
- journal.categories.each do | category |
- request.add_service_response({:service=>self,:key=>'SFX',:value_string=>category.category,:value_text=>category.subcategory},['subject'])
+
+ # get SFX objectID
+ object_id_node =
+ perl_data.at("/perldata/hash/item[@key='rft.object_id']")
+ object_id = object_id_node ? object_id_node.inner_html : nil
+
+ # Get SFX requestID
+ request_id_node =
+ perl_data.at("/perldata/hash/item[@key='sfx.request_id']")
+ request_id = request_id_node ? request_id_node.inner_html : nil
+
+ # Get targets service ids
+ sfx_target_service_ids =
+ sfx_obj.search('/ctx_obj_targets/target/target_service_id').collect {|e| e.inner_html}
+
+ # If journal index is on, load categories. Not sure this works or does
+ # anything at present.
+ metadata = request.referent.metadata
+ if ( journal_index_on )
+ if object_id
+ journal = Journal.find_by_object_id(object_id)
+ elsif metadata['issn']
+ journal = Journal.find_by_issn_or_eissn(metadata['issn'], metadata['eissn'])
+ end
+ if journal
+ journal.categories.each do | category |
+ request.add_service_response({:service=>self,:key=>'SFX',:value_string=>category.category,:value_text=>category.subcategory},['subject'])
+ end
end
end
- end
- # Load coverage/availability string from Rochkind's 'extra' SFX coverage
- # API, if configured, and if we have the right data to do so. We load em
- # all in bulk in one request, rather than a request per service.
- loaded_coverage_strings = nil
- if ( @get_coverage && @coverage_api_url && object_id && (sfx_target_service_ids.length > 0) )
- begin
- require 'net/http'
- require 'uri'
- require 'hpricot'
+
+ # Load coverage/availability string from Rochkind's 'extra' SFX coverage
+ # API, if configured, and if we have the right data to do so.
+ loaded_coverage_strings = nil
+ if ( @get_coverage && @coverage_api_url && object_id && (sfx_target_service_ids.length > 0) )
+ loaded_coverage_strings = load_coverage_strings(object_id, sfx_target_service_ids)
+ end
+
+ # For each target delivered by SFX
+ sfx_obj.search("/ctx_obj_targets/target").each_with_index do|target, target_index|
+ value_text = {}
- loaded_coverage_strings = {}
+ # First check @extra_targets_of_interest
+ sfx_target_name = target.at('target_name').inner_html
+ umlaut_service = @extra_targets_of_interest[sfx_target_name]
- coverage_url = URI.parse(@coverage_api_url)
- coverage_url.query = "rft.object_id=#{object_id}&target_service_id=#{sfx_target_service_ids.join(',')}"
-
- response = Net::HTTP.get_response( coverage_url )
- unless (response.kind_of? Net::HTTPSuccess)
- response.error!
+ # If not found, look for it in services_of_interest
+ unless ( umlaut_service )
+ sfx_service_type = target.at("/service_type").inner_html
+ umlaut_service = @services_of_interest[sfx_service_type]
+ end
+
+ # If we have multiple context objs, skip the ill and ask-a-librarian
+ # links for all but the first, to avoid dups. This is a bit messy,
+ # but this whole multiple hits thing is messy.
+ if ( sfx_obj_index > 0 &&
+ ( umlaut_service == 'document_delivery' ||
+ umlaut_service == 'help'))
+ next
+ end
+ if ( umlaut_service == 'fulltext')
+ fulltext_seen_in_index[sfx_obj_index] = true
end
- cov_doc = Hpricot( response.body )
+ if ( umlaut_service ) # Okay, it's in services or targets of interest
- error = cov_doc.at('/sfxcoverage/exception')
- if ( error )
- request.logger.error("Error in SFX coverage API result. #{coverage_url.to_s} ; #{error.to_s}")
- raise "Error in coverage API fetch"
- end
+ if (target/"/displayer")
+ source = "SFX/"+(target/"/displayer").inner_html
+ else
+ source = "SFX"+URI.parse(self.url).path
+ end
+
+ target_service_id = (target/"target_service_id").inner_html
+
+ coverage = nil
+ if ( @get_coverage )
+ if ( loaded_coverage_strings ) # used the external extra SFX api
+ coverage = loaded_coverage_strings[target_service_id]
+ elsif (journal_index_on && journal) # Umlaut journal index
+ cvg = journal.coverages.find(:first, :conditions=>['provider = ?', (target/"/target_public_name").inner_html])
+ coverage = cvg.coverage if cvg
+ end
+ end
+
+ if ( sfx_service_type == 'getDocumentDelivery' )
+ value_string = request_id
+ else
+ value_string = (target/"/target_service_id").inner_html
+ end
+
+ value_text[:url] = CGI.unescapeHTML((target/"/target_url").inner_html)
+ value_text[:notes] = CGI.unescapeHTML((target/"/note").inner_html)
+ value_text[:authentication] = CGI.unescapeHTML((target/"/authentication").inner_html)
+ value_text[:source] = source
+ value_text[:coverage] = coverage if coverage
- cov_doc.search('/sfxcoverage/targets/target').each do |target|
- next if target.empty? # it never should be, but sometimes is.
- service_id = target.at('target_service_id').inner_html
- coverage_str = target.at('availability_string').inner_html
- loaded_coverage_strings[service_id] = coverage_str
- end
- rescue Exception => e
- sfx_target_service_ids.each { |id| loaded_coverage_strings[id] = "Error in fetching coverage information." }
+ # Sfx metadata we want
+ value_text[:sfx_obj_index] = sfx_obj_index + 1 # sfx is 1 indexed
+ value_text[:sfx_target_index] = target_index + 1
+ value_text[:sfx_request_id] = (perl_data/"//hash/item[@key='sfx.request_id']").first.inner_html
+ value_text[:sfx_target_service_id] = target_service_id
+ value_text[:sfx_target_name] = sfx_target_name
+ # At url-generation time, the request isn't available to us anymore,
+ # so we better store this citation info here now, since we need it
+ # for sfx click passthrough
+
+ # Oops, need to take this from SFX delivered metadata.
+
+ value_text[:citation_year] = metadata['date']
+ value_text[:citation_volume] = metadata['volume'];
+ value_text[:citation_issue] = metadata['issue']
+ value_text[:citation_spage] = metadata['spage']
+
+ display_text = (target/"/target_public_name").inner_html
+
+ initHash = {:service=>self,
+ #:value_text=>value_text.to_yaml,
+ :service_data=>value_text, :display_text=>display_text,
+ :notes=>value_text[:notes]}
+
+ request.add_service_response(initHash , [umlaut_service])
+ end
end
end
+
- # Each target delivered by SFX
- (doc/"/ctx_obj_set/ctx_obj/ctx_obj_targets/target").each_with_index do|target, target_index|
- value_text = {}
+ # In case of multiple SFX hits, enhance metadata only from the
+ # one that actually had fulltext. If more than one did, forget it.
+ ctx_obj_atts = nil
+ if ( fulltext_seen_in_index.keys.length == 0)
+ # No fulltext, just take the first
+ ctx_obj_atts =
+ CGI.unescapeHTML( sfx_objs[0].at('/ctx_obj_attributes').inner_html)
+ elsif (fulltext_seen_in_index.keys.length == 1)
+ i = fulltext_seen_in_index.keys[0]
+ ctx_obj_atts =
+ CGI.unescapeHTML( sfx_objs[i].at('/ctx_obj_attributes').inner_html)
+ end
+ if ( ctx_obj_atts )
+ perl_data = Hpricot( ctx_obj_atts )
+ enhance_referent( request, perl_data )
+ end
+
+ end
- # First check @extra_targets_of_interest
- sfx_target_name = target.at('target_name').inner_html
- umlaut_service = @extra_targets_of_interest[sfx_target_name]
+ # Given an array of sfx target service ids, loads human-readable
+ # coverage strings from Rochkind's 'extra' SFX coverage API.
+ # Returns a hash, keyed on target service id,
+ # value coverage string.
+ def load_coverage_strings(object_id, sfx_target_service_ids)
+ require 'net/http'
+ require 'uri'
+ require 'hpricot'
- # If not found, look for it in services_of_interest
- unless ( umlaut_service )
- sfx_service_type = target.at("/service_type").inner_html
- umlaut_service = @services_of_interest[sfx_service_type]
+ begin
+ loaded_coverage_strings = {}
+
+ # We load em all in bulk in one request, rather than a
+ # request per service.
+ coverage_url = URI.parse(@coverage_api_url)
+ coverage_url.query = "rft.object_id=#{object_id}&target_service_id=#{sfx_target_service_ids.join(',')}"
+
+ response = Net::HTTP.get_response( coverage_url )
+ unless (response.kind_of? Net::HTTPSuccess)
+ response.error!
end
-
- if ( umlaut_service ) # Okay, it's in services or targets of interest
-
- if (target/"/displayer")
- source = "SFX/"+(target/"/displayer").inner_html
- else
- source = "SFX"+URI.parse(self.url).path
- end
-
- target_service_id = (target/"target_service_id").inner_html
-
- coverage = nil
- if ( @get_coverage )
- if ( loaded_coverage_strings ) # used the external extra SFX api
- coverage = loaded_coverage_strings[target_service_id]
- elsif (journal_index_on && journal) # Umlaut journal index
- cvg = journal.coverages.find(:first, :conditions=>['provider = ?', (target/"/target_public_name").inner_html])
- coverage = cvg.coverage if cvg
- end
- end
-
- if ( sfx_service_type == 'getDocumentDelivery' )
- value_string = request_id
- else
- value_string = (target/"/target_service_id").inner_html
- end
-
- value_text[:url] = CGI.unescapeHTML((target/"/target_url").inner_html)
- value_text[:notes] = CGI.unescapeHTML((target/"/note").inner_html)
- value_text[:authentication] = CGI.unescapeHTML((target/"/authentication").inner_html)
- value_text[:source] = source
- value_text[:coverage] = coverage if coverage
-
- # Sfx metadata we want
- value_text[:sfx_target_index] = target_index + 1 # sfx is 1 indexed
- value_text[:sfx_request_id] = (perl_data/"//hash/item[@key='sfx.request_id']").first.inner_html
- value_text[:sfx_target_service_id] = target_service_id
- value_text[:sfx_target_name] = sfx_target_name
- # At url-generation time, the request isn't available to us anymore,
- # so we better store this citation info here now, since we need it
- # for sfx click passthrough
- value_text[:citation_year] = metadata['date']
- value_text[:citation_volume] = metadata['volume'];
- value_text[:citation_issue] = metadata['issue']
- value_text[:citation_spage] = metadata['spage']
-
- display_text = (target/"/target_public_name").inner_html
-
- # :value_text=>value_text.to_yaml,
-
- initHash = {:service=>self,
- #:value_text=>value_text.to_yaml,
- :service_data=>value_text, :display_text=>display_text,
- :notes=>value_text[:notes]}
- request.add_service_response(initHash , [umlaut_service])
+
+ cov_doc = Hpricot( response.body )
+
+ error = cov_doc.at('/sfxcoverage/exception')
+ if ( error )
+ request.logger.error("Error in SFX coverage API result. #{coverage_url.to_s} ; #{error.to_s}")
+ raise "Error in coverage API fetch"
end
- end
+
+ cov_doc.search('/sfxcoverage/targets/target').each do |target|
+ next if target.empty? # it never should be, but sometimes is.
+ service_id = target.at('target_service_id').inner_html
+ coverage_str = target.at('availability_string').inner_html
+ loaded_coverage_strings[service_id] = coverage_str
+ end
+
+ rescue Exception => e
+ sfx_target_service_ids.each { |id| loaded_coverage_strings[id] = "Error in fetching coverage information." }
+ end
+
+ return loaded_coverage_strings
end
-
def sfx_click_passthrough
# From config, or if not that, from app default, or if not that, default
# to false.
@@ -315,10 +382,11 @@ def response_url(response)
# through SFX, so statistics are captured by SFX.
sfx_resolver_cgi_url = @base_url + "/cgi/core/sfxresolver.cgi"
- # Not sure if fixing tmp_ctx_obj_id to 1 is safe, but it seems to work,
- # and I don't know what the value is or how else to know it.
+
+
dataString = "?tmp_ctx_svc_id=#{response[:sfx_target_index]}"
- dataString += "&tmp_ctx_obj_id=1&service_id=#{response[:sfx_target_service_id]}"
+ dataString += "&tmp_ctx_obj_id=#{response[:sfx_obj_index]}"
+ dataString += "&service_id=#{response[:sfx_target_service_id]}"
dataString += "&request_id=#{response[:sfx_request_id]}"
dataString += "&rft.year="
dataString += response[:citation_year].to_s if response[:citation_year]
@@ -351,7 +419,7 @@ def self.parse_perl_data(perl_data)
# to know how long it took me to figure this out).
perl_data = Iconv.new('Latin1', 'UTF-8').iconv(perl_data)
- doc = Hpricot(perl_data)
+ doc = Hpricot.XML(perl_data)
co = OpenURL::ContextObject.new
co.referent.set_format('journal') # default
@@ -362,7 +430,7 @@ def self.parse_perl_data(perl_data)
# The auinit1 value is COMPLETELY messed up for reasons I do not know.
# Double encoded in bizarre ways.
- next if key == '@rft.auinit1'
+ next if key == '@rft.auinit1' || key == '@rft.auinit'
# Darn multi-value SFX hackery, indicated with keys beginning
# with '@'. Just take the first one,
@@ -409,26 +477,17 @@ def enhance_referent(request, perl_data)
metadata = request.referent.metadata
sfx_co = Sfx.parse_perl_data(perl_data.to_s)
+
sfx_metadata = sfx_co.referent.metadata
- # For reasons not understood by me, including the rft.object_id, which
- # should be SFX's internal object ID, in a later request, messes things up.
- # So eliminate it.
- sfx_metadata.delete('object_id')
- # some of these others are funky too, since it's an array
- sfx_metadata.delete('stitle')
- sfx_metadata.delete('auinit')
- sfx_metadata.delete('aulast')
-
+
# If we already had metadata for journal title and the SFX one
# differs, we want to over-write it. This is good for ambiguous
# incoming OpenURLs, among other things.
- # Actually, SFX messes up titles of non-ascii-7 (ie, diacritics etc)
- # in XML, so we don't really want to do that after all, sadly.
- #if request.referent.format == 'journal'
- # request.referent.enhance_referent("jtitle", sfx_metadata['jtitle'])
- #end
- # Let's do it with ISSN though
+ if request.referent.format == 'journal'
+ request.referent.enhance_referent("jtitle", sfx_metadata['jtitle'])
+ end
+ # And ISSN
if request.referent.format == 'journal' && ! sfx_metadata['issn'].blank?
request.referent.enhance_referent('issn', sfx_metadata['issn'])
end
View
62 lib/tasks/umlautdb.rake
@@ -1,6 +1,6 @@
namespace :umlaut do
desc "Perform nightly maintenance. Set up in cron."
- task :nightly_maintenance => [:load_sfx_urls]
+ task :nightly_maintenance => [:load_sfx_urls, :expire_sessions, :expire_old_data]
desc "Loads in initial set of irrelvant_sites and relevant_sites"
task :load_sites => :environment do
@@ -96,5 +96,65 @@ namespace :umlaut do
hosts.each {|h| SfxUrl.new({:url => h}).create }
end
end
+
+ desc "Expire sessions older than config.app_config.session_expire_seconds"
+ task :expire_sessions => :environment do
+ # Assume sessions are in db.
+ # Don't know good way to get the connection associated with sessions,
+ # since there is no model. Assume Request is in the same db.
+ expire_seconds = AppConfig.param("session_expire_seconds", 1.day)
+ puts "Expiring sessions older than #{expire_seconds} seconds."
+ Request.connection.execute("delete from sessions where now() - updated_at > #{expire_seconds}")
+ end
+
+
+ desc "Cleanup of database for old data associated with expired sessions etc."
+ task :expire_old_data => :environment do
+ # There are requests, responses, and dispatched_service entries
+ # hanging around for things that may be way old and no longer
+ # need to hang around. How do we know if they're too old?
+ # If they are no longer associated with any session, mainly.
+
+ # Except, we can not delete old Requests and their associated
+ # referent and referrer data, because they are used by the permalink
+ # service (and possibly by statistics too).
+
+ # However, if a Request no longer has a live session, let's get rid
+ # of all its ServiceTypes.
+
+ # We do this with 'destroy', which is slow, because it fetches
+ # everything from the db first. But I think that's okay. This
+ # code assumes session store in ActiveRecord in a sessions table.
+
+ puts "Deleting ServiceTypes for dead Requests..."
+ orphaned_service_types = ServiceType.find(:all, :include => [:request], :conditions => "requests.session_id is null OR requests.session_id NOT IN (select session_id from sessions)")
+
+ orphaned_service_types.each { |st| st.destroy }
+ puts " Deleted #{orphaned_service_types.length} ServiceTypes."
+
+ # Now, let's get rid of any ServiceResponses that no longer have
+ # ServiceTypes.
+ # Theoretically, a ServiceResponse can belong to more than one Request,
+ # via different ServiceType joins. However, Umlaut doesn't currently
+ # do that.
+ # Again with 'destroy' so all business rules for anything hanging off
+ # ServiceResponse are triggered.
+
+ puts "Deleting orphaned ServiceResponses..."
+ orphaned_responses = ServiceResponse.find(:all,
+ :include => [:service_types],
+ :conditions => "service_types.id is null")
+ orphaned_responses.each { |r| r.destroy }
+ puts " Deleted #{orphaned_responses.length} ServiceResponses."
+
+ # And get rid of DispatchedServices for 'dead' requests too. Don't
+ # need em.
+
+ puts "Deleting DispatchedServices for dead requests..."
+ orphaned_dispatch = DispatchedService.find(:all, :include => [:request], :conditions => "requests.session_id is null OR requests.session_id NOT IN (select session_id from sessions)")
+ orphaned_dispatch.each {|d| d.destroy }
+ puts " Deleted #{orphaned_dispatch.length} DispatchedServices."
+
+ end
end
View
2 lib/tasks/umlautlocal.rake
@@ -111,7 +111,7 @@ namespace :umlaut_lcl do
local_svn_root = UMLAUT_SVN_LOCAL if defined?(UMLAUT_SVN_LOCAL)
local_svn_root = ENV['UMLAUT_SVN_LOCAL'] unless local_svn_root
unless local_svn_root
- puts "Enter local svn root path: "
+ print "Enter local svn root path: "
local_svn_root = $stdin.gets.chomp
end
View
8 lib/tasks/upload_docs.rake
@@ -0,0 +1,8 @@
+namespace :doc do
+
+ desc "Upload current documentation to Rubyforge"
+ task :upload_to_rforge => :environment do
+ sh "scp -r doc/app/* " "#{AppConfig.param('rubyforge_username', ENV['RUBYFORGE_USERNAME'])}@rubyforge.org:/var/www/gforge-projects/umlaut/api/"
+ end
+
+end
View
2 public/stylesheets/basic.css
@@ -166,7 +166,7 @@ h4 {
}
.clear {
- clear=both;
+ clear: both;
}
/* Horizontal Line */

0 comments on commit ac1a3cc

Please sign in to comment.