-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Revert "Revert "Use structured data fields instead of parsing raw marc" for marc links"
This reverts commit 5246198.
- Loading branch information
Showing 6 changed files with 39 additions and 282 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,146 +1,17 @@ | ||
module MarcLinks | ||
def marc_links | ||
if self.respond_to?(:to_marc) | ||
@marc_links ||= MarcLinks::Processor.new(self) | ||
end | ||
@marc_links ||= LinksWrapper.new(fetch(:marc_links_struct, []).map do |data| | ||
SearchWorks::Links::Link.new(data) | ||
end) | ||
end | ||
private | ||
class Processor < SearchWorks::Links | ||
# Keeps a reference to the document in @document.
# @param document the object stored as @document (link_fields calls #to_marc on it)
def initialize(document) | ||
@document = document | ||
end | ||
# Builds one SearchWorks::Links::Link per entry of link_fields and memoizes the array in @all.
# Fields for which process_link returns a non-present value yield nil from the block and are
# removed by the trailing compact.
# @return [Array<SearchWorks::Links::Link>]
def all | ||
@all ||= link_fields.map do |link_field| | ||
link = process_link(link_field) | ||
if link.present? | ||
# html: anchor built from :title/:href/:text, then the Casalini marker and :additional_text.
# text: the same pieces without markup, stripped of surrounding whitespace.
# NOTE(review): :title, :href and :text are interpolated into the markup as-is; no escaping
# call is visible in this method.
# NOTE(review): the Casalini interpolation evaluates to "" (not nil) when :casalini_toc is
# unset, so compact keeps it and join(' ') can leave extra spaces in html.
SearchWorks::Links::Link.new( | ||
html: ["<a title='#{link[:title]}' href='#{link[:href]}'>#{link[:text]}</a>", "#{'(source: Casalini)' if link[:casalini_toc]}", (" #{link[:additional_text]}" if link[:additional_text])].compact.join(' '), | ||
text: [link[:text], "#{'(source: Casalini)' if link[:casalini_toc]}", " #{link[:additional_text] if link[:additional_text]}"].compact.join(' ').strip, | ||
href: link[:href], | ||
fulltext: link_is_fulltext?(link_field), | ||
stanford_only: stanford_only?(link), | ||
finding_aid: link_is_finding_aid?(link_field), | ||
managed_purl: link[:managed_purl], | ||
file_id: link[:file_id], | ||
druid: druid(link), | ||
sort: link[:sort] | ||
) | ||
end | ||
end.compact | ||
end | ||
|
||
private | ||
|
||
# Selects the fields of @document.to_marc whose tag is '856'.
# The === call has the String '856' as receiver, so it is an equality test against field.tag
# (assuming field.tag is a String — TODO confirm).
def link_fields | ||
@document.to_marc.find_all do |field| | ||
('856') === field.tag | ||
end | ||
end | ||
|
||
# Parse a URI object to return the host of the URL in the "url" parameter if it's a proxied resource | ||
def link_host(link) | ||
return link.host unless link.to_s =~ SearchWorks::Links::PROXY_REGEX && link.to_s.include?('url=') | ||
proxy = CGI.parse(link.query) | ||
return link.host unless proxy.key?('url') | ||
|
||
extracted_url = URI.extract(proxy['url'].first).first | ||
return link.host unless extracted_url | ||
URI.parse(extracted_url).host | ||
class LinksWrapper < SearchWorks::Links | ||
def initialize(data) | ||
@data = data | ||
end | ||
|
||
# Converts one MARC field into a hash describing a link.
#
# Returns nil when the field has no $u, or when URI.parse raises URI::InvalidURIError on the
# cleaned-up $u. Otherwise returns one of three hash shapes, selected by $x:
#   * $x == "CasaliniTOC": :text (from $3), empty :title, :href, :casalini_toc => true
#   * $x matching /SDR-PURL/: text/title/href/additional_text/sort/file_id with managed_purl: true
#   * anything else: :text, :title, :href, :casalini_toc => false, :additional_text
#
# @param field the MARC field to read; must respond to [], each and subfields
# @return [Hash, nil]
def process_link(field) | ||
return if field['u'].nil? | ||
|
||
# Not sure why I need this, but it fails on certain URLs w/o it. The link printed still has character in it | ||
# The cleaned URL is only used for parsing (and for link_host below); every :href in the
# returned hashes is the raw field['u'].
fixed_url = field['u'].gsub("^","").strip | ||
url = URI.parse(fixed_url) | ||
# Collect $3 and $y (the last occurrence of each wins) and every $z.
sub3 = nil | ||
subz = [] | ||
suby = "" | ||
field.each{|subfield| | ||
if subfield.code == "3" | ||
sub3 = subfield.value | ||
elsif subfield.code == "z" | ||
subz << subfield.value | ||
elsif subfield.code == "y" | ||
suby = subfield.value | ||
end | ||
} | ||
|
||
if field["x"] and field["x"] == "CasaliniTOC" | ||
{:text=>field["3"], | ||
:title=>"", | ||
:href=>field["u"], | ||
:casalini_toc => true | ||
} | ||
elsif field['x'] && field['x'] =~ /SDR-PURL/ | ||
# Split every $x on its first ':' into a stripped [key, value] pair and build a hash;
# $x values that do not split into two parts are discarded.
subxes = field.subfields.select { |subfield| subfield.code == 'x' }.map { |subfield| subfield.value.split(':', 2).map(&:strip) }.select { |x| x.length == 2 }.to_h | ||
|
||
link_text = subxes['label'] if subxes['label'].present? | ||
sort = subxes['sort'] | ||
|
||
# title stays nil when there is no $z; additional_text stays nil unless the title matches
# stanford_affiliated_regex and something is left after removing the match.
title = subz.join(' ') if subz.present? | ||
if title =~ stanford_affiliated_regex && (subbed_title = title.gsub(stanford_affiliated_regex, '')).present? | ||
additional_text = "<span class='additional-link-text'>#{subbed_title}</span>".html_safe | ||
end | ||
|
||
{ | ||
text: link_text, | ||
title: title, | ||
href: field['u'], | ||
casalini_toc: false, | ||
additional_text: additional_text, | ||
sort: sort, | ||
managed_purl: true, | ||
file_id: subxes['file'] | ||
} | ||
else | ||
# Without $3 and $y the link text falls back to link_host(url); otherwise it is $3 and $y
# joined by a space.
link_text = (!suby.present? && !sub3.present?) ? link_host(url) : [sub3, suby].compact.join(' ') | ||
title = subz.join(" ") | ||
additional_text = nil | ||
# When the joined $z matches stanford_affiliated_regex, the remainder becomes additional_text
# and the title is replaced by a fixed sentence. Unlike the SDR-PURL branch, the span is
# emitted even if the remainder is empty and html_safe is not called.
if title =~ stanford_affiliated_regex | ||
additional_text = "<span class='additional-link-text'>#{title.gsub(stanford_affiliated_regex, '')}</span>" | ||
title = "Available to Stanford-affiliated users only" | ||
end | ||
{:text=>link_text, | ||
:title=> title, | ||
:href=>field["u"], | ||
:casalini_toc => false, | ||
:additional_text => additional_text | ||
} | ||
end | ||
rescue URI::InvalidURIError | ||
return nil | ||
end | ||
# Classifies a field as full text based on its second indicator and its $3/$z text.
#   * indicator2 "2": false
#   * indicator2 "0", "1" or blank: false if "<$3> <$z>" (downcased) contains any of the
#     resource labels listed below, true otherwise
#   * any other indicator2: nil
# @param field object responding to indicator2 and []
# @return [true, false, nil]
def link_is_fulltext?(field) | ||
resource_labels = ["table of contents", "abstract", "description", "sample text"] | ||
if field.indicator2 == "2" | ||
return false | ||
elsif field.indicator2 == "0" or field.indicator2 == "1" or field.indicator2.blank? | ||
resource_labels.each do |resource_label| | ||
return false if "#{field['3']} #{field['z']}".downcase.include?(resource_label) | ||
end | ||
return true | ||
else | ||
# this should catch bad indicators | ||
return nil | ||
end | ||
end | ||
|
||
# True when the field's $3 and $z, joined by a space and downcased, contain 'finding aid'.
# @param field object responding to []
# @return [Boolean]
def link_is_finding_aid?(field) | ||
"#{field['3']} #{field['z']}".downcase.include?('finding aid') | ||
end | ||
|
||
# Tests the link hash's :text and :title (concatenated with no separator, downcased) against
# stanford_affiliated_regex.
# Returns the result of =~ (match offset or nil), so callers get a truthy/falsy value and not
# a strict boolean.
# @param link [Hash] hash with :text and :title entries
def stanford_only?(link) | ||
[link[:text], link[:title]].join.downcase =~ stanford_affiliated_regex | ||
end | ||
|
||
# Returns link[:href] with a leading "http(s)://purl.stanford.edu/" prefix removed when the
# href matches /purl.stanford.edu/; returns nil otherwise (modifier-if with no else).
# NOTE(review): the dots in both patterns are unescaped, so they match any character, and ^
# anchors at the start of a line, not the start of the string.
# @param link [Hash] hash with an :href entry
def druid(link) | ||
link[:href].gsub(%r{^https?:\/\/purl.stanford.edu\/?}, '') if link[:href] =~ /purl.stanford.edu/ | ||
end | ||
|
||
def stanford_affiliated_regex | ||
Regexp.new(/available[ -]?to[ -]?stanford[ -]?affiliated[ -]?users[ -]?a?t?[:;.]?/i) | ||
def all | ||
@data | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,139 +1,29 @@ | ||
require "spec_helper" | ||
|
||
# Bare host class that mixes in MarcLinks; it defines nothing else, so instances exercise the
# module on an object with no methods of its own.
class MarcLinksTestClass | ||
include MarcLinks | ||
end | ||
|
||
describe MarcLinks do | ||
include Marc856Fixtures | ||
it "should return nil for non marc records" do | ||
expect(MarcLinksTestClass.new.marc_links).to be_nil | ||
end | ||
describe "link html, text, and href" do | ||
let(:document) { SolrDocument.new(marcxml: simple_856) } | ||
let(:no_label_document) { SolrDocument.new(marcxml: labelless_856) } | ||
let(:link_html) { document.marc_links.all.first.html } | ||
let(:link_text) { document.marc_links.all.first.text } | ||
let(:link_href) { document.marc_links.all.first.href } | ||
it "should place the $3 and $y as the link text" do | ||
expect(link_html).to match /<a.*>Link text 1 Link text 2<\/a>/ | ||
end | ||
it "should place the $z as the link title attribute" do | ||
expect(link_html).to match /<a.*title='Title text1 Title text2'.*>/ | ||
end | ||
it "should use the host of the URL if no text is available" do | ||
expect(no_label_document.marc_links.all.first.html).to match /<a.*>library.stanford.edu<\/a>/ | ||
end | ||
it 'should include the plain text version' do | ||
expect(link_text).to eq "Link text 1 Link text 2" | ||
end | ||
it 'should include the href' do | ||
expect(link_href).to eq "https://library.stanford.edu" | ||
end | ||
end | ||
describe "casalini links" do | ||
let(:document) { SolrDocument.new(marcxml: casalini_856) } | ||
let(:link_text) { document.marc_links.all.first.html } | ||
it "should not have any text before the link" do | ||
expect(link_text).to match /^<a / | ||
end | ||
it "should place $3 as the link text" do | ||
expect(link_text).to match /<a.*>Link text<\/a>/ | ||
end | ||
it "should place '(source: Casalini)' after the link" do | ||
expect(link_text).to match /<\/a> \(source: Casalini\)/ | ||
end | ||
end | ||
describe "stanford_only?" do | ||
let(:document) { SolrDocument.new(marcxml: stanford_only_856) } | ||
let(:links) { document.marc_links.all } | ||
it "should identify all the permutations of the Stanford Only string as Stanford Only resources" do | ||
expect(links).to be_present | ||
expect(links.all?(&:stanford_only?)).to be_truthy | ||
end | ||
end | ||
describe "fulltext?" do | ||
let(:document) { SolrDocument.new(marcxml: fulltext_856) } | ||
let(:links) { document.marc_links.all } | ||
it "method should return all fulltext links" do | ||
expect(links).to eq document.marc_links.fulltext | ||
end | ||
it "should identify fulltext links" do | ||
expect(links).to be_present | ||
expect(links.all?(&:fulltext?)).to be_truthy | ||
end | ||
end | ||
|
||
describe 'managed_purl?' do | ||
let(:document) { SolrDocument.new(marcxml: managed_purl_856) } | ||
let(:links) { document.marc_links } | ||
|
||
it 'should return the managed purl links' do | ||
expect(links.all).to be_present | ||
expect(links.managed_purls).to be_present | ||
expect(links.all).to eq(links.managed_purls) | ||
expect(links.all.all?(&:managed_purl?)).to be true | ||
end | ||
|
||
it 'should return the file_id (without "file:")' do | ||
expect(links.all.first.file_id).to eq 'abc123' | ||
end | ||
|
||
it 'should return the sort (without "sort:")' do | ||
expect(links.all.first.sort).to eq '123' | ||
end | ||
|
||
it 'should return the label (without "label:")' do | ||
expect(links.all.first.text).to eq 'some label' | ||
end | ||
|
||
context 'when stanford affiliated' do | ||
let(:document) { SolrDocument.new(marcxml: stanford_affiliated_managed_purl_856) } | ||
|
||
it 'does not include emmpty html from the additional_text that cannot be displayed' do | ||
expect(links.all.first.text).to be_blank | ||
end | ||
end | ||
end | ||
|
||
describe "#supplemental" do | ||
let(:document) { SolrDocument.new(marcxml: supplemental_856) } | ||
let(:links) { document.marc_links.all } | ||
it "method should return all supplemental links" do | ||
expect(links).to eq document.marc_links.supplemental | ||
end | ||
it "should identify supplemental links" do | ||
expect(links).to be_present | ||
expect(links.any?(&:fulltext?)).to be_falsey | ||
end | ||
end | ||
describe "#finding_aid" do | ||
let(:document) { SolrDocument.new(marcxml: finding_aid_856) } | ||
let(:links) { document.marc_links.all } | ||
it "should return all finding aid links" do | ||
expect(links).to be_present | ||
expect(links.all?(&:finding_aid?)).to be_truthy | ||
expect(links).to eq document.marc_links.finding_aid | ||
end | ||
end | ||
describe "ez-proxy" do | ||
let(:document) { SolrDocument.new(marcxml: ez_proxy_856 ) } | ||
let(:links) { document.marc_links.all } | ||
it "should place the host of the url parameter as link text of no explicit label is available" do | ||
expect(links.first.html).to match /<a.*>library.stanford.edu<\/a/ | ||
end | ||
end | ||
describe "bad URLs" do | ||
it "should not return anything when an 856 has no $u" do | ||
document = SolrDocument.new(marcxml: no_url_856) | ||
expect(document.marc_links.all).to_not be_present | ||
end | ||
|
||
it 'handles pulling th proxy host out of URLs with spaces in them' do | ||
document = SolrDocument.new(marcxml: ez_proxy_with_spaces_856) | ||
|
||
expect(document.marc_links.all.length).to eq 1 | ||
expect(document.marc_links.all.first.html).to match(%r{>library\.stanford\.edu</a>}) | ||
it "should return an empty array for non marc records" do | ||
expect( SolrDocument.new.marc_links.all).to eq [] | ||
end | ||
|
||
describe 'with a SolrDocument with structured data extracted from the marc' do | ||
let(:document) do | ||
SolrDocument.new(marc_links_struct: [ | ||
{ text: 'fulltext', fulltext: true }, | ||
{ text: 'stanford only', stanford_only: true }, | ||
{ html: 'finding aid', finding_aid: true }, | ||
{ text: 'druid', managed_purl: true, file_id: 'x', druid: 'abc' }, | ||
]) | ||
end | ||
|
||
it 'decodes structured data in the document' do | ||
expect(document.marc_links.all.length).to eq 4 | ||
expect(document.marc_links.fulltext.first.text).to eq 'fulltext' | ||
expect(document.marc_links.finding_aid.first.html).to eq 'finding aid' | ||
expect(document.marc_links.supplemental.first).to be_stanford_only | ||
expect(document.marc_links.managed_purls.first.text).to eq 'druid' | ||
expect(document.marc_links.managed_purls.first.file_id).to eq 'x' | ||
expect(document.marc_links.managed_purls.first.druid).to eq 'abc' | ||
end | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.