Skip to content

Commit

Permalink
Revert "Revert "Use structured data fields instead of parsing raw mar…
Browse files Browse the repository at this point in the history
…c" for marc links"

This reverts commit 5246198.
  • Loading branch information
cbeer committed Dec 1, 2018
1 parent c18721d commit 1cf376b
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 282 deletions.
145 changes: 8 additions & 137 deletions app/models/concerns/marc_links.rb
Original file line number Diff line number Diff line change
@@ -1,146 +1,17 @@
module MarcLinks
def marc_links
if self.respond_to?(:to_marc)
@marc_links ||= MarcLinks::Processor.new(self)
end
@marc_links ||= LinksWrapper.new(fetch(:marc_links_struct, []).map do |data|
SearchWorks::Links::Link.new(data)
end)
end
private
class Processor < SearchWorks::Links
# Keep a reference to the document whose MARC record will be scanned for links.
#
# @param document an object responding to #to_marc (read later by #link_fields)
def initialize(document)
@document = document
end
# Build (and memoize in @all) one SearchWorks::Links::Link per link field
# returned by #link_fields. Fields for which #process_link returns a blank
# value (no URL, unparsable URL) are dropped by the trailing `compact`.
#
# @return [Array<SearchWorks::Links::Link>]
def all
@all ||= link_fields.map do |link_field|
link = process_link(link_field)
if link.present?
# html: anchor tag, then the optional "(source: Casalini)" marker, then any
# additional text. Values are interpolated as-is (no HTML escaping here).
# The Casalini element is an interpolated string, so it is "" rather than nil
# when the flag is unset; `compact` does not remove it, which leaves a
# trailing space after the anchor in :html (:text is stripped afterwards).
SearchWorks::Links::Link.new(
html: ["<a title='#{link[:title]}' href='#{link[:href]}'>#{link[:text]}</a>", "#{'(source: Casalini)' if link[:casalini_toc]}", (" #{link[:additional_text]}" if link[:additional_text])].compact.join(' '),
text: [link[:text], "#{'(source: Casalini)' if link[:casalini_toc]}", " #{link[:additional_text] if link[:additional_text]}"].compact.join(' ').strip,
href: link[:href],
fulltext: link_is_fulltext?(link_field),
stanford_only: stanford_only?(link),
finding_aid: link_is_finding_aid?(link_field),
managed_purl: link[:managed_purl],
file_id: link[:file_id],
druid: druid(link),
sort: link[:sort]
)
end
end.compact
end

private

# Collect every field of the document's MARC record whose tag is 856.
#
# @return [Array] the matching fields, in record order
def link_fields
  marc_record = @document.to_marc
  marc_record.find_all { |marc_field| ('856') === marc_field.tag }
end

# Parse a URI object to return the host of the URL in the "url" parameter if it's a proxied resource
def link_host(link)
return link.host unless link.to_s =~ SearchWorks::Links::PROXY_REGEX && link.to_s.include?('url=')
proxy = CGI.parse(link.query)
return link.host unless proxy.key?('url')

extracted_url = URI.extract(proxy['url'].first).first
return link.host unless extracted_url
URI.parse(extracted_url).host
class LinksWrapper < SearchWorks::Links
# Store the already-built collection of links that #all hands back.
#
# @param data the link objects to wrap (marc_links passes an array of
#   SearchWorks::Links::Link built from the marc_links_struct field)
def initialize(data)
@data = data
end

# Turn a single link field into a hash describing the link.
#
# Three shapes are produced, chosen by the field's $x subfield:
#   * $x == "CasaliniTOC"  -> text from $3, empty title, :casalini_toc => true
#   * $x matches SDR-PURL  -> managed purl; label/sort/file come from "key:value" $x subfields
#   * anything else        -> text from $3/$y (or the URL host), title from the $z subfields
#
# @param field a MARC field responding to #[], #each and #subfields
# @return [Hash, nil] the link attributes; nil when there is no $u or when
#   URI.parse raises URI::InvalidURIError
def process_link(field)
return if field['u'].nil?

# Strip "^" characters and surrounding whitespace before parsing; URI.parse fails on
# certain URLs without this. Note that the returned :href still uses the raw field['u']
# value, so the emitted link keeps those characters.
fixed_url = field['u'].gsub("^","").strip
url = URI.parse(fixed_url)
sub3 = nil
subz = []
suby = ""
# Single pass over the subfields: keep the last $3 and $y seen, and every $z.
field.each{|subfield|
if subfield.code == "3"
sub3 = subfield.value
elsif subfield.code == "z"
subz << subfield.value
elsif subfield.code == "y"
suby = subfield.value
end
}

if field["x"] and field["x"] == "CasaliniTOC"
{:text=>field["3"],
:title=>"",
:href=>field["u"],
:casalini_toc => true
}
elsif field['x'] && field['x'] =~ /SDR-PURL/
# Build a hash from every $x of the form "key:value" (split on the first colon only,
# both sides stripped); $x values without a colon are ignored.
subxes = field.subfields.select { |subfield| subfield.code == 'x' }.map { |subfield| subfield.value.split(':', 2).map(&:strip) }.select { |x| x.length == 2 }.to_h

link_text = subxes['label'] if subxes['label'].present?
sort = subxes['sort']

title = subz.join(' ') if subz.present?
# Only emit additional text when something remains after removing the
# Stanford-affiliated phrase; title is left unchanged in this branch.
if title =~ stanford_affiliated_regex && (subbed_title = title.gsub(stanford_affiliated_regex, '')).present?
additional_text = "<span class='additional-link-text'>#{subbed_title}</span>".html_safe
end

{
text: link_text,
title: title,
href: field['u'],
casalini_toc: false,
additional_text: additional_text,
sort: sort,
managed_purl: true,
file_id: subxes['file']
}
else
# Fall back to the (possibly proxied) URL host when neither $3 nor $y supplies text.
# suby defaults to "" (not nil), so compact never drops it here.
link_text = (!suby.present? && !sub3.present?) ? link_host(url) : [sub3, suby].compact.join(' ')
title = subz.join(" ")
additional_text = nil
# When the $z text carries the Stanford-affiliated phrase, move the remainder into
# additional_text and replace the title with a fixed notice.
if title =~ stanford_affiliated_regex
additional_text = "<span class='additional-link-text'>#{title.gsub(stanford_affiliated_regex, '')}</span>"
title = "Available to Stanford-affiliated users only"
end
{:text=>link_text,
:title=> title,
:href=>field["u"],
:casalini_toc => false,
:additional_text => additional_text
}
end
# An unparsable URL means the field yields no link at all.
rescue URI::InvalidURIError
return nil
end
# Decide whether a link field points at the full text of the resource.
#
# @param field a MARC field responding to #indicator2 and #[]
# @return [Boolean, nil] false when the second indicator is "2" or when the
#   $3/$z text mentions a supplemental-resource label; true for indicator
#   "0", "1" or blank otherwise; nil for any other (bad) indicator value
def link_is_fulltext?(field)
  second_indicator = field.indicator2
  return false if second_indicator == "2"
  # Anything other than 0, 1 or blank is treated as a bad indicator.
  return nil unless second_indicator == "0" || second_indicator == "1" || second_indicator.blank?

  label_text = "#{field['3']} #{field['z']}".downcase
  ["table of contents", "abstract", "description", "sample text"].none? do |supplemental_label|
    label_text.include?(supplemental_label)
  end
end

# True when the field's $3 and $z text, joined by a space and lowercased,
# contains the phrase "finding aid".
#
# @param field an object responding to #[] with subfield codes
# @return [Boolean]
def link_is_finding_aid?(field)
  label_text = "#{field['3']} #{field['z']}"
  label_text.downcase.include?('finding aid')
end

# Match the link's text and title (concatenated with no separator, then
# lowercased) against stanford_affiliated_regex.
#
# @param link [Hash] link attributes with :text and :title
# @return [Integer, nil] match offset when the phrase is present, nil otherwise
def stanford_only?(link)
  label_and_title = [link[:text], link[:title]].join
  label_and_title.downcase =~ stanford_affiliated_regex
end

# Extract the druid from a purl.stanford.edu link.
#
# The dots in the host name are escaped so that only the literal host
# "purl.stanford.edu" qualifies; previously "." matched any character, so
# look-alike URLs (e.g. ".../purl-stanford-edu") were treated as PURLs and
# returned unchanged as the "druid".
#
# @param link [Hash] link attributes; only :href is read
# @return [String, nil] the href with the leading http(s)://purl.stanford.edu/
#   prefix removed, or nil when the href is missing or is not a PURL
def druid(link)
  href = link[:href]
  return unless href =~ /purl\.stanford\.edu/

  href.gsub(%r{^https?://purl\.stanford\.edu/?}, '')
end

def stanford_affiliated_regex
Regexp.new(/available[ -]?to[ -]?stanford[ -]?affiliated[ -]?users[ -]?a?t?[:;.]?/i)
# Return the collection this wrapper was constructed with, unchanged.
def all
@data
end
end
end
11 changes: 4 additions & 7 deletions spec/lib/access_panels/online_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
include ModsFixtures
include Marc856Fixtures

let(:fulltext) { described_class.new(SolrDocument.new(marcxml: fulltext_856, marc_links_struct: [{:html=>"<a title='' href='https://library.stanford.edu'>Link text</a> ", :text=>"Link text", :href=>"https://library.stanford.edu", :fulltext=>true, :stanford_only=>nil, :finding_aid=>false, :managed_purl=>nil, :file_id=>nil, :druid=>nil}])) }
let(:supplemental) { described_class.new(SolrDocument.new(marcxml: supplemental_856)) }
let(:fulltext) { described_class.new(SolrDocument.new(marc_links_struct: [{:html=>"<a title='' href='https://library.stanford.edu'>Link text</a> ", :text=>"Link text", :href=>"https://library.stanford.edu", :fulltext=>true, :stanford_only=>nil, :finding_aid=>false, :managed_purl=>nil, :file_id=>nil, :druid=>nil}])) }
let(:supplemental) { described_class.new(SolrDocument.new) }
let(:eds_links) do
described_class.new(
SolrDocument.new(
Expand All @@ -24,7 +24,6 @@
described_class.new(
SolrDocument.new(
collection: ['12345'],
marcxml: fulltext_856,
marc_links_struct: [{:html=>"<a title='' href='https://library.stanford.edu'>Link text</a> ", :text=>"Link text", :href=>"https://library.stanford.edu", :fulltext=>true, :stanford_only=>nil, :finding_aid=>false, :managed_purl=>nil, :file_id=>nil, :druid=>nil}]
)
)
Expand All @@ -33,8 +32,7 @@
let(:managed_purl_doc) do
described_class.new(
SolrDocument.new(
managed_purl_urls: ['https://library.stanford.edu'],
marcxml: managed_purl_856
managed_purl_urls: ['https://library.stanford.edu']
)
)
end
Expand All @@ -43,8 +41,7 @@
described_class.new(
SolrDocument.new(
collection: ['12345'],
url_fulltext: 'https://purl.stanford.edu/',
modsxml: mods_everything
url_fulltext: 'https://purl.stanford.edu/'
)
)
end
Expand Down
2 changes: 1 addition & 1 deletion spec/lib/holdings/location_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
let(:item_display) { '12345 -|- HOPKINS -|- STACKS -|- ' }
let(:callnumbers) { [Holdings::Callnumber.new(item_display)] }
let(:not_online_doc) { SolrDocument.new(item_display: [item_display]) }
let(:online_doc) { SolrDocument.new(marcxml: fulltext_856, marc_links_struct: [{ fulltext: true }], item_display: [item_display]) }
let(:online_doc) { SolrDocument.new(marc_links_struct: [{ fulltext: true }], item_display: [item_display]) }
let(:multi_holdings_doc) { SolrDocument.new(item_display: [item_display, '54321 -|- GREEN -|- STACKS -|- ']) }
it 'returns true for materials that are not available online' do
location = Holdings::Location.new('STACKS', callnumbers, not_online_doc)
Expand Down
154 changes: 22 additions & 132 deletions spec/models/concerns/marc_links_spec.rb
Original file line number Diff line number Diff line change
@@ -1,139 +1,29 @@
require "spec_helper"

class MarcLinksTestClass
include MarcLinks
end

describe MarcLinks do
include Marc856Fixtures
it "should return nil for non marc records" do
expect(MarcLinksTestClass.new.marc_links).to be_nil
end
describe "link html, text, and href" do
let(:document) { SolrDocument.new(marcxml: simple_856) }
let(:no_label_document) { SolrDocument.new(marcxml: labelless_856) }
let(:link_html) { document.marc_links.all.first.html }
let(:link_text) { document.marc_links.all.first.text }
let(:link_href) { document.marc_links.all.first.href }
it "should place the $3 and $y as the link text" do
expect(link_html).to match /<a.*>Link text 1 Link text 2<\/a>/
end
it "should place the $z as the link title attribute" do
expect(link_html).to match /<a.*title='Title text1 Title text2'.*>/
end
it "should use the host of the URL if no text is available" do
expect(no_label_document.marc_links.all.first.html).to match /<a.*>library.stanford.edu<\/a>/
end
it 'should include the plain text version' do
expect(link_text).to eq "Link text 1 Link text 2"
end
it 'should include the href' do
expect(link_href).to eq "https://library.stanford.edu"
end
end
describe "casalini links" do
let(:document) { SolrDocument.new(marcxml: casalini_856) }
let(:link_text) { document.marc_links.all.first.html }
it "should not have any text before the link" do
expect(link_text).to match /^<a /
end
it "should place $3 as the link text" do
expect(link_text).to match /<a.*>Link text<\/a>/
end
it "should place '(source: Casalini)' after the link" do
expect(link_text).to match /<\/a> \(source: Casalini\)/
end
end
describe "stanford_only?" do
let(:document) { SolrDocument.new(marcxml: stanford_only_856) }
let(:links) { document.marc_links.all }
it "should identify all the permutations of the Stanford Only string as Stanford Only resources" do
expect(links).to be_present
expect(links.all?(&:stanford_only?)).to be_truthy
end
end
describe "fulltext?" do
let(:document) { SolrDocument.new(marcxml: fulltext_856) }
let(:links) { document.marc_links.all }
it "method should return all fulltext links" do
expect(links).to eq document.marc_links.fulltext
end
it "should identify fulltext links" do
expect(links).to be_present
expect(links.all?(&:fulltext?)).to be_truthy
end
end

describe 'managed_purl?' do
let(:document) { SolrDocument.new(marcxml: managed_purl_856) }
let(:links) { document.marc_links }

it 'should return the managed purl links' do
expect(links.all).to be_present
expect(links.managed_purls).to be_present
expect(links.all).to eq(links.managed_purls)
expect(links.all.all?(&:managed_purl?)).to be true
end

it 'should return the file_id (without "file:")' do
expect(links.all.first.file_id).to eq 'abc123'
end

it 'should return the sort (without "sort:")' do
expect(links.all.first.sort).to eq '123'
end

it 'should return the label (without "label:")' do
expect(links.all.first.text).to eq 'some label'
end

context 'when stanford affiliated' do
let(:document) { SolrDocument.new(marcxml: stanford_affiliated_managed_purl_856) }

it 'does not include emmpty html from the additional_text that cannot be displayed' do
expect(links.all.first.text).to be_blank
end
end
end

describe "#supplemental" do
let(:document) { SolrDocument.new(marcxml: supplemental_856) }
let(:links) { document.marc_links.all }
it "method should return all supplemental links" do
expect(links).to eq document.marc_links.supplemental
end
it "should identify supplemental links" do
expect(links).to be_present
expect(links.any?(&:fulltext?)).to be_falsey
end
end
describe "#finding_aid" do
let(:document) { SolrDocument.new(marcxml: finding_aid_856) }
let(:links) { document.marc_links.all }
it "should return all finding aid links" do
expect(links).to be_present
expect(links.all?(&:finding_aid?)).to be_truthy
expect(links).to eq document.marc_links.finding_aid
end
end
describe "ez-proxy" do
let(:document) { SolrDocument.new(marcxml: ez_proxy_856 ) }
let(:links) { document.marc_links.all }
it "should place the host of the url parameter as link text of no explicit label is available" do
expect(links.first.html).to match /<a.*>library.stanford.edu<\/a/
end
end
describe "bad URLs" do
it "should not return anything when an 856 has no $u" do
document = SolrDocument.new(marcxml: no_url_856)
expect(document.marc_links.all).to_not be_present
end

it 'handles pulling th proxy host out of URLs with spaces in them' do
document = SolrDocument.new(marcxml: ez_proxy_with_spaces_856)

expect(document.marc_links.all.length).to eq 1
expect(document.marc_links.all.first.html).to match(%r{>library\.stanford\.edu</a>})
it "should return an empty array for non marc records" do
expect( SolrDocument.new.marc_links.all).to eq []
end

describe 'with a SolrDocument with structured data extracted from the marc' do
let(:document) do
SolrDocument.new(marc_links_struct: [
{ text: 'fulltext', fulltext: true },
{ text: 'stanford only', stanford_only: true },
{ html: 'finding aid', finding_aid: true },
{ text: 'druid', managed_purl: true, file_id: 'x', druid: 'abc' },
])
end

it 'decodes structured data in the document' do
expect(document.marc_links.all.length).to eq 4
expect(document.marc_links.fulltext.first.text).to eq 'fulltext'
expect(document.marc_links.finding_aid.first.html).to eq 'finding aid'
expect(document.marc_links.supplemental.first).to be_stanford_only
expect(document.marc_links.managed_purls.first.text).to eq 'druid'
expect(document.marc_links.managed_purls.first.file_id).to eq 'x'
expect(document.marc_links.managed_purls.first.druid).to eq 'abc'
end
end
end
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
document: SolrDocument.new(
id: '12345',
isbn_display: [123],
marcbib_xml: stanford_only_856,
marc_links_struct: [
{ html: 'a', fulltext: true },
{ html: 'b', fulltext: true },
Expand Down
Loading

0 comments on commit 1cf376b

Please sign in to comment.