Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Genre subject browse #471

Merged
merged 5 commits into from
Dec 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 16 additions & 11 deletions marc_to_solr/lib/princeton_marc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -270,22 +270,27 @@ def set_pub_citation(record)

SEPARATOR = '—'

# for the hierarchical subject display and facet
# split with em dash along v,x,y,z
def process_subject_facet record, fields
subjects = []
# for the hierarchical subject/genre display
# split with em dash along t,v,x,y,z
# optional vocabulary argument for whitelisting subfield $2 vocabularies
def process_hierarchy(record, fields, vocabulary = [])
headings = []
split_on_subfield = ['t', 'v', 'x', 'y', 'z']
Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, spec, extractor|
subject = extractor.collect_subfields(field, spec).first
unless subject.nil?
heading = extractor.collect_subfields(field, spec).first
include_heading = vocabulary.empty? # always include the heading if a vocabulary is not specified
unless heading.nil?
field.subfields.each do |s_field|
subject = subject.gsub(" #{s_field.value}", "#{SEPARATOR}#{s_field.value}") if (s_field.code == 'v' || s_field.code == 'x' || s_field.code == 'y' || s_field.code == 'z')
# when specified, only include heading if it is part of the vocabulary
include_heading = vocabulary.include?(s_field.value) if s_field.code == '2' && !vocabulary.empty?
heading = heading.gsub(" #{s_field.value}", "#{SEPARATOR}#{s_field.value}") if split_on_subfield.include?(s_field.code)
end
subject = subject.split(SEPARATOR)
subject = subject.map{ |s| Traject::Macros::Marc21.trim_punctuation(s) }.join(SEPARATOR)
subjects << subject
heading = heading.split(SEPARATOR)
heading = heading.map{ |s| Traject::Macros::Marc21.trim_punctuation(s) }.join(SEPARATOR)
headings << heading if include_heading
end
end
subjects
headings
end

# for the split subject facet
Expand Down
29 changes: 26 additions & 3 deletions marc_to_solr/lib/traject_config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -736,16 +736,26 @@
# 650 XX abc{v--%}{x--%}{z--%}{y--%} S abcvxyz
# 651 XX a{v--%}{x--%}{y--%}{z--%} S avxyz
to_field 'subject_display' do |record, accumulator|
subjects = process_subject_facet(record, '600|*0|abcdfklmnopqrtvxyz:610|*0|abfklmnoprstvxyz:611|*0|abcdefgklnpqstvxyz:630|*0|adfgklmnoprstvxyz:650|*0|abcvxyz:651|*0|avxyz')
subjects = process_hierarchy(record, '600|*0|abcdfklmnopqrtvxyz:610|*0|abfklmnoprstvxyz:611|*0|abcdefgklnpqstvxyz:630|*0|adfgklmnoprstvxyz:650|*0|abcvxyz:651|*0|avxyz')
accumulator.replace(subjects)
end

# used for the browse lists and hierarchical subject facet
to_field 'subject_facet' do |record, accumulator|
subjects = process_subject_facet(record, '600|*0|abcdfklmnopqrtvxyz:610|*0|abfklmnoprstvxyz:611|*0|abcdefgklnpqstvxyz:630|*0|adfgklmnoprstvxyz:650|*0|abcvxyz:651|*0|avxyz')
subjects = process_hierarchy(record, '600|*0|abcdfklmnopqrtvxyz:610|*0|abfklmnoprstvxyz:611|*0|abcdefgklnpqstvxyz:630|*0|adfgklmnoprstvxyz:650|*0|abcvxyz:651|*0|avxyz')
accumulator.replace(subjects)
end

# 655 headings (spec '655|*7|avxyz') limited to the 'lcgft' vocabulary:
# process_hierarchy only keeps a heading whose subfield $2 value is in the list.
to_field 'lcgft_s' do |record, accumulator|
  accumulator.replace(process_hierarchy(record, '655|*7|avxyz', ['lcgft']))
end

# 655 headings (spec '655|*7|avxyz') limited to the 'rbgenr' vocabulary:
# process_hierarchy only keeps a heading whose subfield $2 value is in the list.
to_field 'rbgenr_s' do |record, accumulator|
  accumulator.replace(process_hierarchy(record, '655|*7|avxyz', ['rbgenr']))
end

to_field 'cjk_subject', extract_marc('600|*0|abcdfklmnopqrtvxyz:610|*0|abfklmnoprstvxyz:611|*0|abcdefgklnpqstvxyz:630|*0|adfgklmnoprstvxyz:650|*0|abcvxyz:651|*0|avxyz', alternate_script: :only)

# used for split subject topic facet
Expand Down Expand Up @@ -827,7 +837,10 @@

# Form/Genre
# 655 |7 a{v--%}{x--%}{y--%}{z--%} S avxyz
to_field 'form_genre_display', extract_marc('655avxyz')
# Hierarchical display form of every 655 $a$v$x$y$z heading; no vocabulary
# list is passed, so process_hierarchy keeps all matching headings.
to_field 'form_genre_display' do |record, accumulator|
  accumulator.replace(process_hierarchy(record, '655avxyz'))
end

# 600/610/650/651 $v, $x filtered
# 655 $a, $v, $x filtered
Expand Down Expand Up @@ -1077,6 +1090,16 @@
end
end

# Derive form_genre_remaining_display: the genre display headings that were
# not already captured by the lcgft_s or rbgenr_s fields.
each_record do |_record, context|
  output = context.output_hash
  all_genres = output['form_genre_display']
  # Nothing to derive when the record produced no genre headings.
  next unless all_genres

  # Array(nil) is [], so an absent vocabulary field subtracts nothing.
  leftover = all_genres - Array(output['lcgft_s']) - Array(output['rbgenr_s'])
  # Only write the field when at least one heading is left over.
  output['form_genre_remaining_display'] = leftover unless leftover.empty?
end


# Process location code once
each_record do |record, context|
location_codes = []
Expand Down
9 changes: 9 additions & 0 deletions marc_to_solr/spec/lib/config_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -457,4 +457,13 @@ def fixture_record(fixture_name)
expect(thesis_bc_marc['format']).to include 'Senior thesis'
end
end
# Maps a record with two 655 fields: one declaring the 'lcgft' vocabulary in
# subfield $2 and one with no $2 at all.
describe 'combined subject_facet field' do
  # 655 with second indicator 7 and $2 lcgft.
  let(:g655_lcgft) do
    {
      "655" => {
        "ind1" => "",
        "ind2" => "7",
        "subfields" => [{ "a" => "Genre" }, { "2" => "lcgft" }]
      }
    }
  end

  # 655 with second indicator 7 but no $2 vocabulary.
  let(:g655) do
    {
      "655" => {
        "ind1" => "",
        "ind2" => "7",
        "subfields" => [{ "a" => "Exclude from subject browse" }]
      }
    }
  end

  let(:genre_subject_marc) do
    marc = MARC::Record.new_from_hash('fields' => [g655, g655_lcgft], 'leader' => leader)
    @indexer.map_record(marc)
  end

  it 'form_genre_remaining_display field excludes lcgft headings' do
    expect(genre_subject_marc['form_genre_remaining_display']).to eq ['Exclude from subject browse']
  end
end
end
90 changes: 81 additions & 9 deletions marc_to_solr/spec/lib/princeton_marc_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,65 @@
end
end

# Checks that 655 headings are indexed into form_genre_display with their
# v/x/y subdivisions joined by SEPARATOR and punctuation trimmed.
describe 'form_genre_display' do
  # NOTE: the subject is the whole mapped output hash, not just the one field.
  subject(:form_genre_display) { indexer.map_record(marc_record) }

  let(:leader) { '1234567890' }

  # 655 with second indicator 0; $a and $y carry trailing periods.
  let(:field_655) do
    {
      "655" => {
        "ind1" => "",
        "ind2" => "0",
        "subfields" => [
          { "a" => "Culture." },
          { "v" => "Awesome" },
          { "x" => "Dramatic rendition" },
          { "y" => "19th century." }
        ]
      }
    }
  end

  # 655 with second indicator 7; subdivisions appear in x, v, y order.
  let(:field_655_2) do
    {
      "655" => {
        "ind1" => "",
        "ind2" => "7",
        "subfields" => [
          { "a" => "Poetry" },
          { "x" => "Translations into French" },
          { "v" => "Maps" },
          { "y" => "19th century." }
        ]
      }
    }
  end

  let(:marc_record) do
    MARC::Record.new_from_hash('leader' => leader, 'fields' => [field_655, field_655_2])
  end

  # The description previously said "semicolon-delimited"; the assertions
  # below check values joined by SEPARATOR, so the wording now matches.
  it "indexes the subfields as em dash-delimited hierarchical values" do
    expect(form_genre_display).not_to be_empty
    expect(form_genre_display).to include "form_genre_display"
    expect(form_genre_display["form_genre_display"].length).to eq(2)
    expect(form_genre_display["form_genre_display"].first).to eq("Culture#{SEPARATOR}Awesome#{SEPARATOR}Dramatic rendition#{SEPARATOR}19th century")
    expect(form_genre_display["form_genre_display"].last).to eq("Poetry#{SEPARATOR}Translations into French#{SEPARATOR}Maps#{SEPARATOR}19th century")
  end
end

describe 'process_genre_facet function' do
before(:all) do
@g600 = { "600"=>{ "ind1"=>"", "ind2"=>"0", "subfields"=>[{ "a"=>"Exclude" }, { "v"=>"John" }, { "x"=>"Join" }] } }
Expand Down Expand Up @@ -467,24 +526,37 @@
end
end

describe 'process_subject_facet function' do
describe 'process_hierarchy function' do
before(:all) do
@s610_ind2_5 = { "600"=>{ "ind1"=>"", "ind2"=>"5", "subfields"=>[{ "a"=>"Exclude" }] } }
@s600_ind2_7 = { "600"=>{ "ind1"=>"", "ind2"=>"7", "subfields"=>[{ "a"=>"Also Exclude" }] } }
@s600 = { "600"=>{ "ind1"=>"", "ind2"=>"0", "subfields"=>[{ "a"=>"John." }, { "t"=>"Title." }, { "v"=>"split genre" }, { "d"=>"2015" }] } }
@s600 = { "600"=>{ "ind1"=>"", "ind2"=>"0", "subfields"=>[{ "a"=>"John." }, { "t"=>"Title." }, { "v"=>"split genre" }, { "d"=>"2015" }, { "2"=>"special" }] } }
@s630 = { "630"=>{ "ind1"=>"", "ind2"=>"0", "subfields"=>[{ "x"=>"Fiction" }, { "y"=>"1492" }, { "z"=>"don't ignore" }, { "t"=>"TITLE." }] } }
@sample_marc = MARC::Record.new_from_hash('fields' => [@s610_ind2_5, @s600, @s630])
@subjects = process_subject_facet(@sample_marc, '600|*0|abcdfklmnopqrtvxyz:630|*0|adfgklmnoprstvxyz')
@subjects = process_hierarchy(@sample_marc, '600|*0|abcdfklmnopqrtvxyz:630|*0|adfgklmnoprstvxyz')
@vocab_subjects = process_hierarchy(@sample_marc, '600|*0|abcdfklmnopqrtvxyz:630|*0|adfgklmnoprstvxyz', ['vocab'])
@special_subjects = process_hierarchy(@sample_marc, '600|*0|abcdfklmnopqrtvxyz:630|*0|adfgklmnoprstvxyz', ['special'])
end

it 'excludes subjects without 0 in the 2nd indicator' do
expect(@subjects).not_to include("Exclude")
expect(@subjects).not_to include("Also Exclude")
describe 'when an optional vocabulary limit is not provided' do
it 'excludes subjects without 0 in the 2nd indicator' do
expect(@subjects).not_to include("Exclude")
expect(@subjects).not_to include("Also Exclude")
end

it 'only separates t,v,x,y,z with em dash, strips punctuation' do
expect(@subjects).to include("John#{SEPARATOR}Title#{SEPARATOR}split genre 2015")
expect(@subjects).to include("Fiction#{SEPARATOR}1492#{SEPARATOR}don't ignore#{SEPARATOR}TITLE")
end
end

it 'only separates v,x,y,z with em dash, strips punctuation' do
expect(@subjects).to include("John. Title#{SEPARATOR}split genre 2015")
expect(@subjects).to include("Fiction#{SEPARATOR}1492#{SEPARATOR}don't ignore TITLE")
describe 'when a vocabulary limit is provided' do
it 'excludes headings missing a subfield 2 or part of a different vocab' do
expect(@vocab_subjects).to eq []
end
it 'only includes the heading from a matching subfield 2 value' do
expect(@special_subjects).to eq ["John#{SEPARATOR}Title#{SEPARATOR}split genre 2015"]
end
end
end

Expand Down