pulibrary · escowles · Dec 21, 2018 · Sep 21, 2018 · Sep 21, 2018 · Dec 19, 2018
diff --git a/marc_to_solr/lib/princeton_marc.rb b/marc_to_solr/lib/princeton_marc.rb
@@ -270,22 +270,27 @@ def set_pub_citation(record)
 
 SEPARATOR = '—'
 
-# for the hierarchical subject display and facet
-# split with em dash along v,x,y,z
-def process_subject_facet record, fields
-  subjects = []
+# Builds hierarchical subject/genre headings: subfields t, v, x, y and z each start a new
+# level, levels are joined with SEPARATOR (em dash) and punctuation is trimmed per level.
+# vocabulary: optional whitelist of subfield $2 values; when empty every heading is returned.
+def process_hierarchy(record, fields, vocabulary = [])
+  headings = []
+  split_on_subfield = ['t', 'v', 'x', 'y', 'z']
   Traject::MarcExtractor.cached(fields).collect_matching_lines(record) do |field, spec, extractor|
-    subject = extractor.collect_subfields(field, spec).first
-    unless subject.nil?
+    heading = extractor.collect_subfields(field, spec).first
+    include_heading = vocabulary.empty? # always include the heading if a vocabulary is not specified
+    unless heading.nil?
       field.subfields.each do |s_field|
-        subject = subject.gsub(" #{s_field.value}", "#{SEPARATOR}#{s_field.value}") if (s_field.code == 'v' || s_field.code == 'x' || s_field.code == 'y' || s_field.code == 'z')
+        # when specified, only include heading if it is part of the vocabulary
+        include_heading = vocabulary.include?(s_field.value) if s_field.code == '2' && !vocabulary.empty?
+        # skip empty values: the pattern would be a bare space and split the heading at every word
+        heading = heading.gsub(" #{s_field.value}", "#{SEPARATOR}#{s_field.value}") if split_on_subfield.include?(s_field.code) && !s_field.value.to_s.empty?
       end
-      subject = subject.split(SEPARATOR)
-      subject = subject.map{ |s| Traject::Macros::Marc21.trim_punctuation(s) }.join(SEPARATOR)
-      subjects << subject
+      heading = heading.split(SEPARATOR).map { |s| Traject::Macros::Marc21.trim_punctuation(s) }.join(SEPARATOR)
+      headings << heading if include_heading
     end
   end
-  subjects
+  headings
 end
 
 # for the split subject facet

diff --git a/marc_to_solr/lib/traject_config.rb b/marc_to_solr/lib/traject_config.rb
@@ -736,16 +736,26 @@
 #    650 XX abc{v--%}{x--%}{z--%}{y--%} S abcvxyz
 #    651 XX a{v--%}{x--%}{y--%}{z--%} S avxyz
 to_field 'subject_display' do |record, accumulator|
-  subjects = process_subject_facet(record, '600|*0|abcdfklmnopqrtvxyz:610|*0|abfklmnoprstvxyz:611|*0|abcdefgklnpqstvxyz:630|*0|adfgklmnoprstvxyz:650|*0|abcvxyz:651|*0|avxyz')
+  subjects = process_hierarchy(record, '600|*0|abcdfklmnopqrtvxyz:610|*0|abfklmnoprstvxyz:611|*0|abcdefgklnpqstvxyz:630|*0|adfgklmnoprstvxyz:650|*0|abcvxyz:651|*0|avxyz')
   accumulator.replace(subjects)
 end
 
 # used for the browse lists and hierarchical subject facet
 to_field 'subject_facet' do |record, accumulator|
-  subjects = process_subject_facet(record, '600|*0|abcdfklmnopqrtvxyz:610|*0|abfklmnoprstvxyz:611|*0|abcdefgklnpqstvxyz:630|*0|adfgklmnoprstvxyz:650|*0|abcvxyz:651|*0|avxyz')
+  subjects = process_hierarchy(record, '600|*0|abcdfklmnopqrtvxyz:610|*0|abfklmnoprstvxyz:611|*0|abcdefgklnpqstvxyz:630|*0|adfgklmnoprstvxyz:650|*0|abcvxyz:651|*0|avxyz')
   accumulator.replace(subjects)
 end
 
+# lcgft genre headings: 655 fields with second indicator 7 whose $2 is lcgft
+to_field 'lcgft_s' do |record, accumulator|
+  accumulator.replace(process_hierarchy(record, '655|*7|avxyz', ['lcgft']))
+end
+
+# rbgenr genre headings: 655 fields with second indicator 7 whose $2 is rbgenr
+to_field 'rbgenr_s' do |record, accumulator|
+  accumulator.replace(process_hierarchy(record, '655|*7|avxyz', ['rbgenr']))
+end
+
 to_field 'cjk_subject', extract_marc('600|*0|abcdfklmnopqrtvxyz:610|*0|abfklmnoprstvxyz:611|*0|abcdefgklnpqstvxyz:630|*0|adfgklmnoprstvxyz:650|*0|abcvxyz:651|*0|avxyz', alternate_script: :only)
 
 # used for split subject topic facet
@@ -827,7 +837,10 @@
 
 # Form/Genre
 #    655 |7 a{v--%}{x--%}{y--%}{z--%} S avxyz
-to_field 'form_genre_display', extract_marc('655avxyz')
+to_field 'form_genre_display' do |record, accumulator|
+  # no vocabulary filter: every 655 heading is kept, split into hierarchy levels
+  accumulator.replace(process_hierarchy(record, '655avxyz'))
+end
 
 # 600/610/650/651 $v, $x filtered
 # 655 $a, $v, $x filtered
@@ -1077,6 +1090,16 @@
   end
 end
 
+# Genre headings not already listed under lcgft_s or rbgenr_s get their own display field.
+each_record do |_record, context|
+  all_genres = context.output_hash['form_genre_display']
+  if all_genres
+    claimed = %w[lcgft_s rbgenr_s].flat_map { |key| context.output_hash[key] || [] }
+    leftover = all_genres - claimed # always a new array, never an alias of form_genre_display
+    context.output_hash['form_genre_remaining_display'] = leftover unless leftover.empty?
+  end
+end
+
 # Process location code once
 each_record do |record, context|
   location_codes = []

diff --git a/marc_to_solr/spec/lib/config_spec.rb b/marc_to_solr/spec/lib/config_spec.rb
@@ -457,4 +457,13 @@ def fixture_record(fixture_name)
       expect(thesis_bc_marc['format']).to include 'Senior thesis'
     end
   end
+  describe 'form_genre_remaining_display field' do
+    let(:g655_lcgft) { { "655"=>{ "ind1"=>"", "ind2"=>"7", "subfields"=>[{ "a"=>"Genre" }, { "2"=>"lcgft" }] } } }
+    let(:g655) { { "655"=>{ "ind1"=>"", "ind2"=>"7", "subfields"=>[{ "a"=>"Exclude from subject browse" }] } } }
+    let(:genre_subject_marc) { @indexer.map_record(MARC::Record.new_from_hash('fields' => [g655, g655_lcgft], 'leader' => leader)) }
+    # g655 carries no $2, so it is absent from lcgft_s and must remain after the subtraction
+    it 'excludes lcgft headings' do
+      expect(genre_subject_marc['form_genre_remaining_display']).to eq ['Exclude from subject browse']
+    end
+  end
 end
diff --git a/marc_to_solr/spec/lib/princeton_marc_spec.rb b/marc_to_solr/spec/lib/princeton_marc_spec.rb
@@ -429,6 +429,65 @@
     end
   end
 
+  describe 'form_genre_display' do
+    subject(:form_genre_display) { indexer.map_record(marc_record) }
+    let(:leader) { '1234567890' }
+    let(:field_655) do
+      {
+        "655" => {
+          "ind1" => "",
+          "ind2" => "0",
+          "subfields" => [
+            {
+              "a" => "Culture."
+            },
+            {
+              "v" => "Awesome"
+            },
+            {
+              "x" => "Dramatic rendition"
+            },
+            {
+              "y" => "19th century."
+            }
+          ]
+        }
+      }
+    end
+    let(:field_655_2) do
+      {
+        "655" => {
+          "ind1" => "",
+          "ind2" => "7",
+          "subfields" => [
+            {
+              "a" => "Poetry"
+            },
+            {
+              "x" => "Translations into French"
+            },
+            {
+              "v" => "Maps"
+            },
+            {
+              "y" => "19th century."
+            }
+          ]
+        }
+      }
+    end
+    let(:marc_record) do
+      MARC::Record.new_from_hash('leader' => leader, 'fields' => [field_655, field_655_2])
+    end
+    it "indexes the subfields as em-dash-delimited hierarchical values" do
+      expect(form_genre_display).not_to be_empty
+      expect(form_genre_display).to include "form_genre_display"
+      expect(form_genre_display["form_genre_display"].length).to eq(2)
+      expect(form_genre_display["form_genre_display"].first).to eq("Culture#{SEPARATOR}Awesome#{SEPARATOR}Dramatic rendition#{SEPARATOR}19th century")
+      expect(form_genre_display["form_genre_display"].last).to eq("Poetry#{SEPARATOR}Translations into French#{SEPARATOR}Maps#{SEPARATOR}19th century")
+    end
+  end
+
   describe 'process_genre_facet function' do
     before(:all) do
       @g600 = { "600"=>{ "ind1"=>"", "ind2"=>"0", "subfields"=>[{ "a"=>"Exclude" }, { "v"=>"John" }, { "x"=>"Join" }] } }
@@ -467,24 +526,37 @@
     end
   end
 
-  describe 'process_subject_facet function' do
+  describe 'process_hierarchy function' do
     before(:all) do
       @s610_ind2_5 = { "600"=>{ "ind1"=>"", "ind2"=>"5", "subfields"=>[{ "a"=>"Exclude" }] } }
       @s600_ind2_7 = { "600"=>{ "ind1"=>"", "ind2"=>"7", "subfields"=>[{ "a"=>"Also Exclude" }] } }
-      @s600 = { "600"=>{ "ind1"=>"", "ind2"=>"0", "subfields"=>[{ "a"=>"John." }, { "t"=>"Title." }, { "v"=>"split genre" }, { "d"=>"2015" }] } }
+      @s600 = { "600"=>{ "ind1"=>"", "ind2"=>"0", "subfields"=>[{ "a"=>"John." }, { "t"=>"Title." }, { "v"=>"split genre" }, { "d"=>"2015" }, { "2"=>"special" }] } }
       @s630 = { "630"=>{ "ind1"=>"", "ind2"=>"0", "subfields"=>[{ "x"=>"Fiction" }, { "y"=>"1492" }, { "z"=>"don't ignore" }, { "t"=>"TITLE." }] } }
-      @sample_marc = MARC::Record.new_from_hash('fields' => [@s610_ind2_5, @s600, @s630])
-      @subjects = process_subject_facet(@sample_marc, '600|*0|abcdfklmnopqrtvxyz:630|*0|adfgklmnoprstvxyz')
+      @sample_marc = MARC::Record.new_from_hash('fields' => [@s610_ind2_5, @s600_ind2_7, @s600, @s630]) # ind2=7 field included so the "Also Exclude" expectation is actually exercised
+      @subjects = process_hierarchy(@sample_marc, '600|*0|abcdfklmnopqrtvxyz:630|*0|adfgklmnoprstvxyz')
+      @vocab_subjects = process_hierarchy(@sample_marc, '600|*0|abcdfklmnopqrtvxyz:630|*0|adfgklmnoprstvxyz', ['vocab'])
+      @special_subjects = process_hierarchy(@sample_marc, '600|*0|abcdfklmnopqrtvxyz:630|*0|adfgklmnoprstvxyz', ['special'])
     end
 
-    it 'excludes subjects without 0 in the 2nd indicator' do
-      expect(@subjects).not_to include("Exclude")
-      expect(@subjects).not_to include("Also Exclude")
+    describe 'when an optional vocabulary limit is not provided' do
+      it 'excludes subjects without 0 in the 2nd indicator' do
+        expect(@subjects).not_to include("Exclude")
+        expect(@subjects).not_to include("Also Exclude")
+      end
+
+      it 'only separates t,v,x,y,z with em dash, strips punctuation' do
+        expect(@subjects).to include("John#{SEPARATOR}Title#{SEPARATOR}split genre 2015")
+        expect(@subjects).to include("Fiction#{SEPARATOR}1492#{SEPARATOR}don't ignore#{SEPARATOR}TITLE")
+      end
     end
 
-    it 'only separates v,x,y,z with em dash, strips punctuation' do
-      expect(@subjects).to include("John. Title#{SEPARATOR}split genre 2015")
-      expect(@subjects).to include("Fiction#{SEPARATOR}1492#{SEPARATOR}don't ignore TITLE")
+    describe 'when a vocabulary limit is provided' do
+      it 'excludes headings missing a subfield 2 or part of a different vocab' do
+        expect(@vocab_subjects).to eq []
+      end
+      it 'only includes the heading from a matching subfield 2 value' do
+        expect(@special_subjects).to eq ["John#{SEPARATOR}Title#{SEPARATOR}split genre 2015"]
+      end
     end
   end