From 778334d24b78ba91d1bf0f7bc52103650ea98e07 Mon Sep 17 00:00:00 2001 From: "shigeru.nakajima" Date: Wed, 20 Feb 2019 21:03:55 +0900 Subject: [PATCH] Ignore phonetic runs that appear in Japanese text --- lib/creek/shared_strings.rb | 2 +- spec/fixtures/sst.xml | 10 ++++++++++ spec/shared_string_spec.rb | 3 ++- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/creek/shared_strings.rb b/lib/creek/shared_strings.rb index 6abd2e3..dd5c922 100644 --- a/lib/creek/shared_strings.rb +++ b/lib/creek/shared_strings.rb @@ -29,7 +29,7 @@ def self.parse_shared_string_from_document(xml) dictionary = Hash.new xml.css('si').each_with_index do |si, idx| - text_nodes = si.css('t') + text_nodes = si.css('>t, r t') if text_nodes.count == 1 # plain text node dictionary[idx] = Creek::Styles::Converter.unescape_string(text_nodes.first.content) else # rich text nodes with text fragments diff --git a/spec/fixtures/sst.xml b/spec/fixtures/sst.xml index 112db32..2b54051 100644 --- a/spec/fixtures/sst.xml +++ b/spec/fixtures/sst.xml @@ -78,4 +78,14 @@ Cell with_x000D_escaped_x000D_characters + + 吉田兼好 + + ヨシダ + + + ケンコウ + + + \ No newline at end of file diff --git a/spec/shared_string_spec.rb b/spec/shared_string_spec.rb index 11fbf20..5039898 100644 --- a/spec/shared_string_spec.rb +++ b/spec/shared_string_spec.rb @@ -7,13 +7,14 @@ doc = Nokogiri::XML(shared_strings_xml_file) dictionary = Creek::SharedStrings.parse_shared_string_from_document(doc) - expect(dictionary.keys.size).to eq(6) + expect(dictionary.keys.size).to eq(7) expect(dictionary[0]).to eq('Cell A1') expect(dictionary[1]).to eq('Cell B1') expect(dictionary[2]).to eq('My Cell') expect(dictionary[3]).to eq('Cell A2') expect(dictionary[4]).to eq('Cell B2') expect(dictionary[5]).to eq("Cell with\rescaped\rcharacters") + expect(dictionary[6]).to eq('吉田兼好') end end \ No newline at end of file