From 38b1814de670356c31ba1c875fff5fa5cc19b884 Mon Sep 17 00:00:00 2001 From: Ian Norris Date: Tue, 1 Aug 2023 15:34:25 -0700 Subject: [PATCH] [5042] pull out a custom helper in favor of letting the docx gem mostly handle it instead --------- Co-authored-by: Ian Norris --- .../case_court_reports_controller_spec.rb | 18 +- spec/models/case_court_report_spec.rb | 143 +++++++--- spec/requests/case_court_reports_spec.rb | 13 +- spec/requests/court_dates_spec.rb | 41 +-- spec/support/docx_inspector.rb | 247 ------------------ spec/support/download_helpers.rb | 15 ++ 6 files changed, 158 insertions(+), 319 deletions(-) delete mode 100644 spec/support/docx_inspector.rb diff --git a/spec/controllers/case_court_reports_controller_spec.rb b/spec/controllers/case_court_reports_controller_spec.rb index a3d9a60258..a839b7e764 100644 --- a/spec/controllers/case_court_reports_controller_spec.rb +++ b/spec/controllers/case_court_reports_controller_spec.rb @@ -1,6 +1,7 @@ require "rails_helper" RSpec.describe CaseCourtReportsController, type: :controller do + include DownloadHelpers describe "GET index" do context "when volunteer" do it "successfully accesses 'Generate Court Report' page" do @@ -155,9 +156,9 @@ get :show, params: {id: case_number, format: "docx"} - document_inspector = DocxInspector.new(docx_contents: response.body) + docx_response = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_header_contains?("YOUR CASA ORG’S NUMBER")).to eq(true) + expect(header_text(docx_response)).to include("YOUR CASA ORG’S NUMBER") end end context "when a custom template is set" do @@ -180,9 +181,9 @@ get :show, params: {id: case_number, format: "docx"} - document_inspector = DocxInspector.new(docx_contents: response.body) + download_docx = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_document_contains?("Did you forget to enter your court orders?")).to eq(true) + expect(download_docx.paragraphs.map(&:to_s)).to include("Did you forget to enter your court orders?") end end end @@ -227,9 +228,8 @@ get :show, params: {id: case_number, format: "docx"} - document_inspector = DocxInspector.new(docx_contents: response.body) - - expect(document_inspector.word_list_header_contains?("YOUR CASA ORG’S NUMBER")).to eq(true) + docx = Docx::Document.open(StringIO.new(response.body)) + expect(header_text(docx)).to include("YOUR CASA ORG’S NUMBER") end end context "when a custom template is set" do @@ -252,9 +252,9 @@ get :show, params: {id: case_number, format: "docx"} - document_inspector = DocxInspector.new(docx_contents: response.body) + download_docx = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_document_contains?("Did you forget to enter your court orders?")).to eq(true) + expect(download_docx.paragraphs.map(&:to_s)).to include("Did you forget to enter your court orders?") end end end diff --git a/spec/models/case_court_report_spec.rb b/spec/models/case_court_report_spec.rb index 41e2a18492..1c747c4f19 100644 --- a/spec/models/case_court_report_spec.rb +++ b/spec/models/case_court_report_spec.rb @@ -4,6 +4,7 @@ require "sablon" RSpec.describe CaseCourtReport, type: :model do + include DownloadHelpers let(:path_to_template) { Rails.root.join("app", "documents", "templates", "default_report_template.docx").to_s } let(:path_to_report) { Rails.root.join("tmp", "test_report.docx").to_s } @@ -107,7 +108,6 @@ let(:contact_type) { create(:contact_type, name: document_data[:case_contact_type]) } let(:case_contact) { create(:case_contact, contact_made: false, occurred_at: document_data[:case_contact_time]) } let(:court_order) { create(:case_court_order, implementation_status: :partially_implemented) } - let(:document_inspector) { DocxInspector.new(docx_contents: report.generate_to_string) } before(:each) do casa_case_with_contacts.casa_org.update_attribute(:address, document_data[:org_address]) @@ -124,47 +124,79 @@ end it "displays the org address" do - expect(document_inspector.word_list_header_contains?(document_data[:org_address])).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + expect(header_text(docx_response)).to include(document_data[:org_address]) end it "displays today's date formatted" do - expect(document_inspector.word_list_document_contains?(Date.current.strftime("%B %-d, %Y"))).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + expect(docx_response.paragraphs.map(&:to_s)).to include(/#{Date.current.strftime("%B %-d, %Y")}.*/) end it "displays the case hearing date date formatted" do - expect(document_inspector.word_list_document_contains?(document_data[:case_hearing_date].strftime("%B %-d, %Y"))).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + expect(docx_response.paragraphs.map(&:to_s)).to include(/#{document_data[:case_hearing_date].strftime("%B %-d, %Y")}.*/) end it "displays the case number" do - expect(document_inspector.word_list_document_contains?(document_data[:case_number])).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + expect(docx_response.paragraphs.map(&:to_s)).to include(/#{document_data[:case_number]}.*/) end - it "displays th case contact type" do - expect(document_inspector.word_list_document_contains?(document_data[:case_contact_type])).to eq(true) + it "displays the case contact type" do + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + table_data = docx_response.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + + expect(table_data).to include(/#{document_data[:case_contact_type]}.*/) end - it "displays the case contact tiime date formatted" do - expect(document_inspector.word_list_document_contains?("#{document_data[:case_contact_time].strftime("%-m/%d")}*")).to eq(true) + it "displays the case contact time date formatted" do + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + table_data = docx_response.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + + expect(table_data).to include(/#{document_data[:case_contact_time].strftime("%-m/%d")}.*/) end it "displays the text" do - expect(document_inspector.word_list_document_contains?(document_data[:text])).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + table_data = docx_response.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + + expect(table_data).to include(/#{document_data[:text]}.*/) end it "displays the order status" do - expect(document_inspector.word_list_document_contains?("Partially implemented")).to eq(true) # Order Status + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + table_data = docx_response.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + + expect(table_data).to include("Partially implemented") end it "displays the volunteer name" do - expect(document_inspector.word_list_document_contains?(document_data[:volunteer_name])).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + table_data = docx_response.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + + expect(table_data).to include(/#{document_data[:volunteer_name]}.*/) end it "displays the volunteer case assignment date formatted" do - expect(document_inspector.word_list_document_contains?(document_data[:volunteer_case_assignment_date].strftime("%B %-d, %Y"))).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + table_data = docx_response.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + + expect(table_data).to include(/#{document_data[:volunteer_case_assignment_date].strftime("%B %-d, %Y")}.*/) end it "displayes the supervisor name" do - expect(document_inspector.word_list_document_contains?(document_data[:supervisor_name])).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + table_data = docx_response.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + + expect(table_data).to include(/#{document_data[:supervisor_name]}.*/) end end @@ -199,7 +231,6 @@ let(:contact_type) { create(:contact_type, name: document_data[:case_contact_type]) } let(:case_contact) { create(:case_contact, contact_made: false, occurred_at: document_data[:case_contact_time]) } let(:court_order) { create(:case_court_order, implementation_status: :partially_implemented) } - let(:document_inspector) { DocxInspector.new(docx_contents: report.generate_to_string) } before(:each) do casa_case.casa_org.update_attribute(:address, document_data[:org_address]) @@ -213,31 +244,52 @@ end it "displays today's date formatted" do - expect(document_inspector.word_list_document_contains?(Date.current.strftime("%B %-d, %Y"))).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + expect(docx_response.paragraphs.map(&:to_s)).to include(/#{Date.current.strftime("%B %-d, %Y")}.*/) end it "displays the case hearing date formatted" do - expect(document_inspector.word_list_document_contains?(document_data[:case_hearing_date].strftime("%B %-d, %Y"))).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + expect(docx_response.paragraphs.map(&:to_s)).to include(/#{document_data[:case_hearing_date].strftime("%B %-d, %Y")}.*/) end - it "displays the case numbet" do - expect(document_inspector.word_list_document_contains?(document_data[:case_number])).to eq(true) + it "displays the case number" do + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + expect(docx_response.paragraphs.map(&:to_s)).to include(/.*#{document_data[:case_number]}.*/) end it "displays the case contact type" do - expect(document_inspector.word_list_document_contains?(document_data[:case_contact_type])).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + table_data = docx_response.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + + expect(table_data).to include(document_data[:case_contact_type]) end it "displays the case contact time formatted" do - expect(document_inspector.word_list_document_contains?("#{document_data[:case_contact_time].strftime("%-m/%d")}*")).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + table_data = docx_response.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + + expect(table_data).to include(document_data[:case_contact_time].strftime("%-m/%d*")) end it "displays the test" do - expect(document_inspector.word_list_document_contains?(document_data[:text])).to eq(true) + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + table_data = docx_response.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + + expect(table_data).to include("This text shall not be strikingly similar to other text in the document") end it "displays the order status" do - expect(document_inspector.word_list_document_contains?("Partially implemented")).to eq(true) # Order Status + docx_response = Docx::Document.open(StringIO.new(report.generate_to_string)) + + table_data = docx_response.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + + expect(table_data).to include("Partially implemented") end end end @@ -262,10 +314,10 @@ describe "when court orders has different implementation statuses" do let(:casa_case) { create(:casa_case, case_number: "Sample-Case-12345") } - let(:court_order_implemented) { create(:case_court_order, casa_case: casa_case, text: "K6N-ce8|NuXnht(", implementation_status: :implemented) } - let(:court_order_unimplemented) { create(:case_court_order, casa_case: casa_case, text: "'q\"tE1LP-9W>,2)", implementation_status: :unimplemented) } - let(:court_order_partially_implemented) { create(:case_court_order, casa_case: casa_case, text: "ZmCw@w@\d`&roct", implementation_status: :partially_implemented) } - let(:court_order_not_specified) { create(:case_court_order, casa_case: casa_case, text: "(4WqOL7e'FRYd@%", implementation_status: nil) } + let(:court_order_implemented) { create(:case_court_order, casa_case: casa_case, text: "an order that got done", implementation_status: :implemented) } + let(:court_order_unimplemented) { create(:case_court_order, casa_case: casa_case, text: "an order that got not done", implementation_status: :unimplemented) } + let(:court_order_partially_implemented) { create(:case_court_order, casa_case: casa_case, text: "an order that got kinda done", implementation_status: :partially_implemented) } + let(:court_order_not_specified) { create(:case_court_order, casa_case: casa_case, text: "what is going on", implementation_status: nil) } let(:args) do { case_id: casa_case.id, @@ -275,7 +327,6 @@ end let(:context) { CaseCourtReportContext.new(args).context } let(:case_report) { CaseCourtReport.new(path_to_template: path_to_template, context: context) } - let(:document_inspector) { DocxInspector.new(docx_contents: case_report.generate_to_string) } before(:each) do casa_case.case_court_orders << court_order_implemented @@ -285,39 +336,57 @@ end it "contains the case number" do - expect(document_inspector.word_list_document_contains?(casa_case.case_number)).to eq(true) + docx_response = Docx::Document.open(StringIO.new(case_report.generate_to_string)) + + expect(docx_response.paragraphs.map(&:to_s)).to include(/#{casa_case.case_number}*/) end it "contains the court order text" do - expect(document_inspector.word_list_document_contains?(court_order_implemented.text)).to eq(true) + docx_response = Docx::Document.open(StringIO.new(case_report.generate_to_string)) + + expect(table_text(docx_response)).to include(/#{court_order_implemented.text}.*/) end it "contains the exact value of 'Implemented'" do - expect(document_inspector.word_list_document_contains?("Implemented")).to eq(true) + docx_response = Docx::Document.open(StringIO.new(case_report.generate_to_string)) + + expect(table_text(docx_response)).to include(/Implemented.*/) end it "contains the court order text" do - expect(document_inspector.word_list_document_contains?(court_order_unimplemented.text)).to eq(true) + docx_response = Docx::Document.open(StringIO.new(case_report.generate_to_string)) + + expect(table_text(docx_response)).to include(/#{court_order_unimplemented.text}.*/) end it "contains the exact value of 'Unimplemented'" do - expect(document_inspector.word_list_document_contains?("Unimplemented")).to eq(true) + docx_response = Docx::Document.open(StringIO.new(case_report.generate_to_string)) + + expect(table_text(docx_response)).to include(/Unimplemented.*/) end it "contains the court order text" do - expect(document_inspector.word_list_document_contains?(court_order_partially_implemented.text)).to eq(true) + docx_response = Docx::Document.open(StringIO.new(case_report.generate_to_string)) + + expect(table_text(docx_response)).to include(/#{court_order_partially_implemented.text}.*/) end it "contains the exact value of 'Partially implemented'" do - expect(document_inspector.word_list_document_contains?("Partially implemented")).to eq(true) + docx_response = Docx::Document.open(StringIO.new(case_report.generate_to_string)) + + expect(table_text(docx_response)).to include(/Partially implemented.*/) end it "contains the court order text" do - expect(document_inspector.word_list_document_contains?(court_order_not_specified.text)).to eq(true) + docx_response = Docx::Document.open(StringIO.new(case_report.generate_to_string)) + + expect(table_text(docx_response)).to include(/#{court_order_not_specified.text}.*/) end it "contains the exact value of 'Not specified'" do - expect(document_inspector.word_list_document_contains?("Not specified")).to eq(true) + docx_response = Docx::Document.open(StringIO.new(case_report.generate_to_string)) + + expect(table_text(docx_response)).to include(/Not specified.*/) end end end diff --git a/spec/requests/case_court_reports_spec.rb b/spec/requests/case_court_reports_spec.rb index 9d8207ebc1..42ca28a2b0 100644 --- a/spec/requests/case_court_reports_spec.rb +++ b/spec/requests/case_court_reports_spec.rb @@ -1,6 +1,7 @@ require "rails_helper" RSpec.describe "/case_court_reports", type: :request do + include DownloadHelpers let(:volunteer) { create(:volunteer, :with_cases_and_contacts, :with_assigned_supervisor) } before do @@ -162,9 +163,9 @@ request_generate_court_report get JSON.parse(response.body)["link"] - document_inspector = DocxInspector.new(docx_contents: response.body) + docx_response = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_header_contains?("YOUR CASA ORG’S NUMBER")).to eq(true) + expect(header_text(docx_response)).to include("YOUR CASA ORG’S NUMBER") end context "as a supervisor" do @@ -203,9 +204,9 @@ it "uses the custom template" do get JSON.parse(response.body)["link"] - document_inspector = DocxInspector.new(docx_contents: response.body) + docx_response = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_document_contains?("Did you forget to enter your court orders?")).to eq(true) + expect(docx_response.paragraphs.map(&:to_s)).to include("Did you forget to enter your court orders?") end end end @@ -233,9 +234,9 @@ get JSON.parse(response.body)["link"] - document_inspector = DocxInspector.new(docx_contents: response.body) + docx_response = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_document_contains?(I18n.l(user_different_timezone.at(server_time).to_date, format: :full, default: nil))).to eq(true) + expect(docx_response.paragraphs.map(&:to_s)).to include("Date Written: #{I18n.l(user_different_timezone.at(server_time).to_date, format: :full, default: nil)}") end end end diff --git a/spec/requests/court_dates_spec.rb b/spec/requests/court_dates_spec.rb index 19e978464a..a78a7748d2 100644 --- a/spec/requests/court_dates_spec.rb +++ b/spec/requests/court_dates_spec.rb @@ -1,6 +1,7 @@ require "rails_helper" RSpec.describe "/casa_cases/:casa_case_id/court_dates/:id", type: :request do + include DownloadHelpers let(:admin) { create(:casa_admin) } let(:casa_case) { court_date.casa_case } let(:court_date) { create(:court_date) } @@ -66,9 +67,9 @@ it "displays the court date" do show - document_inspector = DocxInspector.new(docx_contents: response.body) + docx_response = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_document_contains?("December 25, 2020")).to eq(true) + expect(docx_response.paragraphs.map(&:to_s)).to include(/December 25, 2020/) end context "when a judge is attached" do @@ -78,9 +79,9 @@ it "includes the judge's name in the document" do show - document_inspector = DocxInspector.new(docx_contents: response.body) + docx_response = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_document_contains?(judge.name)).to eq(true) + expect(docx_response.paragraphs.map(&:to_s)).to include(/#{judge.name}/) end end @@ -91,11 +92,11 @@ it "includes None for the judge's name in the document" do show - document_inspector = DocxInspector.new(docx_contents: response.body) + docx_response = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_document_contains?(judge.name)).to eq(false) - expect(document_inspector.word_list_document_contains?("Judge:")).to eq(true) # Judge: None - expect(document_inspector.word_list_document_contains?("None")).to eq(true) + expect(docx_response.paragraphs.map(&:to_s)).not_to include(/#{judge.name}/) + expect(docx_response.paragraphs.map(&:to_s)).to include(/Judge:/) + expect(docx_response.paragraphs.map(&:to_s)).to include(/None/) end end @@ -106,9 +107,9 @@ it "includes the hearing type in the document" do show - document_inspector = DocxInspector.new(docx_contents: response.body) + docx_response = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_document_contains?(hearing_type.name)).to eq(true) + expect(docx_response.paragraphs.map(&:to_s)).to include(/#{hearing_type.name}/) end end @@ -119,11 +120,11 @@ it "includes None for the hearing type in the document" do show - document_inspector = DocxInspector.new(docx_contents: response.body) + docx_response = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_document_contains?(hearing_type.name)).to eq(false) - expect(document_inspector.word_list_document_contains?("Hearing Type:")).to eq(true) # Hearing Type: None - expect(document_inspector.word_list_document_contains?("None")).to eq(true) + expect(docx_response.paragraphs.map(&:to_s)).not_to include(/#{hearing_type.name}/) + expect(docx_response.paragraphs.map(&:to_s)).to include(/Hearing Type:/) + expect(docx_response.paragraphs.map(&:to_s)).to include(/None/) end end @@ -134,11 +135,11 @@ it "includes court order info" do show - document_inspector = DocxInspector.new(docx_contents: response.body) + docx_response = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_document_contains?("Court Orders:")).to eq(true) # Court Orders: - expect(document_inspector.word_list_document_contains?(court_date.case_court_orders.first.text)).to eq(true) - expect(document_inspector.word_list_document_contains?(court_date.case_court_orders.first.implementation_status.humanize)).to eq(true) + expect(docx_response.paragraphs.map(&:to_s)).to include(/Court Orders/) + expect(table_text(docx_response)).to include(/#{court_date.case_court_orders.first.text}/) + expect(table_text(docx_response)).to include(/#{court_date.case_court_orders.first.implementation_status.humanize}/) end end @@ -149,9 +150,9 @@ it "does not include court orders section" do show - document_inspector = DocxInspector.new(docx_contents: response.body) + docx_response = Docx::Document.open(StringIO.new(response.body)) - expect(document_inspector.word_list_document_contains?("Court Orders:")).to eq(false) # Court Orders: + expect(docx_response.paragraphs.map(&:to_s)).not_to include(/Court Orders/) end end end diff --git a/spec/support/docx_inspector.rb b/spec/support/docx_inspector.rb deleted file mode 100644 index 7e1efd4b6d..0000000000 --- a/spec/support/docx_inspector.rb +++ /dev/null @@ -1,247 +0,0 @@ -class DocxInspector - IGNORED_FILE_LIST = {"fontTable" => 0, "numbering" => 0, "settings" => 0, "styles" => 0, "webSettings" => 0} - DOCX_WORD_DIRECTORY_FILENAME_CAPTURE_PATTERN = /^word\/([^\/]*)\.xml/ # Capture the file name of a file in the docx's word/ directory (not recursive) - - def initialize(docx_contents: nil, docx_path: nil) - if !docx_contents.nil? - docx_as_zip = get_docx_as_zip_object(docx_contents: docx_contents) - elsif !docx_path.nil? - docx_as_zip = get_docx_as_zip_object(docx_path: docx_path) - else - raise ArgumentError.new("Insufficient parameters. Either docx_contents or docx_path is required.") - end - - @word_lists_by_document_section = {document: [], endnotes: [], footnotes: [], footer: [], header: []} - - get_docx_readable_text_XML_files(docx_as_zip).each do |file| - file_name = file.name.match(DOCX_WORD_DIRECTORY_FILENAME_CAPTURE_PATTERN).captures[0] - viewable_strings = get_displayed_text_list(get_XML_object(file)) - - case file_name - when /^document/ - @word_lists_by_document_section[:document].concat(viewable_strings) - when /^endnotes/ - @word_lists_by_document_section[:endnotes].concat(viewable_strings) - when /^footnotes/ - @word_lists_by_document_section[:footnotes].concat(viewable_strings) - when /^footer/ - @word_lists_by_document_section[:footer].concat(viewable_strings) - when /^header/ - @word_lists_by_document_section[:header].concat(viewable_strings) - end - end - - @unsorted_word_lists_by_document_section = @word_lists_by_document_section.deep_dup - - @word_lists_by_document_section.each do |section, word_list| - sort_string_list_by_length_ascending(word_list) - end - end - - def get_word_list_all(sorted: true) - word_lists = if sorted - @word_lists_by_document_section - else - @unsorted_word_lists_by_document_section - end - - all_words_list = word_lists[:document] + - word_lists[:endnotes] + - word_lists[:footnotes] + - word_lists[:footer] + - word_lists[:header] - - sort_string_list_by_length_ascending(all_words_list) unless !sorted - - all_words_list - end - - def get_word_list_document(sorted: true) - if sorted - @word_lists_by_document_section[:document] - else - @unsorted_word_lists_by_document_section[:document] - end - end - - def get_word_list_endnotes(sorted: true) - if sorted - @word_lists_by_document_section[:endnotes] - else - @unsorted_word_lists_by_document_section[:endnotes] - end - end - - def get_word_list_footnotes(sorted: true) - if sorted - @word_lists_by_document_section[:footnotes] - else - @unsorted_word_lists_by_document_section[:footnotes] - end - end - - def get_word_list_footer(sorted: true) - if sorted - @word_lists_by_document_section[:footer] - else - @unsorted_word_lists_by_document_section[:footer] - end - end - - def get_word_list_header(sorted: true) - if sorted - @word_lists_by_document_section[:header] - else - @unsorted_word_lists_by_document_section[:header] - end - end - - def word_list_all_contains?(str) - word_list_contains_str?(get_word_list_all, str) - end - - def word_list_document_contains?(str) - word_list_contains_str?(get_word_list_document, str) - end - - def word_list_endnotes_contains?(str) - word_list_contains_str?(get_word_list_endnotes, str) - end - - def word_list_footnotes_contains?(str) - word_list_contains_str?(get_word_list_footnotes, str) - end - - def word_list_footer_contains?(str) - word_list_contains_str?(get_word_list_footer, str) - end - - def word_list_header_contains?(str) - word_list_contains_str?(get_word_list_header, str) - end - - def word_list_all_contains_in_order?(str_list) - word_list_contains_str_in_order?(get_word_list_all(sorted: false), str_list) - end - - def word_list_document_contains_in_order?(str_list) - word_list_contains_str_in_order?(get_word_list_document(sorted: false), str_list) - end - - def word_list_endnotes_contains_in_order?(str_list) - word_list_contains_str_in_order?(get_word_list_endnotes(sorted: false), str_list) - end - - def word_list_footnotes_contains_in_order?(str_list) - word_list_contains_str_in_order?(get_word_list_footnotes(sorted: false), str_list) - end - - def word_list_footer_contains_in_order?(str_list) - word_list_contains_str_in_order?(get_word_list_footer(sorted: false), str_list) - end - - def word_list_header_contains_in_order?(str_list) - word_list_contains_str_in_order?(get_word_list_header(sorted: false), str_list) - end - - private - - def get_displayed_text_list(xml_object) - xml_object.xpath("//w:t/text()").filter_map do |word_text_element| - stripped_text = word_text_element.text.strip - stripped_text if stripped_text.length > 0 - end - end - - def get_docx_as_zip_object(docx_contents: nil, docx_path: nil) - if !docx_contents.nil? - Zip::File.open_buffer(docx_contents) - elsif !docx_path.nil? - Zip::File.open(docx_path) - else - raise ArgumentError.new("Insufficient parameters. Either docx_contents or docx_path is required.") - end - end - - def get_docx_readable_text_XML_files(docx_as_zip) - docx_as_zip.entries.select do |entry| - entry_name = entry.name - is_ignored_file = false - xml_file_in_word_match = entry_name.match(DOCX_WORD_DIRECTORY_FILENAME_CAPTURE_PATTERN) - - unless xml_file_in_word_match.nil? - xml_file_name = xml_file_in_word_match.captures[0] - is_ignored_file = !IGNORED_FILE_LIST[xml_file_name].nil? - end - - !(xml_file_in_word_match.nil? || is_ignored_file) - end - end - - def get_XML_object(xml_file_as_docx_zip_entry) - Nokogiri::XML(xml_file_as_docx_zip_entry.get_input_stream.read) - end - - def search_string_list_for_index_of_first_string_of_at_least_n_length(string_list_sorted_by_length, n) - low = 0 - high = string_list_sorted_by_length.length - 1 - mid = (low + high) / 2 - - while low < high - if string_list_sorted_by_length[mid].length < n - low = mid + 1 - else - high = mid - 1 - end - - mid = (low + high) / 2 - end - - if string_list_sorted_by_length[mid].length < n - if string_list_sorted_by_length.length - 1 == mid - return nil - else - return mid + 1 - end - end - - [0, mid].max - end - - def sort_string_list_by_length_ascending(str_list) - str_list.sort_by!(&:length) - end - - def word_list_contains_str?(word_list, str) - first_possible_word_containing_str_index = search_string_list_for_index_of_first_string_of_at_least_n_length( - word_list, - str.length - ) - - if first_possible_word_containing_str_index.nil? - return false - end - - word_list[first_possible_word_containing_str_index..(word_list.length - 1)].each do |word| - if word.include?(str) - return true - end - end - - false - end - - def word_list_contains_str_in_order?(word_list, str_list) - return true unless str_list.present? - - str_index = 0 - word_list.each do |word| - if word.include?(str_list[str_index]) - str_index += 1 - return true if str_index == str_list.length - end - end - - false - end -end diff --git a/spec/support/download_helpers.rb b/spec/support/download_helpers.rb index d64d36a93c..9b9fd85424 100644 --- a/spec/support/download_helpers.rb +++ b/spec/support/download_helpers.rb @@ -20,6 +20,21 @@ def download_docx Docx::Document.open(download) end + def header_text(download_docx) + zip = download_docx.zip + files = zip.glob("word/header*.xml").map { |h| h.name } + filename_and_contents_pairs = files.map do |file| + simple_file_name = file.sub(/^word\//, "").sub(/\.xml$/, "") + [simple_file_name, Nokogiri::XML(zip.read(file))] + end + + filename_and_contents_pairs.map { |name, doc| doc.text }.join("\n") + end + + def table_text(download_docx) + download_docx.tables.map { |t| t.rows.map(&:cells).flatten.map(&:to_s) }.flatten + end + def download_file_name File.basename(download) end