Skip to content

Commit

Permalink
clean up the csv import functionality
Browse files Browse the repository at this point in the history
* centralize this shared functionality to a file in app/lib, move tests to corresponding spec file, have everything that needs the functionality use the app/lib method
* give it a slighty better name
* stop delegating unnecessarily in Bundle, and stop calling it via Bundle.  remove duplicate test in bundle_spec.rb
* excise unnecessary redefinition and tests in bundle_context_temporary and its _spec
* switch to using the more standard ArgumentError instead of the custom BundleUsageError
  • Loading branch information
jmartin-sul committed Sep 11, 2018
1 parent 932051d commit 93cb0b2
Show file tree
Hide file tree
Showing 9 changed files with 37 additions and 77 deletions.
15 changes: 1 addition & 14 deletions app/lib/bundle_context_temporary.rb
Expand Up @@ -57,27 +57,14 @@ def validate_files?
# grab bag
####

# TODO: BundleContext is not really a logical home for this util method
# load CSV allowing UTF-8 to pass through, deleting blank columns
# @param [String] filename
# @return [Array<ActiveSupport::HashWithIndifferentAccess>]
# @raise if file missing/unreadable
def self.import_csv(filename)
raise BundleUsageError, "CSV filename required" unless filename.present?
raise BundleUsageError, "Required file not found: #{filename}." unless File.readable?(filename)
file_contents = IO.read(filename).encode("utf-8", replace: nil)
csv = CSV.parse(file_contents, :headers => true)
csv.map { |row| row.to_hash.with_indifferent_access }
end

def path_in_bundle(rel_path)
File.join(bundle_dir, rel_path)
end

# On first call, loads the manifest data, caches results
# @return [Array<ActiveSupport::HashWithIndifferentAccess>]
def manifest_rows
@manifest_rows ||= self.class.import_csv(manifest)
@manifest_rows ||= CsvImporter.parse_to_hash(manifest)
end

####
Expand Down
13 changes: 13 additions & 0 deletions app/lib/csv_importer.rb
@@ -0,0 +1,13 @@
class CsvImporter
# load CSV allowing UTF-8 to pass through, deleting blank columns
# @param [String] filename
# @return [Array<ActiveSupport::HashWithIndifferentAccess>]
# @raise if file missing/unreadable
def self.parse_to_hash(filename)
raise ArgumentError, "CSV filename required" unless filename.present?
raise ArgumentError, "Required file not found: #{filename}." unless File.readable?(filename)
file_contents = IO.read(filename).encode("utf-8", replace: nil)
csv = CSV.parse(file_contents, :headers => true)
csv.map { |row| row.to_hash.with_indifferent_access }
end
end
5 changes: 0 additions & 5 deletions app/lib/pre_assembly/bundle.rb
@@ -1,6 +1,5 @@
# encoding: UTF-8

require 'csv'
require 'ostruct'
require 'pathname'

Expand Down Expand Up @@ -36,10 +35,6 @@ class Bundle
:validate_files?,
to: :bundle_context

class << self
delegate :import_csv, to: BundleContextTemporary
end

def initialize(bundle_context)
@bundle_context = bundle_context
self.skippables = {}
Expand Down
2 changes: 1 addition & 1 deletion app/lib/pre_assembly/smpl.rb
Expand Up @@ -46,7 +46,7 @@ def initialize(params)

def load_manifest
# load file into @rows and then build up @manifest
@rows = PreAssembly::Bundle.import_csv(@csv_filename)
@rows = CsvImporter.parse_to_hash(@csv_filename)

@manifest = {}

Expand Down
15 changes: 1 addition & 14 deletions app/models/bundle_context.rb
Expand Up @@ -66,23 +66,10 @@ def path_in_bundle(rel_path)
File.join(bundle_dir, rel_path)
end

# TODO: BundleContext is not really a logical home for this util method
# load CSV allowing UTF-8 to pass through, deleting blank columns
# @param [String] filename
# @return [Array<ActiveSupport::HashWithIndifferentAccess>]
# @raise if file missing/unreadable
def self.import_csv(filename)
raise BundleUsageError, "CSV filename required" unless filename.present?
raise BundleUsageError, "Required file not found: #{filename}." unless File.readable?(filename)
file_contents = IO.read(filename).encode("utf-8", replace: nil)
csv = CSV.parse(file_contents, :headers => true)
csv.map { |row| row.to_hash.with_indifferent_access }
end

# On first call, loads the manifest data, caches results
# @return [Array<ActiveSupport::HashWithIndifferentAccess>]
def manifest_rows
@manifest_rows ||= self.class.import_csv(path_in_bundle(manifest))
@manifest_rows ||= CsvImporter.parse_to_hash(path_in_bundle(manifest))
end

def manifest_cols
Expand Down
2 changes: 1 addition & 1 deletion spec/lib/bundle_context_temporary_spec.rb
Expand Up @@ -54,7 +54,7 @@
it "raises exception if required file not found" do
revs_context.manifest = '__foo_manifest###'
expect(File).to receive(:readable?).with(revs_context.manifest).and_return(false)
expect { revs_context.validate_usage }.to raise_error(BundleUsageError, /Required file not found/)
expect { revs_context.validate_usage }.to raise_error(ArgumentError, /Required file not found/)
end

it "raises an exception since the sourceID column is misspelled" do
Expand Down
18 changes: 18 additions & 0 deletions spec/lib/csv_importer_spec.rb
@@ -0,0 +1,18 @@
RSpec.describe CsvImporter do
describe '#parse_to_hash' do
let(:manifest) do
described_class.parse_to_hash("#{Rails.root}/spec/test_data/bundle_input_a/manifest.csv")
end

it "loads a CSV as a hash with indifferent access" do
expect(manifest).to be_an(Array)
expect(manifest.size).to eq(3)
headers = %w{format sourceid filename label year inst_notes prod_notes has_more_metadata description}
expect(manifest).to all(be_an(ActiveSupport::HashWithIndifferentAccess)) # accessible w/ string and symbols
expect(manifest).to all(include(*headers))
expect(manifest[0][:description]).to be_nil
expect(manifest[1][:description]).to eq('')
expect(manifest[2][:description]).to eq('yo, this is a description')
end
end
end
23 changes: 0 additions & 23 deletions spec/lib/pre_assembly/bundle_spec.rb
Expand Up @@ -42,21 +42,6 @@
end
end

describe '#import_csv' do
let(:manifest) { described_class.import_csv(Rails.root.join('spec/test_data/bundle_input_a/manifest.csv')) }

it "loads a CSV as a hash with indifferent access" do
expect(manifest).to be_an(Array)
expect(manifest.size).to eq(3)
headers = %w{format sourceid filename label year inst_notes prod_notes has_more_metadata description}
expect(manifest).to all(be_an(ActiveSupport::HashWithIndifferentAccess)) # accessible w/ string and symbols
expect(manifest).to all(include(*headers))
expect(manifest[0][:description]).to be_nil
expect(manifest[1][:description]).to eq('')
expect(manifest[2][:description]).to eq('yo, this is a description')
end
end

describe '#run_log_msg' do
it 'returns a string' do
expect(revs.run_log_msg).to be_a(String)
Expand Down Expand Up @@ -199,14 +184,6 @@
end
end

describe '#manifest_rows' do
it "loads the manifest CSV only once, during the validation phase, and return all three rows even if you access the manifest multiple times" do
expect(revs.manifest_rows.size).to eq 3
expect(described_class).not_to receive(:import_csv)
revs.manifest_rows
end
end

describe '#validate_files' do
it "returns expected tally if all images are valid" do
skip "validate_files has depedencies on exiftool, making it sometimes incorrectly fail...it basically exercises methods already adequately tested in the assembly-objectfile gem"
Expand Down
21 changes: 2 additions & 19 deletions spec/models/bundle_context_spec.rb
Expand Up @@ -117,31 +117,14 @@
skip("Need to figure out where to set this path via planning meeting 9/10/18")
end

describe '#import_csv' do
let(:manifest) do
described_class.import_csv("#{Rails.root}/spec/test_data/bundle_input_a/manifest.csv")
end

it "loads a CSV as a hash with indifferent access" do
expect(manifest).to be_an(Array)
expect(manifest.size).to eq(3)
headers = %w{format sourceid filename label year inst_notes prod_notes has_more_metadata description}
expect(manifest).to all(be_an(ActiveSupport::HashWithIndifferentAccess)) # accessible w/ string and symbols
expect(manifest).to all(include(*headers))
expect(manifest[0][:description]).to be_nil
expect(manifest[1][:description]).to eq('')
expect(manifest[2][:description]).to eq('yo, this is a description')
end
end

describe "manifest_rows" do
it "loads the manifest CSV" do
expect(described_class).to receive(:import_csv).with("spec/test_data/bundle_input_g/manifest.csv")
expect(CsvImporter).to receive(:parse_to_hash).with("spec/test_data/bundle_input_g/manifest.csv")
bc.manifest_rows
end

it "memoizes the manifest rows" do
expect(described_class).to receive(:import_csv).once.with("spec/test_data/bundle_input_g/manifest.csv").and_call_original
expect(CsvImporter).to receive(:parse_to_hash).once.with("spec/test_data/bundle_input_g/manifest.csv").and_call_original
2.times { bc.manifest_rows }
end

Expand Down

0 comments on commit 93cb0b2

Please sign in to comment.