Skip to content

Commit

Permalink
WosRecords - immutable record set operations, with merge option
Browse files Browse the repository at this point in the history
  • Loading branch information
dazza-codes committed Oct 3, 2017
1 parent 80a75dc commit 3678a3b
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 46 deletions.
54 changes: 40 additions & 14 deletions lib/wos_records.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,20 @@ def initialize(records: nil, encoded_records: nil)
@doc = Nokogiri::XML(@records) { |config| config.strict.noblanks }
end

# Iterate over the REC nodes
# @yield rec [Nokogiri::XML::Element]
def each
rec_nodes.each { |rec| yield rec }
end

# @return uids [Array<String>] the rec_nodes UID values (in order)
def uids
rec_nodes.map { |rec| record_uid(rec) }
uid_nodes.map(&:text)
end

# @return uid_nodes [Nokogiri::XML::NodeSet] the rec_nodes UID nodes
def uid_nodes
rec_nodes.search('UID')
end

# Find duplicate WoS UID values
Expand All @@ -45,25 +52,43 @@ def duplicate_uids(record_setB)
# @return [Nokogiri::XML::NodeSet] duplicate records
def duplicate_records(record_setB)
uid_intersection = duplicate_uids(record_setB)
duplicates = record_setB.rec_nodes.select { |rec| uid_intersection.include? record_uid(rec) }
duplicates = if uid_intersection.empty?
[]
else
# create a new set of records, use a philosophy of immutability
# Nokogiri::XML::NodeSet enumerable methods do not return new objects
# Nokogiri::XML::Document.dup is a deep copy
docB = record_setB.doc.dup # do not chain Nokogiri methods
docB.search('REC').select { |rec| uid_intersection.include? record_uid(rec) }
end
Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new, duplicates)
end

# Find new WoS records, rejecting duplicates
# @param record_setB [WosRecords]
# @return [Nokogiri::XML::NodeSet] additional new records
def new_records(record_setB)
# create a new set of records, use a philosophy of immutability
# Nokogiri::XML::NodeSet enumerable methods do not return new objects
# Nokogiri::XML::Document.dup is a deep copy
docB = record_setB.doc.dup # do not chain Nokogiri methods
new_rec = docB.search('REC')
# reject duplicate records
uid_dups = duplicate_uids(record_setB)
new_rec = new_rec.reject { |rec| uid_dups.include? record_uid(rec) } unless uid_dups.empty?
Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new, new_rec)
end

# Merge WoS records
# @param record_setB [WosRecords]
# @return records [WosRecords] merged set of records
def merge_records(record_setB)
# create a new set of merged records, use a philosophy of immutability

# binding.pry
# TODO: why is this changing the record_setB????

dup_uids = duplicate_uids(record_setB)
merge_nodes = record_setB.rec_nodes.dup
merge_nodes.reject { |rec| dup_uids.include? record_uid(rec) } unless dup_uids.empty?
merge_doc = doc.dup
merge_doc.at('records').add_child(merge_nodes)
WosRecords.new(records: merge_doc.to_xml)
# create a new set of records, use a philosophy of immutability
# Nokogiri::XML::Document.dup is a deep copy
docA = doc.dup # do not chain Nokogiri methods
# merge new records and return a new WosRecords instance
docA.at('records').add_child(new_records(record_setB))
WosRecords.new(records: docA.to_xml)
end

# Pretty print the records in XML
Expand All @@ -88,10 +113,11 @@ def rec_nodes
attr_reader :records
attr_reader :encoded_records

# The UID for a WoS REC
# @param rec [Nokogiri::XML::Element] a Wos 'REC' element
# @return UID [String] a Wos 'UID' value
def record_uid(rec)
rec.search('UID').first.text
rec.search('UID').text
end

# @return decoded_records [String] WOS records in XML
Expand Down
85 changes: 53 additions & 32 deletions spec/lib/wos_records_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,27 @@
end
let(:wos_records_encoded) { described_class.new(encoded_records: encoded_records) }
let(:wos_records_decoded) { described_class.new(records: decoded_records) }
let(:merge_records) { File.read('spec/fixtures/wos_client/wos_merge_records.html') }
let(:record_setB) { described_class.new(encoded_records: merge_records) }
# let(:merge_records) { File.read('spec/fixtures/wos_client/wos_merge_records.html') }
# let(:record_setB) { described_class.new(encoded_records: merge_records) }

let(:recordsA) do
<<-XML_A
<records>
<REC><UID>WOS:A1</UID></REC>
<REC><UID>WOS:A2</UID></REC>
</records>
XML_A
end
let(:recordsB) do
<<-XML_B
<records>
<REC><UID>WOS:A2</UID></REC>
<REC><UID>WOS:B2</UID></REC>
</records>
XML_B
end
let(:wos_recordsA) { described_class.new(records: recordsA) }
let(:wos_recordsB) { described_class.new(records: recordsB) }

describe '#new' do
it 'works with encoded records' do
Expand Down Expand Up @@ -73,56 +92,58 @@
end

describe '#duplicate_uids' do
let(:dup_uids) { wos_recordsA.duplicate_uids(wos_recordsB) }

it 'works' do
result = wos_records_encoded.duplicate_uids(record_setB)
expect(result).to be_an Set
expect(dup_uids).to be_an Set
end
it 'returns duplicate UIDs' do
result = wos_records_encoded.duplicate_uids(record_setB)
expect(result.to_a).to eq record_setB.uids
expect(dup_uids.to_a).to eq ['WOS:A2']
end
it 'does not modify record_setB' do
uids_before = record_setB.uids
# binding.pry
wos_records_encoded.merge_records(record_setB)
expect(uids_before).to eq record_setB.uids
it 'does not modify records' do
expect { dup_uids }.not_to change { wos_recordsA.uids }
end
it 'does not modify input records' do
expect { dup_uids }.not_to change { wos_recordsB.uids }
end
end

describe '#duplicate_records' do
let(:dup_records) { wos_recordsA.duplicate_records(wos_recordsB) }

it 'works' do
result = wos_records_encoded.duplicate_records(record_setB)
expect(result).to be_an Nokogiri::XML::NodeSet
expect(dup_records).to be_an Nokogiri::XML::NodeSet
end
it 'returns duplicate record nodes' do
result = wos_records_encoded.duplicate_records(record_setB)
expect(result).to eq record_setB.rec_nodes
# Note: cannot use xpath here, i.e.
# See https://github.com/sparklemotion/nokogiri/issues/1677
# dup_records.xpath('//UID').to_s => "<UID>WOS:A2</UID><UID>WOS:B2</UID>"
uid = dup_records.search('UID').text
expect(uid).to eq 'WOS:A2'
end
it 'does not modify records' do
expect { dup_records }.not_to change { wos_recordsA.uids }
end
it 'does not modify input records' do
expect { dup_records }.not_to change { wos_recordsB.uids }
end
end

describe '#merge_records' do
let(:merge_records) { wos_recordsA.merge_records(wos_recordsB) }

it 'works' do
result = wos_records_encoded.merge_records(record_setB)
expect(result).to be_an described_class
expect(merge_records).to be_an described_class
end
it 'has merged records' do
# The record_setB contains the wos_records_encoded already; the
# existing records contains one of the record_setB records, which must
# be rejected prior to merging the records.
uids = record_setB.uids
result = wos_records_encoded.merge_records(record_setB)
expect(result.uids).to eq uids
expect(merge_records.uids).to eq %w(WOS:A1 WOS:A2 WOS:B2)
end
# Immutability specs
it 'does not modify existing records' do
uids = wos_records_encoded.uids
wos_records_encoded.merge_records(record_setB)
expect(uids).to eq wos_records_encoded.uids
end
it 'does not modify record_setB' do
uids = record_setB.uids
wos_records_encoded.merge_records(record_setB)
expect(uids).to eq record_setB.uids
it 'does not modify records' do
expect { merge_records }.not_to change { wos_recordsA.uids }
end
it 'does not modify input records' do
expect { merge_records }.not_to change { wos_recordsB.uids }
end
end

Expand Down

0 comments on commit 3678a3b

Please sign in to comment.