Skip to content
This repository has been archived by the owner on May 11, 2022. It is now read-only.

Commit

Permalink
Merge pull request #496 from sul-dlss/workflow-indexer2
Browse files Browse the repository at this point in the history
Only index the newest workflow processes
  • Loading branch information
jcoyne committed Jan 30, 2019
2 parents bbd8dde + b99aacf commit 70a6677
Show file tree
Hide file tree
Showing 12 changed files with 164 additions and 15 deletions.
1 change: 1 addition & 0 deletions lib/dor-services.rb
Expand Up @@ -90,6 +90,7 @@ def logger
autoload :IdentifiableIndexer
autoload :ProcessableIndexer
autoload :ReleasableIndexer
autoload :WorkflowIndexer
end

# datastreams
Expand Down
5 changes: 2 additions & 3 deletions lib/dor/datastreams/workflow_ds.rb
Expand Up @@ -82,9 +82,8 @@ def current_priority
cp.priority.to_i
end

def to_solr(solr_doc = {}, *args)
# super solr_doc, *args
workflows.each { |wf| solr_doc = wf.to_solr(solr_doc, *args) }
# Intentionally contributes nothing to the solr document: the document is
# handed back exactly as received and any extra arguments are ignored.
#
# @param solr_doc [Hash] the solr document built so far
# @param _args [Array] accepted but unused
# @return [Hash] the same solr_doc that was passed in
def to_solr(solr_doc = {}, *_args)
  # noop - indexing is done by the WorkflowIndexer
  solr_doc
end

Expand Down
27 changes: 27 additions & 0 deletions lib/dor/indexers/workflow_indexer.rb
@@ -0,0 +1,27 @@
# frozen_string_literal: true

module Dor
  # Indexes the object's position in workflows.
  #
  # Builds the workflow portion of a solr document by passing one hash through
  # the #to_solr method of each of the resource's workflow documents.
  class WorkflowIndexer
    include SolrDocHelper

    # @return [Object] the object being indexed
    attr_reader :resource

    # @param resource [Object] the object to index; it must respond to
    #   #workflows, and that result must itself respond to #workflows
    def initialize(resource:)
      @resource = resource
    end

    # @return [Hash] the partial solr document for workflow concerns
    def to_solr
      # Fold with the *return value* of each call rather than reassigning a
      # block-local inside `tap`: `tap` always hands back the original hash, so
      # a workflow whose #to_solr returned a new hash instead of mutating its
      # argument would otherwise have its fields silently dropped.
      workflows.reduce({}) { |solr_doc, wf| wf.to_solr(solr_doc) }
    end

    private

    # @return [Array<Dor::WorkflowDocument>]
    def workflows
      resource.workflows.workflows
    end
  end
end
3 changes: 2 additions & 1 deletion lib/dor/models/admin_policy_object.rb
Expand Up @@ -15,7 +15,8 @@ class AdminPolicyObject < Dor::Abstract
DescribableIndexer,
EditableIndexer,
IdentifiableIndexer,
ProcessableIndexer
ProcessableIndexer,
WorkflowIndexer
)
end
end
3 changes: 2 additions & 1 deletion lib/dor/models/collection.rb
Expand Up @@ -11,7 +11,8 @@ class Collection < Dor::Set
DescribableIndexer,
IdentifiableIndexer,
ProcessableIndexer,
ReleasableIndexer
ReleasableIndexer,
WorkflowIndexer
)
end
end
3 changes: 2 additions & 1 deletion lib/dor/models/item.rb
Expand Up @@ -18,7 +18,8 @@ class Item < Dor::Abstract
DescribableIndexer,
IdentifiableIndexer,
ProcessableIndexer,
ReleasableIndexer
ReleasableIndexer,
WorkflowIndexer
)

has_metadata name: 'technicalMetadata', type: TechnicalMetadataDS, label: 'Technical Metadata', control_group: 'M'
Expand Down
3 changes: 2 additions & 1 deletion lib/dor/models/set.rb
Expand Up @@ -11,7 +11,8 @@ class Set < Dor::Abstract
DataIndexer,
DescribableIndexer,
IdentifiableIndexer,
ProcessableIndexer
ProcessableIndexer,
WorkflowIndexer
)
end
end
3 changes: 2 additions & 1 deletion lib/dor/models/workflow_object.rb
Expand Up @@ -14,7 +14,8 @@ class WorkflowObject < Dor::Abstract
DataIndexer,
DescribableIndexer,
IdentifiableIndexer,
ProcessableIndexer
ProcessableIndexer,
WorkflowIndexer
)

def self.find_by_name(name)
Expand Down
3 changes: 2 additions & 1 deletion lib/dor/workflow/document.rb
Expand Up @@ -67,7 +67,8 @@ def processes
@processes ||=
if definition
definition.processes.collect do |process|
node = ng_xml.at("/workflow/process[@name = '#{process.name}']")
nodes = ng_xml.xpath("/workflow/process[@name = '#{process.name}']")
node = nodes.max { |a, b| a.attr('version').to_i <=> b.attr('version').to_i }
process.update!(node, self)
end
else
Expand Down
3 changes: 1 addition & 2 deletions spec/datastreams/rights_metadata_spec.rb
Expand Up @@ -2,14 +2,13 @@

require 'spec_helper'

describe Dor::RightsMetadataDS do
RSpec.describe Dor::RightsMetadataDS do
before(:each) { stub_config }
after(:each) { unstub_config }

before(:each) do
@item = instantiate_fixture('druid:bb046xn0881', Dor::Item)
allow(Dor).to receive(:find).with(@item.pid).and_return(@item)
allow(@item).to receive(:workflows).and_return(double)
allow(Dor::Config.workflow.client).to receive(:get_milestones).and_return([])
end

Expand Down
90 changes: 90 additions & 0 deletions spec/indexers/workflow_indexer_spec.rb
@@ -0,0 +1,90 @@
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe Dor::WorkflowIndexer do
  # NOTE(review): the stub_config / unstub_config hooks were left disabled in
  # this spec - confirm they are not needed before removing this note.
  let(:item) { instantiate_fixture('druid:ab123cd4567', Dor::Item) }
  let(:workflow_indexer) { described_class.new(resource: item) }

  describe '#to_solr' do
    # Canned response from the workflow service. assemblyWF and
    # hydrusAssemblyWF carry both version 1 and version 2 rows for some steps.
    let(:xml) do
      <<~XML
        <workflows objectId="druid:ab123cd4567">
        <workflow repository="dor" objectId="druid:ab123cd4567" id="accessionWF">
        <process version="1" priority="0" note="" lifecycle="submitted" laneId="default" elapsed="" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="start-accession"/>
        <process version="1" priority="0" note="common-accessioning-stage-a.stanford.edu" lifecycle="described" laneId="default" elapsed="0.258" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="descriptive-metadata"/>
        <process version="1" priority="0" note="common-accessioning-stage-b.stanford.edu" lifecycle="" laneId="default" elapsed="0.188" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="rights-metadata"/>
        <process version="1" priority="0" note="common-accessioning-stage-b.stanford.edu" lifecycle="" laneId="default" elapsed="0.255" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="content-metadata"/>
        <process version="1" priority="0" note="common-accessioning-stage-b.stanford.edu" lifecycle="" laneId="default" elapsed="0.948" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="technical-metadata"/>
        <process version="1" priority="0" note="common-accessioning-stage-b.stanford.edu" lifecycle="" laneId="default" elapsed="0.15" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="remediate-object"/>
        <process version="1" priority="0" note="common-accessioning-stage-b.stanford.edu" lifecycle="" laneId="default" elapsed="0.479" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="shelve"/>
        <process version="1" priority="0" note="common-accessioning-stage-b.stanford.edu" lifecycle="published" laneId="default" elapsed="1.188" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="publish"/>
        <process version="1" priority="0" note="common-accessioning-stage-b.stanford.edu" lifecycle="" laneId="default" elapsed="0.251" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="provenance-metadata"/>
        <process version="1" priority="0" note="common-accessioning-stage-b.stanford.edu" lifecycle="" laneId="default" elapsed="2.257" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="sdr-ingest-transfer"/>
        <process version="1" priority="0" note="preservationIngestWF completed on preservation-robots1-stage.stanford.edu" lifecycle="deposited" laneId="default" elapsed="1.0" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="sdr-ingest-received"/>
        <process version="1" priority="0" note="common-accessioning-stage-b.stanford.edu" lifecycle="" laneId="default" elapsed="0.246" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="reset-workspace"/>
        <process version="1" priority="0" note="common-accessioning-stage-a.stanford.edu" lifecycle="accessioned" laneId="default" elapsed="1.196" attempts="0" datetime="2019-01-28T20:41:12+00:00" status="completed" name="end-accession"/>
        </workflow>
        <workflow repository="dor" objectId="druid:ab123cd4567" id="assemblyWF">
        <process version="1" priority="0" note="" lifecycle="pipelined" laneId="default" elapsed="" attempts="0" datetime="2019-01-28T20:40:18+00:00" status="completed" name="start-assembly"/>
        <process version="1" priority="0" note="" lifecycle="" laneId="default" elapsed="" attempts="0" datetime="2019-01-28T20:40:18+00:00" status="skipped" name="jp2-create"/>
        <process version="1" priority="0" note="sul-robots1-test.stanford.edu" lifecycle="" laneId="default" elapsed="0.25" attempts="0" datetime="2019-01-28T20:40:18+00:00" status="completed" name="checksum-compute"/>
        <process version="1" priority="0" note="sul-robots1-test.stanford.edu" lifecycle="" laneId="default" elapsed="0.306" attempts="0" datetime="2019-01-28T20:40:18+00:00" status="completed" name="exif-collect"/>
        <process version="1" priority="0" note="sul-robots2-test.stanford.edu" lifecycle="" laneId="default" elapsed="0.736" attempts="0" datetime="2019-01-28T20:40:18+00:00" status="completed" name="accessioning-initiate"/>
        <process version="2" priority="0" note="" lifecycle="" laneId="default" elapsed="" attempts="0" datetime="2019-01-29T22:51:09+00:00" status="completed" name="start-assembly"/>
        <process version="2" priority="0" note="contentMetadata.xml exists" lifecycle="" laneId="default" elapsed="0.278" attempts="0" datetime="2019-01-29T22:51:09+00:00" status="skipped" name="content-metadata-create"/>
        <process version="2" priority="0" note="" lifecycle="" laneId="default" elapsed="0.0" attempts="0" datetime="2019-01-29T22:51:09+00:00" status="error" name="jp2-create"/>
        <process version="2" priority="0" note="" lifecycle="" laneId="default" elapsed="0.0" attempts="0" datetime="2019-01-29T22:51:09+00:00" status="queued" name="checksum-compute"/>
        <process version="2" priority="0" note="" lifecycle="" laneId="default" elapsed="0.0" attempts="0" datetime="2019-01-29T22:51:09+00:00" status="queued" name="exif-collect"/>
        <process version="2" priority="0" note="" lifecycle="" laneId="default" elapsed="0.0" attempts="0" datetime="2019-01-29T22:51:09+00:00" status="queued" name="accessioning-initiate"/>
        </workflow>
        <workflow repository="dor" objectId="druid:ab123cd4567" id="disseminationWF">
        <process version="1" priority="0" note="common-accessioning-stage-b.stanford.edu" lifecycle="" laneId="default" elapsed="0.826" attempts="0" datetime="2019-01-28T20:46:57+00:00" status="completed" name="cleanup"/>
        </workflow>
        <workflow repository="dor" objectId="druid:ab123cd4567" id="hydrusAssemblyWF">
        <process version="1" priority="0" note="" lifecycle="registered" laneId="default" elapsed="" attempts="0" datetime="2019-01-28T20:37:43+00:00" status="completed" name="start-deposit"/>
        <process version="1" priority="0" note="" lifecycle="" laneId="default" elapsed="0.0" attempts="0" datetime="2019-01-28T20:37:43+00:00" status="completed" name="submit"/>
        <process version="1" priority="0" note="" lifecycle="" laneId="default" elapsed="0.0" attempts="0" datetime="2019-01-28T20:37:43+00:00" status="completed" name="approve"/>
        <process version="1" priority="0" note="" lifecycle="" laneId="default" elapsed="0.0" attempts="0" datetime="2019-01-28T20:37:43+00:00" status="completed" name="start-assembly"/>
        <process version="2" priority="0" note="" lifecycle="" laneId="default" elapsed="0.0" attempts="0" datetime="2019-01-28T20:48:17+00:00" status="completed" name="submit"/>
        <process version="2" priority="0" note="" lifecycle="" laneId="default" elapsed="0.0" attempts="0" datetime="2019-01-28T20:48:17+00:00" status="completed" name="approve"/>
        <process version="2" priority="0" note="" lifecycle="" laneId="default" elapsed="0.0" attempts="0" datetime="2019-01-28T20:48:18+00:00" status="completed" name="start-assembly"/>
        </workflow>
        <workflow repository="dor" objectId="druid:ab123cd4567" id="versioningWF">
        <process version="2" priority="0" note="" lifecycle="opened" laneId="default" elapsed="" attempts="0" datetime="2019-01-28T20:48:16+00:00" status="completed" name="start-version"/>
        <process version="2" priority="0" note="" lifecycle="" laneId="default" elapsed="" attempts="1" datetime="2019-01-28T20:48:16+00:00" status="completed" name="submit-version"/>
        <process version="2" priority="0" note="" lifecycle="" laneId="default" elapsed="" attempts="1" datetime="2019-01-28T20:48:16+00:00" status="completed" name="start-accession"/>
        </workflow>
        </workflows>
      XML
    end
    let(:solr_doc) { workflow_indexer.to_solr }

    # Every find_by_name lookup is answered with this one stubbed definition,
    # whose steps are the six assemblyWF steps, numbered from 1 in list order.
    let(:workflow_name) { 'assemblyWF' }
    let(:workflow_steps) do
      %w[
        start-assembly
        content-metadata-create
        jp2-create
        checksum-compute
        exif-collect
        accessioning-initiate
      ].each.with_index(1).map do |step_name, sequence|
        Dor::Workflow::Process.new('dor', workflow_name, 'name' => step_name, 'sequence' => sequence)
      end
    end
    let(:definition) { instance_double(Dor::WorkflowDefinitionDs, processes: workflow_steps) }
    let(:workflow_object) { instance_double(Dor::WorkflowObject, definition: definition) }

    before do
      WebMock.disable_net_connect!
      allow(Dor::Config.workflow.client).to receive(:get_workflow_xml).and_return(xml)
      allow(Dor::WorkflowObject).to receive(:find_by_name).and_return(workflow_object)
    end

    describe 'workflow_status_ssim' do
      subject { solr_doc['workflow_status_ssim'] }

      it do
        is_expected.to eq %w[
          accessionWF|completed|0|dor
          assemblyWF|active|1|dor
          disseminationWF|active|1|dor
          hydrusAssemblyWF|active|1|dor
          versioningWF|active|1|dor
        ]
      end
    end
  end
end
35 changes: 31 additions & 4 deletions spec/models/workflow_document_spec.rb
Expand Up @@ -7,14 +7,19 @@
# stub the wf definition. The workflow document updates the processes in the definition with the values from the xml.
@wf_definition = double(Dor::WorkflowObject)
wf_definition_procs = []
wf_definition_procs << Dor::Workflow::Process.new('accessionWF', 'dor', 'name' => 'hello', 'lifecycle' => 'lc', 'status' => 'stat', 'sequence' => '1')
wf_definition_procs << Dor::Workflow::Process.new('accessionWF', 'dor', 'name' => 'goodbye', 'status' => 'waiting', 'sequence' => '2', 'prerequisite' => ['hello'])
wf_definition_procs << Dor::Workflow::Process.new('accessionWF', 'dor', 'name' => 'technical-metadata', 'status' => 'error', 'sequence' => '3')
wf_definition_procs << Dor::Workflow::Process.new('accessionWF', 'dor', 'name' => 'some-other-step', 'sequence' => '4')
wf_definition_procs << Dor::Workflow::Process.new('accessionWF', 'dor', 'name' => step1, 'lifecycle' => 'lc', 'status' => 'stat', 'sequence' => '1')
wf_definition_procs << Dor::Workflow::Process.new('accessionWF', 'dor', 'name' => step2, 'status' => 'waiting', 'sequence' => '2', 'prerequisite' => ['hello'])
wf_definition_procs << Dor::Workflow::Process.new('accessionWF', 'dor', 'name' => step3, 'status' => 'error', 'sequence' => '3')
wf_definition_procs << Dor::Workflow::Process.new('accessionWF', 'dor', 'name' => step4, 'sequence' => '4')

allow(@wf_definition).to receive(:processes).and_return(wf_definition_procs)
end

let(:step1) { 'hello' }
let(:step2) { 'goodbye' }
let(:step3) { 'technical-metadata' }
let(:step4) { 'some-other-step' }

describe '#processes' do
let(:document) { described_class.new(xml) }
subject(:processes) { document.processes }
Expand Down Expand Up @@ -50,6 +55,28 @@
end
end

context 'when the xml contains a process list with an old version completed' do
let(:xml) do
<<-eos
<?xml version="1.0" encoding="UTF-8"?>
<workflow repository="dor" objectId="druid:gv054hp4128" id="accessionWF">
<process version="1" lifecycle="submitted" elapsed="0.0" archived="true" attempts="1" datetime="2012-11-06T16:18:24-0800" status="completed" name="hello"/>
<process version="1" lifecycle="submitted" elapsed="0.0" archived="true" attempts="1" datetime="2012-11-06T16:18:24-0800" status="completed" name="goodbye"/>
<process version="1" lifecycle="submitted" elapsed="0.0" archived="true" attempts="1" datetime="2012-11-06T16:18:24-0800" status="completed" name="technical-metadata"/>
<process version="1" lifecycle="submitted" elapsed="0.0" archived="true" attempts="1" datetime="2012-11-06T16:18:24-0800" status="completed" name="some-other-step"/>
<process version="2" lifecycle="submitted" elapsed="0.0" archived="true" attempts="1" datetime="2012-11-06T16:18:24-0800" status="completed" name="hello"/>
<process version="2" lifecycle="submitted" elapsed="0.0" archived="true" attempts="1" datetime="2012-11-06T16:18:24-0800" status="queued" name="goodbye"/>
<process version="2" lifecycle="submitted" elapsed="0.0" archived="true" attempts="1" datetime="2012-11-06T16:18:24-0800" status="queued" name="technical-metadata"/>
<process version="2" lifecycle="submitted" elapsed="0.0" archived="true" attempts="1" datetime="2012-11-06T16:18:24-0800" status="queued" name="some-other-step"/>
</workflow>
eos
end

it 'returns only the most recent versions' do
expect(processes.map(&:version)).to all(eq '2')
end
end

context 'when the xml contains a process list with completed items' do
let(:xml) do
<<-eos
Expand Down

0 comments on commit 70a6677

Please sign in to comment.