Skip to content

Commit

Permalink
Merge pull request #156 from sbesson/search_max_fileset_size
Browse files Browse the repository at this point in the history
Fileset indexing: address performance issues and add configurability
  • Loading branch information
jburel committed Mar 9, 2023
2 parents 955f350 + 23a8902 commit 5ff911e
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 8 deletions.
38 changes: 30 additions & 8 deletions src/main/java/ome/services/fulltext/FullTextBridge.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,8 @@
*
* @author Josh Moore, josh at glencoesoftware.com
* @since 3.0-Beta3
* @see <a href="https://trac.openmicroscopy.org.uk/ome/FileParsers">Parsers</a
* href>
* @see <a href="https://trac.openmicroscopy.org.uk/ome/SearchBridges">Bridges</a
* href>
* @see <a href="https://omero.readthedocs.io/en/stable/developers/Search/FileParsers.html">Parsers</a>
* @see <a href="https://omero.readthedocs.io/en/stable/developers/Modules/Search/Bridges.html">Bridges</a>
*/
@Deprecated
public class FullTextBridge extends BridgeHelper {
Expand All @@ -67,6 +65,7 @@ public class FullTextBridge extends BridgeHelper {
final protected OriginalFilesService files;
final protected Map<String, FileParser> parsers;
final protected Class<FieldBridge>[] classes;
protected int maxFilesetSize;

/**
* Since this constructor provides the instance with no way of parsing
Expand Down Expand Up @@ -100,8 +99,7 @@ public FullTextBridge(OriginalFilesService files,
* set of {@link FieldBridge bridge classes} which will be
* instantiated via a no-arg constructor.
* @see <a
* href="https://trac.openmicroscopy.org.uk/ome/SearchBridges">Bridges</a
* href>
* href="https://omero.readthedocs.io/en/stable/developers/Modules/Search/Bridges.html">Bridges</a>
*/
@SuppressWarnings("unchecked")
public FullTextBridge(OriginalFilesService files,
Expand All @@ -111,6 +109,15 @@ public FullTextBridge(OriginalFilesService files,
this.classes = bridgeClasses == null ? new Class[] {} : bridgeClasses;
}

/**
* Sets the cut-off for indexing filesets
* @param maxFilesetSize the maximume fileset size
*/
public void setMaxFilesetSize(int maxFilesetSize) {
logger().info("Setting maximum fileset size to {}", maxFilesetSize);
this.maxFilesetSize = maxFilesetSize;
}

/**
* Default implementation of the
* {@link #set(String, Object, Document, LuceneOptions)}
Expand Down Expand Up @@ -388,24 +395,39 @@ public void set_folders(final String name, final IObject object,
*
* - fileset.entry.clientPath
* - fileset.entry.name
* - fileset.templatePrefix
*/
public void set_fileset(final String name, final IObject object,
final Document document, final LuceneOptions opts) {
if (object instanceof Image) {
final Image image = (Image) object;
final Fileset fileset = image.getFileset();
if (fileset == null) {
if (fileset == null || maxFilesetSize < 1) {
return;
}

add(document, "fileset.templatePrefix", fileset.getTemplatePrefix(), opts);

final Iterator<FilesetEntry> entryIterator = fileset.iterateUsedFiles();
int index = 0;
while (entryIterator.hasNext()) {
final FilesetEntry entry = entryIterator.next();
if (entry == null) {
continue;
}

// Skip fileset indexing above a cut-off
// As the fileset indexing scales with the number of fileset entries for each
// image, this operation can quickly lead to performance degradation notable
// in domains like high-content screening where each of 1K-10K images in a plate
// can be associated with 10-100K files
index++;
if (index > maxFilesetSize) {
return;
}

add(document, "fileset.entry.clientPath", entry.getClientPath(), opts);
add(document, "fileset.entry.name", entry.getOriginalFile().getName(), opts);
add(document, "fileset.templatePrefix", fileset.getTemplatePrefix(), opts);
}
}
}
Expand Down
1 change: 1 addition & 0 deletions src/main/resources/ome/services/service-ome.api.Search.xml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
<constructor-arg ref="fileParsers"/>
<constructor-arg ref="/OMERO/Files"/>
<constructor-arg value="${omero.search.bridges}"/>
<property name="maxFilesetSize" value="${omero.search.max_fileset_size}"/>
</bean>

<!-- Use "*" as a wildcard parser -->
Expand Down
6 changes: 6 additions & 0 deletions src/main/resources/omero-server.properties
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,12 @@ ome.model.screen.PlateAcquisition,ome.model.screen.Well
# modified.
omero.search.include_actions=INSERT,UPDATE,REINDEX,DELETE

# Maximum number of fileset entries which will be indexed
# Increasing this cut-off can lead to indexing performance degradation
# notably in the high-content screening domain where plates typically
# contain 1K-10K images associated with 10-100K fileset entries each
# If set to 0, no fileset entry will be indexed
omero.search.max_fileset_size=10

##
## Old loader: "persistentEventLogLoader"
Expand Down

0 comments on commit 5ff911e

Please sign in to comment.