Permalink
Browse files

NEXUS-5380 force lucene compound index to reduce number of open files

Signed-off-by: Igor Fedorenko <igor@ifedorenko.com>
  • Loading branch information...
1 parent 3f9d3ec commit a364b45a0273690a356cf134c32629f4c6fc847a @ifedorenko ifedorenko committed Nov 15, 2012
View
2 nexus/plugins/indexer/nexus-indexer-lucene-plugin/pom.xml
@@ -28,7 +28,7 @@
<packaging>nexus-plugin</packaging>
<properties>
- <maven.indexer.version>5.0.0</maven.indexer.version>
+ <maven.indexer.version>5.1.0-SNAPSHOT</maven.indexer.version>
<nexus.plugin.name>Nexus Indexer Lucene Plugin</nexus.plugin.name>
<nexus.plugin.description>Adds search capabilities for repository content.</nexus.plugin.description>
View
325 ...xer-lucene-plugin/src/main/java/copied/org/apache/maven/index/DefaultScannerListener.java
@@ -1,325 +0,0 @@
-package copied.org.apache.maven.index;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.TopScoreDocCollector;
-import org.apache.maven.index.ArtifactContext;
-import org.apache.maven.index.ArtifactInfo;
-import org.apache.maven.index.ArtifactScanningListener;
-import org.apache.maven.index.IndexerEngine;
-import org.apache.maven.index.ScanningResult;
-import org.apache.maven.index.context.IndexingContext;
-import org.codehaus.plexus.logging.AbstractLogEnabled;
-
-/**
- * Scanning listener that indexes every artifact reported during a repository scan and, for update scans of
- * contexts that are not receiving updates, removes index entries whose artifacts were not rediscovered.
- * <p>
- * All scan events are forwarded to an optional delegate {@link ArtifactScanningListener}.
- *
- * @author Eugene Kuleshov
- */
-public class DefaultScannerListener
-    extends AbstractLogEnabled
-    implements ArtifactScanningListener
-{
-    /** Context that is indexed into, optimized and committed by this listener. */
-    private final IndexingContext context;
-
-    /** Engine performing the actual index additions and removals. */
-    private final IndexerEngine indexerEngine;
-
-    /** Whether this is an update scan; enables the pre-scan index read and post-scan deletion detection. */
-    private final boolean update;
-
-    /** Optional delegate notified of every event; may be {@code null}. */
-    private final ArtifactScanningListener listener;
-
-    /**
-     * UINFOs found in the index when the scan started and not rediscovered so far. Whatever is left once the scan
-     * finishes is treated as a deletion candidate.
-     */
-    private final Set<String> uinfos = new HashSet<String>();
-
-    /** UINFOs already handled during this scan; used to skip repeated discoveries of the same UINFO. */
-    private final Set<String> processedUinfos = new HashSet<String>();
-
-    /** Every groupId seen, either in the existing index or during this scan. */
-    private final Set<String> allGroups = new HashSet<String>();
-
-    /** Every root group seen, either in the existing index or during this scan. */
-    private final Set<String> groups = new HashSet<String>();
-
-    /** Errors collected during the scan; copied into the {@link ScanningResult} when the scan finishes. */
-    private final List<Exception> exceptions = new ArrayList<Exception>();
-
-    /** Number of artifacts indexed by this scan; reported as the result's total file count. */
-    private int count = 0;
-
-    /**
-     * Creates a listener bound to one indexing context.
-     *
-     * @param context the context to index into
-     * @param indexerEngine the engine used to add and remove index entries
-     * @param update {@code true} for an update scan of an already populated index
-     * @param listener optional delegate to forward events to, may be {@code null}
-     */
-    public DefaultScannerListener( IndexingContext context, //
-                                   IndexerEngine indexerEngine, boolean update, //
-                                   ArtifactScanningListener listener )
-    {
-        this.context = context;
-        this.indexerEngine = indexerEngine;
-        this.update = update;
-        this.listener = listener;
-    }
-
-    /**
-     * For update scans, loads the UINFOs and groups already present in the index, then notifies the delegate.
-     * An {@link IOException} raised while reading the index is recorded and reported with the scan result.
-     */
-    public void scanningStarted( IndexingContext ctx )
-    {
-        try
-        {
-            if ( update )
-            {
-                initialize( ctx );
-            }
-        }
-        catch ( IOException ex )
-        {
-            exceptions.add( ex );
-        }
-
-        if ( listener != null )
-        {
-            listener.scanningStarted( ctx );
-        }
-    }
-
-    /**
-     * Indexes a discovered artifact unless its UINFO was already handled in this scan or is already in the index.
-     * Errors attached to the artifact context, and any {@link IOException} from indexing, are routed through
-     * {@link #artifactError(ArtifactContext, Exception)}.
-     */
-    public void artifactDiscovered( ArtifactContext ac )
-    {
-        String uinfo = ac.getArtifactInfo().getUinfo();
-
-        // TODO (cstamas): commented-out changes scattered around here date from the NEXUS-2712 fix.
-        // They should be applied, but doing so breaks too much of the fragile indexer.
-
-        // if ( VersionUtils.isSnapshot( ac.getArtifactInfo().version ) && processedUinfos.contains( uinfo ) )
-        if ( !processedUinfos.add( uinfo ) )
-        {
-            return; // skip individual snapshots
-        }
-
-        if ( uinfos.remove( uinfo ) )
-        {
-            // already indexed; dropping it from the set marks it as still present
-            return;
-        }
-
-        try
-        {
-            if ( listener != null )
-            {
-                listener.artifactDiscovered( ac );
-            }
-
-            // A UINFO reaching this point is always new to this scan (see the guard above), so it is always
-            // added; the former update branch could never be taken.
-            indexerEngine.index( context, ac );
-
-            for ( Exception e : ac.getErrors() )
-            {
-                artifactError( ac, e );
-            }
-
-            groups.add( ac.getArtifactInfo().getRootGroup() );
-            allGroups.add( ac.getArtifactInfo().groupId );
-
-            count++;
-        }
-        catch ( IOException ex )
-        {
-            artifactError( ac, ex );
-        }
-    }
-
-    /**
-     * Publishes the scan outcome into {@code result}: total file count and collected exceptions. It then
-     * optimizes the context, stores the gathered groups and, for update scans of contexts that are not receiving
-     * updates, removes entries of artifacts that were not rediscovered. The delegate is notified afterwards, and
-     * if anything was added or deleted the context timestamp is updated and the context optimized once more.
-     */
-    public void scanningFinished( IndexingContext ctx, ScanningResult result )
-    {
-        result.setTotalFiles( count );
-
-        for ( Exception ex : exceptions )
-        {
-            result.addException( ex );
-        }
-
-        try
-        {
-            context.optimize();
-
-            context.setRootGroups( groups );
-
-            context.setAllGroups( allGroups );
-
-            if ( update && !context.isReceivingUpdates() )
-            {
-                removeDeletedArtifacts( context, result, result.getRequest().getStartingPath() );
-            }
-        }
-        catch ( IOException ex )
-        {
-            result.addException( ex );
-        }
-
-        if ( listener != null )
-        {
-            listener.scanningFinished( ctx, result );
-        }
-
-        if ( result.getDeletedFiles() > 0 || result.getTotalFiles() > 0 )
-        {
-            try
-            {
-                context.updateTimestamp( true );
-
-                context.optimize();
-            }
-            catch ( Exception ex )
-            {
-                result.addException( ex );
-            }
-        }
-    }
-
-    /**
-     * Records an error for later reporting and forwards it to the delegate, if any.
-     */
-    public void artifactError( ArtifactContext ac, Exception e )
-    {
-        exceptions.add( e );
-
-        if ( listener != null )
-        {
-            listener.artifactError( ac, e );
-        }
-    }
-
-    /**
-     * Reads every non-deleted document of the index and collects its UINFO and groups.
-     *
-     * @param ctx the context whose index is read
-     * @throws IOException if the index cannot be read
-     */
-    private void initialize( IndexingContext ctx )
-        throws IOException, CorruptIndexException
-    {
-        final IndexSearcher indexSearcher = ctx.acquireIndexSearcher();
-        try
-        {
-            final IndexReader r = indexSearcher.getIndexReader();
-
-            for ( int i = 0; i < r.maxDoc(); i++ )
-            {
-                if ( !r.isDeleted( i ) )
-                {
-                    Document d = r.document( i );
-
-                    String uinfo = d.get( ArtifactInfo.UINFO );
-
-                    if ( uinfo != null )
-                    {
-                        // if ctx is receiving updates (in other words, is a proxy),
-                        // there is no need to build a huge Set of strings with all uinfo's
-                        // as deletion detection in those cases have no effect. Also, the
-                        // removeDeletedArtifacts() method, that uses info gathered in this set
-                        // is invoked with same condition. As indexes of Central are getting huge,
-                        // the set grows enormously too, but is actually not used
-                        if ( !ctx.isReceivingUpdates() )
-                        {
-                            uinfos.add( uinfo );
-                        }
-
-                        // add all existing groupIds to the lists, as they will
-                        // not be "discovered" and would be missing from the new list..
-                        String groupId = uinfo.substring( 0, uinfo.indexOf( '|' ) );
-                        int n = groupId.indexOf( '.' );
-                        groups.add( n == -1 ? groupId : groupId.substring( 0, n ) );
-                        allGroups.add( groupId );
-                    }
-                }
-            }
-        }
-        finally
-        {
-            ctx.releaseIndexSearcher( indexSearcher );
-        }
-    }
-
-    /**
-     * Removes from the index every artifact that was indexed before the scan but not rediscovered by it, limited
-     * to artifacts whose repository path starts with {@code contextPath} when one is given. The number of removals
-     * actually performed is stored in {@code result}; the context is committed only if at least one happened.
-     *
-     * @param context the context to remove entries from
-     * @param result the scan result receiving the deleted-file count
-     * @param contextPath path prefix the scan was restricted to, or {@code null} for no restriction
-     * @throws IOException if searching or modifying the index fails
-     */
-    private void removeDeletedArtifacts( IndexingContext context, ScanningResult result, String contextPath )
-        throws IOException
-    {
-        int deleted = 0;
-
-        final IndexSearcher indexSearcher = context.acquireIndexSearcher();
-        try
-        {
-            for ( String uinfo : uinfos )
-            {
-                TopScoreDocCollector collector = TopScoreDocCollector.create( 1, false );
-
-                indexSearcher.search( new TermQuery( new Term( ArtifactInfo.UINFO, uinfo ) ), collector );
-
-                final int hits = collector.getTotalHits();
-
-                if ( hits <= 0 )
-                {
-                    continue;
-                }
-
-                String[] ra = ArtifactInfo.FS_PATTERN.split( uinfo );
-
-                ArtifactInfo ai = new ArtifactInfo();
-
-                ai.repository = context.getRepositoryId();
-
-                ai.groupId = ra[0];
-
-                ai.artifactId = ra[1];
-
-                ai.version = ra[2];
-
-                if ( ra.length > 3 )
-                {
-                    ai.classifier = ArtifactInfo.renvl( ra[3] );
-                }
-
-                if ( ra.length > 4 )
-                {
-                    ai.packaging = ArtifactInfo.renvl( ra[4] );
-                }
-
-                // minimal ArtifactContext for removal
-                ArtifactContext ac = new ArtifactContext( null, null, null, ai, ai.calculateGav() );
-
-                // The path test is the same for every hit of this artifact, so evaluate it once. Artifacts
-                // outside the scanned path are neither removed nor counted as deleted.
-                if ( contextPath != null
-                    && !context.getGavCalculator().gavToPath( ac.getGav() ).startsWith( contextPath ) )
-                {
-                    continue;
-                }
-
-                for ( int i = 0; i < hits; i++ )
-                {
-                    indexerEngine.remove( context, ac );
-
-                    deleted++;
-                }
-            }
-        }
-        finally
-        {
-            context.releaseIndexSearcher( indexSearcher );
-        }
-
-        if ( deleted > 0 )
-        {
-            context.commit();
-        }
-
-        result.setDeletedFiles( deleted );
-    }
-
-}
View
43 ...s-indexer-lucene-plugin/src/main/java/org/sonatype/nexus/index/DefaultIndexerManager.java
@@ -37,6 +37,8 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
@@ -48,6 +50,7 @@
import org.apache.maven.index.ArtifactInfo;
import org.apache.maven.index.ArtifactInfoFilter;
import org.apache.maven.index.ArtifactInfoPostprocessor;
+import org.apache.maven.index.DefaultScannerListener;
import org.apache.maven.index.Field;
import org.apache.maven.index.FlatSearchRequest;
import org.apache.maven.index.FlatSearchResponse;
@@ -124,8 +127,6 @@
import com.google.common.annotations.VisibleForTesting;
-import copied.org.apache.maven.index.DefaultScannerListener;
-
/**
* <p>
* Indexer Manager. This is a thin layer above Nexus Indexer and simply manages indexingContext additions, updates and
@@ -426,9 +427,41 @@ private void addRepositoryIndexContext( final Repository repository, IndexingCon
else
{
// add context for repository
- ctx =
- mavenIndexer.addIndexingContextForced( getContextId( repository.getId() ), repository.getId(),
- repoRoot, indexDirectory, null, null, indexCreators );
+ try
+ {
+ ctx = new DefaultIndexingContext( getContextId( repository.getId() ), // id
+ repository.getId(), // repositoryId
+ repoRoot, // repository
+ indexDirectory, // indexDirectoryFile
+ null, // repositoryUrl
+ null, // indexUpdateUrl
+ indexCreators, //
+ true // reclaimIndex
+ )
+ {
+ /**
+  * Returns the parent's writer configuration with its merge policy replaced by a {@link TieredMergePolicy}
+  * that has compound files enabled and a no-CFS ratio of 1.0.
+  */
+ @Override
+ protected IndexWriterConfig getWriterConfig()
+ {
+     // NEXUS-5380 force use of compound lucene index file to postpone "Too many open files"
+     final TieredMergePolicy compoundFilePolicy = new TieredMergePolicy();
+     compoundFilePolicy.setNoCFSRatio( 1.0 );
+     compoundFilePolicy.setUseCompoundFile( true );
+
+     final IndexWriterConfig config = super.getWriterConfig();
+     config.setMergePolicy( compoundFilePolicy );
+
+     return config;
+ }
+ };
+ mavenIndexer.addIndexingContext( ctx );
+ }
+ catch ( UnsupportedExistingLuceneIndexException e )
+ {
+ // this will never happen because reclaimIndex=true
+ throw new IOException( e );
+ }
}
ctx.setSearchable( repository.isSearchable() );
View
21 ...dexer-lucene-plugin/src/test/java/org/sonatype/nexus/index/Nexus3578IndexerManagerIT.java
@@ -169,12 +169,31 @@ protected void hackContext( DefaultIndexingContext context )
creators.add( mavenArchetype );
creators.add( jar );
- Field indexCreatorsField = context.getClass().getDeclaredField( "indexCreators" );
+ Field indexCreatorsField = getIndexCreatorsField( context );
if ( indexCreatorsField != null )
{
indexCreatorsField.setAccessible( true );
indexCreatorsField.set( context, creators );
}
}
+
+ /**
+  * Finds the {@code indexCreators} field on the runtime class of the given context or, failing that, on the
+  * nearest superclass that declares it. Walking the hierarchy is needed when the context is a subclass
+  * (for example an anonymous one) of the class that actually declares the field.
+  *
+  * @param context the indexing context whose class hierarchy is searched
+  * @return the declared field, never {@code null}; the caller is responsible for making it accessible
+  * @throws NoSuchFieldException if no class in the hierarchy declares {@code indexCreators}
+  */
+ private Field getIndexCreatorsField( DefaultIndexingContext context )
+     throws NoSuchFieldException
+ {
+     for ( Class<?> type = context.getClass(); type != null; type = type.getSuperclass() )
+     {
+         try
+         {
+             return type.getDeclaredField( "indexCreators" );
+         }
+         catch ( NoSuchFieldException ignored )
+         {
+             // not declared at this level; continue with the superclass
+         }
+     }
+
+     // name the field and the starting class so a failure is diagnosable
+     throw new NoSuchFieldException( "indexCreators (searched " + context.getClass().getName()
+         + " and its superclasses)" );
+ }
}
View
4 pom.xml
@@ -102,8 +102,8 @@
<exclude>nexus-plugin-archetype/src/main/resources/archetype-resources/**</exclude>
<exclude>nexus/nexus-webapp/src/main/webapp/js/ext/ux/**</exclude>
<exclude>nexus/nexus-webapp/src/main/webapp/style/ext/ux/**</exclude>
- <!-- tell license plugin to ignore workaround for MINDEXER-65 -->
- <exclude>**/nexus-indexer-lucene-plugin/src/main/java/copied/org/apache/maven/index/DefaultScannerListener.java</exclude>
+ <!-- Exclude zion work directory -->
+ <exclude>zwork/**</exclude>
</excludes>
<mapping>
<vm>SHARPSTAR_STYLE</vm>

0 comments on commit a364b45

Please sign in to comment.