Skip to content

Commit

Permalink
Number of fields growth in legacy indexes during remove operation.
Browse files Browse the repository at this point in the history
During removal from legacy indexes we try to restore the numeric fields that are lost after reading.
A mistake in the code that determines which fields need to be removed and added again caused enormous growth in the number of fields in a document.
As a result, it is impossible to perform almost any operation on the affected documents.

This PR fixes the restoration code for the custom and exact index types and introduces a couple of tests that verify the number of fields in manipulated documents.
  • Loading branch information
MishaDemianenko committed Oct 6, 2016
1 parent f9280aa commit f4af82c
Show file tree
Hide file tree
Showing 2 changed files with 122 additions and 5 deletions.
Expand Up @@ -33,6 +33,7 @@
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
Expand Down Expand Up @@ -87,7 +88,7 @@ void removeFieldsFromDocument( Document document, String key, Object value )
return;
}
}
document.removeFields( key );
removeFieldFromDocument( document, key );
if ( value != null )
{
for ( String existingValue : values )
Expand All @@ -99,6 +100,12 @@ void removeFieldsFromDocument( Document document, String key, Object value )
restoreNumericFields( document );
}

/**
 * Removes every field called {@code name} from {@code document}.
 *
 * @param document the Lucene document to modify in place
 * @param name the name of the fields to remove
 */
@Override
void removeFieldFromDocument( Document document, String name )
{
document.removeFields( name );
}

@Override
public String toString()
{
Expand Down Expand Up @@ -145,6 +152,13 @@ public void addToDocument( Document document, String key, Object value )
document.add( instantiateSortField( key, value ) );
}

/**
 * Removes the fields stored under {@code exactKey( name )} as well as the fields stored under
 * {@code name} itself.
 *
 * @param document the Lucene document to modify in place
 * @param name the user-visible field name; its {@code exactKey} counterpart is removed too
 */
@Override
void removeFieldFromDocument( Document document, String name )
{
// NOTE(review): presumably this index type stores each user field under both names - confirm in addToDocument.
document.removeFields( exactKey( name ) );
document.removeFields( name );
}

@Override
void removeFieldsFromDocument( Document document, String key, Object value )
{
Expand All @@ -160,7 +174,7 @@ void removeFieldsFromDocument( Document document, String key, Object value )
}
}
document.removeFields( exactKey );
document.removeFields( key );
removeFieldFromDocument( document, key );
if ( value != null )
{
for ( String existingValue : values )
Expand Down Expand Up @@ -373,9 +387,11 @@ final void removeFromDocument( Document document, String key, Object value )

/**
 * Removes fields stored for {@code key} from {@code document}.
 * NOTE(review): {@code value} may be {@code null}; the implementations appear to treat that as
 * "remove everything stored for the key" and otherwise to keep the other existing values -
 * confirm against the implementations.
 */
abstract void removeFieldsFromDocument( Document document, String key, Object value );

/**
 * Removes from {@code document} all fields that this index type stores for the field called {@code name}.
 */
abstract void removeFieldFromDocument( Document document, String name );

private void clearDocument( Document document )
{
Set<String> names = new HashSet<String>();
Set<String> names = new HashSet<>();
for ( IndexableField field : document.getFields() )
{
names.add( field.name() );
Expand All @@ -393,14 +409,15 @@ protected void restoreNumericFields( Document document )
List<IndexableField> numericFields = new ArrayList<>();
for ( IndexableField field : document.getFields() )
{
if ( field.numericValue() != null && !field.name().equals( LuceneLegacyIndex.KEY_DOC_ID ) )
if ( field.numericValue() != null && !field.name().equals( LuceneLegacyIndex.KEY_DOC_ID ) &&
DocValuesType.NONE.equals( field.fieldType().docValuesType() ) )
{
numericFields.add( field );
}
}
for ( IndexableField field : numericFields )
{
document.removeField( field.name() );
removeFieldFromDocument( document, field.name() );
addToDocument( document, field.name(), field.numericValue() );
}
}
Expand Down
@@ -0,0 +1,100 @@
/*
* Copyright (c) 2002-2016 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.index.impl.lucene.legacy;


import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import java.util.Arrays;
import java.util.Map;

import org.neo4j.helpers.collection.MapUtil;

import static org.junit.Assert.assertEquals;

/**
 * Parameterized checks that removing user fields from a legacy-index {@link Document} leaves the
 * expected number of Lucene fields behind, for every {@link IndexType} supplied by
 * {@link #indexTypes()}.
 */
@RunWith( Parameterized.class )
public class IndexTypeTest
{
    private static final String STRING_TEST_FIELD = "testString";
    private static final String STRING_TEST_FIELD2 = "testString2";
    private static final String NUMERIC_TEST_FIELD = "testNumeric";
    private static final String NUMERIC_TEST_FIELD2 = "testNumeric2";

    /** Index type under test. */
    @Parameterized.Parameter( 0 )
    public IndexType indexType;

    /** Number of document fields expected per user field for the index type under test. */
    @Parameterized.Parameter( 1 )
    public int documentFieldsPerUserField;

    /**
     * Supplies the test parameters: the exact index type, and an index type built from an
     * "exact" configuration with lower-casing switched on, each paired with its expected number
     * of document fields per user field.
     */
    @Parameterized.Parameters(name = "{0}")
    public static Iterable<Object> indexTypes()
    {
        Map<String,String> lowerCasedExactConfig = MapUtil.stringMap(
                LuceneIndexImplementation.KEY_TYPE, "exact",
                LuceneIndexImplementation.KEY_TO_LOWER_CASE, "true" );
        Object[] exactCase = new Object[]{IndexType.EXACT, 2};
        Object[] configuredCase = new Object[]{IndexType.getIndexType( lowerCasedExactConfig ), 3};
        return Arrays.asList( exactCase, configuredCase );
    }

    /**
     * Removing one string field through {@code removeFromDocument} must leave exactly the fields
     * of the three remaining user fields.
     */
    @Test
    public void removeFromExactIndexedDocumentRetainCorrectNumberOfFields() throws Exception
    {
        Document doc = documentWithAllTestFields();

        indexType.removeFromDocument( doc, STRING_TEST_FIELD, null );

        int remainingUserFields = 3;
        assertEquals( "Usual fields, doc values fields for user fields and housekeeping fields.",
                documentFieldsPerUserField * remainingUserFields, doc.getFields().size() );
        assertEquals( "Two string fields with specified name expected.",
                2, fieldsNamed( doc, STRING_TEST_FIELD2 ).length );
        assertEquals( "Two numeric fields with specified name expected.",
                2, fieldsNamed( doc, NUMERIC_TEST_FIELD ).length );
        assertEquals( "Two numeric fields with specified name expected.",
                2, fieldsNamed( doc, NUMERIC_TEST_FIELD2 ).length );
    }

    /**
     * Removing one numeric and one string field through {@code removeFieldsFromDocument} must
     * leave exactly the fields of the two remaining user fields.
     */
    @Test
    public void removeFieldFromExactIndexedDocumentRetainCorrectNumberOfFields() throws Exception
    {
        Document doc = documentWithAllTestFields();

        indexType.removeFieldsFromDocument( doc, NUMERIC_TEST_FIELD, null );
        indexType.removeFieldsFromDocument( doc, STRING_TEST_FIELD2, null );

        int remainingUserFields = 2;
        assertEquals( "Usual fields, doc values fields for user fields and housekeeping fields.",
                documentFieldsPerUserField * remainingUserFields, doc.getFields().size() );
        assertEquals( "Two string fields with specified name expected.",
                2, fieldsNamed( doc, STRING_TEST_FIELD ).length );
        assertEquals( "Two numeric fields with specified name expected.",
                2, fieldsNamed( doc, NUMERIC_TEST_FIELD2 ).length );
    }

    /** Builds a fresh document holding both string and both numeric test fields. */
    private Document documentWithAllTestFields()
    {
        Document doc = new Document();
        indexType.addToDocument( doc, STRING_TEST_FIELD, "value" );
        indexType.addToDocument( doc, STRING_TEST_FIELD2, "value2" );
        indexType.addToDocument( doc, NUMERIC_TEST_FIELD, 1 );
        indexType.addToDocument( doc, NUMERIC_TEST_FIELD2, 2 );
        return doc;
    }

    /** Returns every field of {@code doc} whose name equals {@code name}. */
    private IndexableField[] fieldsNamed( Document doc, String name )
    {
        return doc.getFields( name );
    }
}

0 comments on commit f4af82c

Please sign in to comment.