Skip to content

Commit

Permalink
Update legacy indexes to preserve documents sort fields on update
Browse files Browse the repository at this point in the history
Legacy indexes use stored fields to store user data and sorted doc value typed fields to make sorting smooth.
After retrieval of any saved document from the index store, only stored fields are visible (not sort fields),
and if we re-save that document we will lose any sort fields that we had before in the index for that document.
This PR introduces functionality that restores all 'missing' sort fields to make sure that documents can always be sorted.

(cherry picked from commit ead0adb)
  • Loading branch information
MishaDemianenko committed Nov 30, 2016
1 parent 24ddc8f commit 7eb4bc5
Show file tree
Hide file tree
Showing 3 changed files with 331 additions and 124 deletions.
Expand Up @@ -44,10 +44,9 @@
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.NumericUtils;


import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;


Expand All @@ -68,36 +67,16 @@ public Query get( String key, Object value )
} }


@Override @Override
public void addToDocument( Document document, String key, Object value ) void removeFieldsFromDocument( Document document, String key, Object value )
{ {
document.add( instantiateField( key, value, StringField.TYPE_STORED ) ); removeFieldsFromDocument( document, key, key, value );
document.add( instantiateSortField( key, value ) );
} }


@Override @Override
void removeFieldsFromDocument( Document document, String key, Object value ) protected void addNewFieldToDocument( Document document, String key, Object value )
{ {
Set<String> values = null; document.add( instantiateField( key, value, StringField.TYPE_STORED ) );
if ( value != null ) document.add( instantiateSortField( key, value ) );
{
String stringValue = value.toString();
values = new HashSet<>( Arrays.asList(
document.getValues( key ) ) );
if ( !values.remove( stringValue ) )
{
return;
}
}
removeFieldFromDocument( document, key );
if ( value != null )
{
for ( String existingValue : values )
{
addToDocument( document, key, existingValue );
}
}

restoreNumericFields( document );
} }


@Override @Override
Expand All @@ -115,6 +94,7 @@ public String toString()


private static class CustomType extends IndexType private static class CustomType extends IndexType
{ {
public static final String EXACT_FIELD_SUFFIX = "_e";
private final Similarity similarity; private final Similarity similarity;


CustomType( Analyzer analyzer, boolean toLowerCase, Similarity similarity ) CustomType( Analyzer analyzer, boolean toLowerCase, Similarity similarity )
Expand All @@ -139,14 +119,14 @@ public Query get( String key, Object value )


private String exactKey( String key ) private String exactKey( String key )
{ {
return key + "_e"; return key + EXACT_FIELD_SUFFIX;
} }


// TODO We should honor ValueContext instead of doing value.toString() here.
// if changing it, also change #get to honor ValueContext.
@Override @Override
public void addToDocument( Document document, String key, Object value ) protected void addNewFieldToDocument( Document document, String key, Object value )
{ {
// TODO We should honor ValueContext instead of doing value.toString() here.
// if changing it, also change #get to honor ValueContext.
document.add( new StringField( exactKey( key ), value.toString(), Store.YES ) ); document.add( new StringField( exactKey( key ), value.toString(), Store.YES ) );
document.add( instantiateField( key, value, TextField.TYPE_STORED ) ); document.add( instantiateField( key, value, TextField.TYPE_STORED ) );
document.add( instantiateSortField( key, value ) ); document.add( instantiateSortField( key, value ) );
Expand All @@ -159,31 +139,22 @@ void removeFieldFromDocument( Document document, String name )
document.removeFields( name ); document.removeFields( name );
} }


/**
 * Fields stored under the exact-match key (names ending with
 * {@link #EXACT_FIELD_SUFFIX}) are never reported; every other field is
 * judged by the inherited check.
 */
@Override
protected boolean haveSortedField( IndexableField field )
{
    if ( field.name().endsWith( EXACT_FIELD_SUFFIX ) )
    {
        return false;
    }
    return super.haveSortedField( field );
}

@Override @Override
void removeFieldsFromDocument( Document document, String key, Object value ) void removeFieldsFromDocument( Document document, String key, Object value )
{ {
String exactKey = exactKey( key ); removeFieldsFromDocument( document, key, exactKey( key ), value );
Set<String> values = null; }
if ( value != null )
{
String stringValue = value.toString();
values = new HashSet<>( Arrays.asList( document.getValues( exactKey ) ) );
if ( !values.remove( stringValue ) )
{
return;
}
}
document.removeFields( exactKey );
removeFieldFromDocument( document, key );
if ( value != null )
{
for ( String existingValue : values )
{
addToDocument( document, key, existingValue );
}
}


restoreNumericFields( document ); @Override
protected boolean isStoredField( IndexableField field )
{
return !field.name().endsWith( CustomType.EXACT_FIELD_SUFFIX ) && super.isStoredField( field );
} }


@Override @Override
Expand All @@ -202,6 +173,14 @@ private IndexType( Analyzer analyzer, boolean toLowerCase )
this.toLowerCase = toLowerCase; this.toLowerCase = toLowerCase;
} }


// Removes the fields held for key/value from the document. The implementations visible in this
// file delegate to the four-argument overload, passing the field name whose values are inspected.
abstract void removeFieldsFromDocument( Document document, String key, Object value );

// Removes fields registered under the given name from the document.
// NOTE(review): which companion field names are also dropped is implementation specific - confirm per subclass.
abstract void removeFieldFromDocument( Document document, String name );

// Adds the fields representing key/value to the document; restoring sort fields of
// already present fields is done separately by addToDocument.
abstract void addNewFieldToDocument( Document document, String key, Object value );

// Returns the query for the given key/value as defined by the concrete index type.
abstract Query get( String key, Object value );

static IndexType getIndexType( Map<String, String> config ) static IndexType getIndexType( Map<String, String> config )
{ {
String type = config.get( LuceneIndexImplementation.KEY_TYPE ); String type = config.get( LuceneIndexImplementation.KEY_TYPE );
Expand All @@ -213,14 +192,14 @@ static IndexType getIndexType( Map<String, String> config )
if ( type != null ) if ( type != null )
{ {
// Use the built in alternatives... "exact" or "fulltext" // Use the built in alternatives... "exact" or "fulltext"
if ( type.equals( "exact" ) ) if ( "exact".equals( type ) )
{ {
// In the exact case we default to false // In the exact case we default to false
boolean toLowerCase = TRUE.equals( toLowerCaseUnbiased ); boolean toLowerCase = TRUE.equals( toLowerCaseUnbiased );


result = toLowerCase ? new CustomType( new LowerCaseKeywordAnalyzer(), true, similarity ) : EXACT; result = toLowerCase ? new CustomType( new LowerCaseKeywordAnalyzer(), true, similarity ) : EXACT;
} }
else if ( type.equals( "fulltext" ) ) else if ( "fulltext".equals( type ) )
{ {
// In the fulltext case we default to true // In the fulltext case we default to true
boolean toLowerCase = !FALSE.equals( toLowerCaseUnbiased ); boolean toLowerCase = !FALSE.equals( toLowerCaseUnbiased );
Expand Down Expand Up @@ -252,6 +231,17 @@ else if ( type.equals( "fulltext" ) )
return result; return result;
} }


/**
 * Adds the fields for the given key/value pair to the document and then
 * re-creates sort fields for stored fields that lack them.
 *
 * @param document document to update
 * @param key field key
 * @param value value to index under the key
 */
public void addToDocument( Document document, String key, Object value )
{
// type specific fields for the new value
addNewFieldToDocument( document, key, value );
// sort fields are not visible on documents read back from the index store, so rebuild them
restoreSortFields( document );
}

/**
 * Tells whether the field is a stored field that carries user data.
 *
 * @param field field to inspect
 * @return {@code true} when the field type is stored and the field is not
 * the transaction state marker ({@code FullTxData.TX_STATE_KEY})
 */
protected boolean isStoredField( IndexableField field )
{
    if ( !field.fieldType().stored() )
    {
        return false;
    }
    return !FullTxData.TX_STATE_KEY.equals( field.name() );
}

private static boolean parseBoolean( String string, boolean valueIfNull ) private static boolean parseBoolean( String string, boolean valueIfNull )
{ {
return string == null ? valueIfNull : Boolean.parseBoolean( string ); return string == null ? valueIfNull : Boolean.parseBoolean( string );
Expand Down Expand Up @@ -284,8 +274,6 @@ private static <T> T getByClassName( Map<String, String> config, String configKe
return null; return null;
} }


abstract Query get( String key, Object value );

TxData newTxData( LuceneLegacyIndex index ) TxData newTxData( LuceneLegacyIndex index )
{ {
return new ExactTxData( index ); return new ExactTxData( index );
Expand Down Expand Up @@ -315,8 +303,6 @@ Query query( String keyOrNull, Object value, QueryContext contextOrNull )
} }
} }


abstract void addToDocument( Document document, String key, Object value );

public static IndexableField instantiateField( String key, Object value, FieldType fieldType ) public static IndexableField instantiateField( String key, Object value, FieldType fieldType )
{ {
IndexableField field; IndexableField field;
Expand Down Expand Up @@ -368,7 +354,14 @@ else if ( value instanceof Double )
} }
else else
{ {
field = new SortedSetDocValuesField( key, new BytesRef( value.toString() ) ); if ( LuceneLegacyIndex.KEY_DOC_ID.equals( key ) )
{
field = new NumericDocValuesField( key, Long.parseLong( value.toString() ) );
}
else
{
field = new SortedSetDocValuesField( key, new BytesRef( value.toString() ) );
}
} }
return field; return field;
} }
Expand All @@ -385,10 +378,6 @@ final void removeFromDocument( Document document, String key, Object value )
} }
} }


abstract void removeFieldsFromDocument( Document document, String key, Object value );

abstract void removeFieldFromDocument( Document document, String name );

private void clearDocument( Document document ) private void clearDocument( Document document )
{ {
Set<String> names = new HashSet<>(); Set<String> names = new HashSet<>();
Expand All @@ -403,25 +392,74 @@ private void clearDocument( Document document )
} }
} }


// Re-add numeric field since their index info is lost after reading the fields from the index store // Re-add field since their index info is lost after reading the fields from the index store
protected void restoreNumericFields( Document document ) void restoreSortFields( Document document )
{ {
List<IndexableField> numericFields = new ArrayList<>(); Map<String,Object> fieldsWithoutSortFields = new HashMap<>();
for ( IndexableField field : document.getFields() ) for ( IndexableField field : document.getFields() )
{ {
if ( field.numericValue() != null && !field.name().equals( LuceneLegacyIndex.KEY_DOC_ID ) && if ( isStoredField( field ) )
DocValuesType.NONE.equals( field.fieldType().docValuesType() ) )
{ {
numericFields.add( field ); IndexableField[] fields = document.getFields( field.name() );
if ( !haveSortField( fields ) )
{
fieldsWithoutSortFields.put( field.name(), getFieldValue( field ) );
}
} }
} }
for ( IndexableField field : numericFields ) for ( Map.Entry<String,Object> entry : fieldsWithoutSortFields.entrySet() )
{ {
removeFieldFromDocument( document, field.name() ); removeFieldsFromDocument( document, entry.getKey(), entry.getValue() );
addToDocument( document, field.name(), field.numericValue() ); addNewFieldToDocument( document, entry.getKey(), entry.getValue() );
} }
} }


/**
 * Removes {@code value} from the fields kept under {@code key}.
 * <p>
 * With a {@code null} value the fields for the key are removed outright.
 * Otherwise the current values are read from {@code exactKey}; if the value
 * is not among them nothing happens, else the key's fields are removed and
 * every remaining value is added back through
 * {@link #addNewFieldToDocument(Document, String, Object)}.
 *
 * @param document document to modify
 * @param key key whose fields are removed and re-added
 * @param exactKey field name the existing values are read from
 * @param value value to remove, or {@code null} to remove all values of the key
 */
void removeFieldsFromDocument( Document document, String key, String exactKey, Object value )
{
    if ( value == null )
    {
        removeFieldFromDocument( document, key );
        return;
    }

    String valueToRemove = value.toString();
    Set<String> remainingValues = new HashSet<>( Arrays.asList( document.getValues( exactKey ) ) );
    boolean wasPresent = remainingValues.remove( valueToRemove );
    if ( !wasPresent )
    {
        // value is not part of this document, leave it untouched
        return;
    }

    removeFieldFromDocument( document, key );
    for ( String remainingValue : remainingValues )
    {
        addNewFieldToDocument( document, key, remainingValue );
    }
}

/**
 * Checks whether any of the given fields has a doc values type other than
 * {@code DocValuesType.NONE}.
 *
 * @param fields fields sharing one name
 * @return {@code true} as soon as one field with doc values is found
 */
private boolean haveSortField( IndexableField[] fields )
{
    boolean sortFieldFound = false;
    for ( int i = 0; i < fields.length && !sortFieldFound; i++ )
    {
        sortFieldFound = !DocValuesType.NONE.equals( fields[i].fieldType().docValuesType() );
    }
    return sortFieldFound;
}

/**
 * Returns {@code true} when the field has no doc values
 * ({@code DocValuesType.NONE}) and exposes a non-null numeric or string value.
 * <p>
 * NOTE(review): despite its name the method answers {@code true} for fields
 * WITHOUT doc values - confirm the intended meaning with the callers.
 *
 * @param field field to inspect
 */
protected boolean haveSortedField( IndexableField field )
{
    if ( !DocValuesType.NONE.equals( field.fieldType().docValuesType() ) )
    {
        return false;
    }
    return getFieldValue( field ) != null;
}

/**
 * Extracts the value of a field, preferring the numeric representation.
 *
 * @param field field to read
 * @return the numeric value when the field has one, otherwise its string
 * value (which may be {@code null})
 */
private Object getFieldValue( IndexableField field )
{
    Number number = field.numericValue();
    if ( number != null )
    {
        return number;
    }
    return field.stringValue();
}

public static Document newBaseDocument( long entityId ) public static Document newBaseDocument( long entityId )
{ {
Document doc = new Document(); Document doc = new Document();
Expand Down
Expand Up @@ -19,37 +19,43 @@
*/ */
package org.neo4j.index.impl.lucene.legacy; package org.neo4j.index.impl.lucene.legacy;


import java.util.Map;

import org.junit.After; import org.junit.After;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.Before; import org.junit.Before;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule; import org.junit.Rule;
import org.junit.rules.TestName; import org.junit.rules.TestName;


import java.util.Map;

import org.neo4j.graphdb.GraphDatabaseService; import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node; import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.PropertyContainer; import org.neo4j.graphdb.PropertyContainer;
import org.neo4j.graphdb.Relationship; import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.RelationshipType; import org.neo4j.graphdb.RelationshipType;
import org.neo4j.graphdb.Transaction; import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
import org.neo4j.graphdb.index.Index; import org.neo4j.graphdb.index.Index;
import org.neo4j.graphdb.index.RelationshipIndex; import org.neo4j.graphdb.index.RelationshipIndex;
import org.neo4j.helpers.collection.MapUtil; import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.test.TestGraphDatabaseFactory; import org.neo4j.test.TargetDirectory;

import static org.neo4j.helpers.collection.MapUtil.stringMap;


public abstract class AbstractLuceneIndexTest public abstract class AbstractLuceneIndexTest
{ {
@Rule @Rule
public final TestName testname = new TestName(); public final TestName testname = new TestName();
@ClassRule
public static TargetDirectory.TestDirectory testDirectory = TargetDirectory.testDirForTest( AbstractLuceneIndexTest.class );
protected static GraphDatabaseService graphDb; protected static GraphDatabaseService graphDb;
protected Transaction tx; protected Transaction tx;


@BeforeClass @BeforeClass
public static void setUpStuff() public static void setUpStuff()
{ {
graphDb = new TestGraphDatabaseFactory().newImpermanentDatabase(); graphDb = new GraphDatabaseFactory().newEmbeddedDatabase( testDirectory.graphDbDir() );
} }


@AfterClass @AfterClass
Expand Down Expand Up @@ -136,36 +142,19 @@ public void delete( Relationship entity )
} }
}; };


/**
 * Relationship creator that reuses one pair of nodes for every relationship
 * it creates; the nodes are created on first use.
 */
static class FastRelationshipCreator implements EntityCreator<Relationship>
{
    private Node node;
    private Node otherNode;

    public Relationship create( Object... properties )
    {
        ensureNodesExist();
        Relationship relationship = node.createRelationshipTo( otherNode, TEST_TYPE );
        setProperties( relationship, properties );
        return relationship;
    }

    public void delete( Relationship entity )
    {
        entity.delete();
    }

    // Creates the shared start/end nodes the first time a relationship is requested.
    private void ensureNodesExist()
    {
        if ( node != null )
        {
            return;
        }
        node = graphDb.createNode();
        otherNode = graphDb.createNode();
    }
}

private static void setProperties( PropertyContainer entity, Object... properties ) private static void setProperties( PropertyContainer entity, Object... properties )
{ {
for ( Map.Entry<String, Object> entry : MapUtil.map( properties ).entrySet() ) for ( Map.Entry<String, Object> entry : MapUtil.map( properties ).entrySet() )
{ {
entity.setProperty( entry.getKey(), entry.getValue() ); entity.setProperty( entry.getKey(), entry.getValue() );
} }
} }


/**
 * Returns the node index named after the current test, requested with an
 * empty configuration map.
 */
protected Index<Node> nodeIndex()
{
return nodeIndex( currentIndexName(), stringMap() );
}

protected Index<Node> nodeIndex( Map<String, String> config ) protected Index<Node> nodeIndex( Map<String, String> config )
{ {
return nodeIndex( currentIndexName(), config ); return nodeIndex( currentIndexName(), config );
Expand Down

0 comments on commit 7eb4bc5

Please sign in to comment.