Skip to content

Commit

Permalink
Merge pull request #7251 from rishabhjain08/3.1
Browse files Browse the repository at this point in the history
Added a trim-strings option to trim entries of string type during data import.
  • Loading branch information
systay committed May 31, 2016
2 parents 6aeff10 + ae27810 commit 69b338b
Show file tree
Hide file tree
Showing 8 changed files with 142 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,11 @@ public interface Configuration
*/
boolean multilineFields();

/**
* @return {@code true} if leading and trailing whitespace should be trimmed from string values
* as they are extracted, {@code false} to keep them exactly as they appear in the input.
*/
boolean trimStrings();

/**
* @return {@code true} for treating empty strings, i.e. {@code ""} as null, instead of an empty string.
*/
Expand Down Expand Up @@ -71,6 +76,12 @@ public boolean emptyQuotedStringsAsNull()
{
return false;
}

@Override
public boolean trimStrings()
{
// Trimming is opt-in: by default string values are left untouched.
return false;
}
}

Configuration DEFAULT = new Default();
Expand Down Expand Up @@ -107,5 +118,11 @@ public boolean emptyQuotedStringsAsNull()
{
return defaults.emptyQuotedStringsAsNull();
}

@Override
public boolean trimStrings()
{
// Not customized here; answer with whatever the {@code defaults} configuration says.
return defaults.trimStrings();
}
}
}
29 changes: 23 additions & 6 deletions community/csv/src/main/java/org/neo4j/csv/reader/Extractors.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,12 @@ public class Extractors

public Extractors( char arrayDelimiter )
{
this( arrayDelimiter, Configuration.DEFAULT.emptyQuotedStringsAsNull() );
this( arrayDelimiter, Configuration.DEFAULT.emptyQuotedStringsAsNull(), Configuration.DEFAULT.trimStrings() );
}

/**
* Convenience constructor that takes the string-trimming setting from
* {@link Configuration#DEFAULT} and delegates to the three-argument constructor.
*
* @param arrayDelimiter delimiter handed on to the three-argument constructor.
* @param emptyStringsAsNull empty-string handling handed on to the three-argument constructor.
*/
public Extractors( char arrayDelimiter, boolean emptyStringsAsNull )
{
this( arrayDelimiter, emptyStringsAsNull, Configuration.DEFAULT.trimStrings() );
}

/**
Expand All @@ -94,7 +99,7 @@ public Extractors( char arrayDelimiter )
* something that would be impossible otherwise. There's an equivalent {@link #valueOf(String)}
* method to keep the feel of an enum.
*/
public Extractors( char arrayDelimiter, boolean emptyStringsAsNull )
public Extractors( char arrayDelimiter, boolean emptyStringsAsNull, boolean trimStrings )
{
try
{
Expand All @@ -110,7 +115,7 @@ public Extractors( char arrayDelimiter, boolean emptyStringsAsNull )
}
}

add( string = new StringExtractor( emptyStringsAsNull ) );
add( string = new StringExtractor( emptyStringsAsNull, trimStrings ) );
add( long_ = new LongExtractor() );
add( int_ = new IntExtractor() );
add( char_ = new CharExtractor() );
Expand All @@ -119,7 +124,7 @@ public Extractors( char arrayDelimiter, boolean emptyStringsAsNull )
add( boolean_ = new BooleanExtractor() );
add( float_ = new FloatExtractor() );
add( double_ = new DoubleExtractor() );
add( stringArray = new StringArrayExtractor( arrayDelimiter ) );
add( stringArray = new StringArrayExtractor( arrayDelimiter, trimStrings ) );
add( booleanArray = new BooleanArrayExtractor( arrayDelimiter ) );
add( byteArray = new ByteArrayExtractor( arrayDelimiter ) );
add( shortArray = new ShortArrayExtractor( arrayDelimiter ) );
Expand Down Expand Up @@ -282,11 +287,13 @@ public static class StringExtractor extends AbstractSingleValueExtractor<String>
{
private String value;
private final boolean emptyStringsAsNull;
private final boolean trimStrings;

public StringExtractor( boolean emptyStringsAsNull )
public StringExtractor( boolean emptyStringsAsNull, boolean trimStrings )
{
super( String.class.getSimpleName() );
this.emptyStringsAsNull = emptyStringsAsNull;
this.trimStrings = trimStrings;
}

@Override
Expand All @@ -305,6 +312,10 @@ protected boolean nullValue( int length, boolean skippedChars )
protected boolean extract0( char[] data, int offset, int length )
{
    // Materialize the requested character range, then optionally run it through String.trim()
    // when this extractor was created with trimStrings enabled.
    String extracted = new String( data, offset, length );
    value = trimStrings ? extracted.trim() : extracted;
    // Extraction of a string always produces a value.
    return true;
}

Expand Down Expand Up @@ -699,10 +710,12 @@ public boolean equals( Object obj )
private static class StringArrayExtractor extends ArrayExtractor<String[]>
{
private static final String[] EMPTY = new String[0];
private final boolean trimStrings;

StringArrayExtractor( char arrayDelimiter )
StringArrayExtractor( char arrayDelimiter, boolean trimStrings )
{
super( arrayDelimiter, String.class );
this.trimStrings = trimStrings;
}

@Override
Expand All @@ -714,6 +727,10 @@ protected void extract0( char[] data, int offset, int length )
{
int numberOfChars = charsToNextDelimiter( data, offset+charIndex, length-charIndex );
value[arrayIndex] = new String( data, offset+charIndex, numberOfChars );
if (trimStrings)
{
value[arrayIndex] = value[arrayIndex].trim();
}
charIndex += numberOfChars;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,38 @@ public void shouldExtractNullForEmptyQuotedStringIfConfiguredTo() throws Excepti
assertNull( extracted );
}

@Test
public void shouldTrimStringIfConfiguredTo() throws Exception
{
    // GIVEN a string extractor created with trimStrings enabled
    String untrimmed = " abcde fgh ";
    char[] chars = untrimmed.toCharArray();
    Extractor<String> stringExtractor = new Extractors( ',', true, true ).string();

    // WHEN
    stringExtractor.extract( chars, 0, chars.length, true );

    // THEN the surrounding whitespace is gone
    String expected = untrimmed.trim();
    assertEquals( expected, stringExtractor.value() );
}

@Test
public void shouldNotTrimStringIfNotConfiguredTo() throws Exception
{
    // GIVEN a string extractor created with trimStrings disabled
    String untrimmed = " abcde fgh ";
    char[] chars = untrimmed.toCharArray();
    Extractor<String> stringExtractor = new Extractors( ',', true, false ).string();

    // WHEN
    stringExtractor.extract( chars, 0, chars.length, true );

    // THEN the value comes back exactly as it went in
    assertEquals( untrimmed, stringExtractor.value() );
}

private String toString( long[] values, char delimiter )
{
StringBuilder builder = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ object CSVResources {
override def multilineFields(): Boolean = true

override def emptyQuotedStringsAsNull(): Boolean = true

override def trimStrings(): Boolean = false
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ enum Options
"<true/false>",
"Whether or not fields from input source can span multiple lines, i.e. contain newline characters." ),

TRIM_STRINGS( "trim-strings", org.neo4j.csv.reader.Configuration.DEFAULT.trimStrings(),
"<true/false>",
"Whether or not strings should be trimmed for whitespaces."),

INPUT_ENCODING( "input-encoding", null,
"<character set>",
"Character set that input data is encoded in. Provided value must be one out of the available "
Expand Down Expand Up @@ -673,6 +677,7 @@ private static Configuration csvConfiguration( Args args, final boolean defaultS
CHARACTER_CONVERTER );
final Boolean multiLineFields = args.getBoolean( Options.MULTILINE_FIELDS.key(), null );
final Boolean emptyStringsAsNull = args.getBoolean( Options.IGNORE_EMPTY_STRINGS.key(), null );
final Boolean trimStrings = args.getBoolean( Options.TRIM_STRINGS.key(), null);
return new Configuration.Default()
{
@Override
Expand Down Expand Up @@ -720,6 +725,14 @@ public int bufferSize()
{
return defaultSettingsSuitableForTests ? 10_000 : super.bufferSize();
}

@Override
public boolean trimStrings()
{
    // No explicit value parsed from the arguments: fall back to the default configuration.
    if ( trimStrings == null )
    {
        return defaultConfiguration.trimStrings();
    }
    return trimStrings.booleanValue();
}
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,59 @@ public void shouldDisallowMultilineFieldsByDefault() throws Exception
}
}

/**
 * Imports a single node whose quoted name has leading and trailing whitespace, without passing
 * {@code --trim-strings}, and verifies that the stored property is identical to the input.
 */
@Test
public void shouldNotTrimStringsByDefault() throws Exception
{
    // GIVEN
    String name = " This is a line with leading and trailing whitespaces ";
    File data = data( ":ID,name", "1,\"" + name + "\"");

    // WHEN
    importTool(
            "--into", dbRule.getStoreDirAbsolutePath(),
            "--nodes", data.getAbsolutePath() );

    // THEN
    GraphDatabaseService db = dbRule.getGraphDatabaseAPI();
    // The iterator is managed by try-with-resources so that it is closed even when
    // Iterators.single or the assertion throws.
    try ( Transaction tx = db.beginTx();
          ResourceIterator<Node> allNodes = db.getAllNodes().iterator() )
    {
        Node node = Iterators.single( allNodes );

        assertEquals( name, node.getProperty( "name" ) );

        tx.success();
    }
}

/**
 * Imports a single node whose quoted name has leading and trailing whitespace with
 * {@code --trim-strings true}, and verifies that the stored property equals the trimmed input.
 */
@Test
public void shouldTrimStringsIfConfiguredTo() throws Exception
{
    // GIVEN
    String name = " This is a line with leading and trailing whitespaces ";
    File data = data( ":ID,name", "1,\"" + name + "\"");

    // WHEN
    importTool(
            "--into", dbRule.getStoreDirAbsolutePath(),
            "--nodes", data.getAbsolutePath(),
            "--trim-strings", "true" );

    // THEN
    GraphDatabaseService db = dbRule.getGraphDatabaseAPI();
    // The iterator is managed by try-with-resources so that it is closed even when
    // Iterators.single or the assertion throws.
    try ( Transaction tx = db.beginTx();
          ResourceIterator<Node> allNodes = db.getAllNodes().iterator() )
    {
        Node node = Iterators.single( allNodes );

        assertEquals( name.trim(), node.getProperty( "name" ) );

        tx.success();
    }
}

@Test
public void shouldPrintReferenceLinkOnDataImportErrors() throws Exception
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ public Header create( CharSeeker dataSeeker, Configuration config, IdType idType
{
headerSeeker = headerCharSeekerFactory.open( dataSeeker, config );
Mark mark = new Mark();
Extractors extractors = new Extractors( config.arrayDelimiter(), config.emptyQuotedStringsAsNull() );
Extractors extractors = new Extractors( config.arrayDelimiter(), config.emptyQuotedStringsAsNull(), config.trimStrings() );
Extractor<?> idExtractor = idType.extractor( extractors );
int delimiter = config.delimiter();
List<Header.Entry> columns = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public class InputEntityDeserializer<ENTITY extends InputEntity>
private final Function<ENTITY,ENTITY> decorator;
private final Deserialization<ENTITY> deserialization;
private final Validator<ENTITY> validator;
private final Extractors.StringExtractor stringExtractor = new Extractors.StringExtractor( false );
private final Extractors.StringExtractor stringExtractor = new Extractors.StringExtractor( false, false );
private final Collector badCollector;

InputEntityDeserializer( Header header, CharSeeker data, int delimiter,
Expand Down

0 comments on commit 69b338b

Please sign in to comment.