Skip to content

Commit

Permalink
Able to ignore empty columns from CSV input source
Browse files Browse the repository at this point in the history
  • Loading branch information
tinwelint committed Nov 9, 2015
1 parent abdb1ce commit e33216c
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 13 deletions.
Expand Up @@ -19,8 +19,6 @@
*/
package org.neo4j.tooling;

import java.io.OutputStream;

import org.neo4j.csv.reader.SourceTraceability;
import org.neo4j.function.Function;
import org.neo4j.unsafe.impl.batchimport.BatchImporter;
Expand All @@ -29,7 +27,6 @@
import org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdGenerator;
import org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper;
import org.neo4j.unsafe.impl.batchimport.input.Collector;
import org.neo4j.unsafe.impl.batchimport.input.Collectors;
import org.neo4j.unsafe.impl.batchimport.input.Groups;
import org.neo4j.unsafe.impl.batchimport.input.Input;
import org.neo4j.unsafe.impl.batchimport.input.InputNode;
Expand Down
Expand Up @@ -19,8 +19,6 @@
*/
package org.neo4j.unsafe.impl.batchimport.input;

import java.io.OutputStream;

import org.neo4j.unsafe.impl.batchimport.BatchImporter;
import org.neo4j.unsafe.impl.batchimport.InputIterable;
import org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdGenerator;
Expand Down
Expand Up @@ -20,8 +20,6 @@
package org.neo4j.unsafe.impl.batchimport.input;

import java.io.File;
import java.io.OutputStream;

import org.neo4j.unsafe.impl.batchimport.InputIterable;
import org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdGenerator;
import org.neo4j.unsafe.impl.batchimport.cache.idmapping.IdMapper;
Expand Down
Expand Up @@ -19,7 +19,6 @@
*/
package org.neo4j.unsafe.impl.batchimport.input.csv;

import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;

Expand Down
Expand Up @@ -20,14 +20,12 @@
package org.neo4j.unsafe.impl.batchimport.input.csv;

import java.io.IOException;
import java.util.ArrayList;

import org.neo4j.csv.reader.CharSeeker;
import org.neo4j.csv.reader.Extractors;
import org.neo4j.csv.reader.Mark;
import org.neo4j.function.Function;
import org.neo4j.helpers.Exceptions;
import org.neo4j.helpers.Pair;
import org.neo4j.helpers.collection.PrefetchingIterator;
import org.neo4j.kernel.impl.util.Validator;
import org.neo4j.unsafe.impl.batchimport.InputIterator;
Expand Down Expand Up @@ -91,9 +89,11 @@ protected ENTITY fetchNextOrNull()
// less columns than the data. Prints in close() so it only happens once per file.
while ( !mark.isEndOfLine() )
{
long lineNumber = data.lineNumber();
data.seek( mark, delimiter );
data.extract( mark, stringExtractor );
badCollector.collectExtraColumns( data.sourceDescription(), data.lineNumber(), stringExtractor.value() );
data.tryExtract( mark, stringExtractor );
badCollector.collectExtraColumns(
data.sourceDescription(), lineNumber, stringExtractor.value() );
}

entity = decorator.apply( entity );
Expand Down
Expand Up @@ -62,7 +62,6 @@
import org.neo4j.unsafe.impl.batchimport.Configuration;
import org.neo4j.unsafe.impl.batchimport.Configuration.Default;
import org.neo4j.unsafe.impl.batchimport.ParallelBatchImporter;
import org.neo4j.unsafe.impl.batchimport.input.Collectors;
import org.neo4j.unsafe.impl.batchimport.input.InputNode;
import org.neo4j.unsafe.impl.batchimport.input.InputRelationship;

Expand Down
Expand Up @@ -37,6 +37,7 @@
import org.neo4j.test.TargetDirectory;
import org.neo4j.test.TargetDirectory.TestDirectory;
import org.neo4j.unsafe.impl.batchimport.InputIterator;
import org.neo4j.unsafe.impl.batchimport.input.Collector;
import org.neo4j.unsafe.impl.batchimport.input.DataException;
import org.neo4j.unsafe.impl.batchimport.input.Group;
import org.neo4j.unsafe.impl.batchimport.input.Groups;
Expand All @@ -53,6 +54,9 @@
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Matchers.anyString;
import static org.mockito.Matchers.eq;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
Expand Down Expand Up @@ -777,6 +781,33 @@ public boolean emptyQuotedStringsAsNull()
}
}

@Test
public void shouldIgnoreEmptyExtraColumns() throws Exception
{
    // GIVEN
    // A two-column header followed by rows that carry more columns than the header declares:
    // the first data row has one empty trailing column, the second has one empty and one non-empty.
    String csv =
            ":ID,one\n" +
            "1,test,\n" +
            "2,test,,additional";
    Iterable<DataFactory<InputNode>> nodeData =
            DataFactories.nodeData( CsvInputTest.<InputNode>data( csv ) );
    Collector badCollector = mock( Collector.class );
    Input csvInput = new CsvInput( nodeData, defaultFormatNodeFileHeader(),
            null, null, IdType.INTEGER, COMMAS, badCollector );

    // WHEN
    try ( InputIterator<InputNode> iterator = csvInput.nodes().iterator() )
    {
        // THEN
        // Both nodes are read with only the declared property; the surplus columns do not end up on them.
        assertNode( iterator.next(), 1L, properties( "one", "test" ), labels() );
        assertNode( iterator.next(), 2L, properties( "one", "test" ), labels() );
        assertFalse( iterator.hasNext() );
    }

    // Every surplus column is handed to the collector exactly once, empty ones as a null value.
    // The long argument is the line number passed by the reader (0-based here, presumably -- verify).
    verify( badCollector, times( 1 ) ).collectExtraColumns( anyString(), eq( 1L ), eq( (String) null ) );
    verify( badCollector, times( 1 ) ).collectExtraColumns( anyString(), eq( 2L ), eq( (String) null ) );
    verify( badCollector, times( 1 ) ).collectExtraColumns( anyString(), eq( 2L ), eq( "additional" ) );
}

private Configuration customConfig( final char delimiter, final char arrayDelimiter, final char quote )
{
return new Configuration.Default()
Expand Down

0 comments on commit e33216c

Please sign in to comment.