Skip to content

Commit

Permalink
Merge pull request #8967 from tinwelint/3.0-unlimited-bad-tolerance
Browse files Browse the repository at this point in the history
Ability to set unlimited bad tolerance in import tool
  • Loading branch information
MishaDemianenko committed Mar 17, 2017
2 parents b202119 + bd66305 commit 49b282a
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 4 deletions.
Expand Up @@ -60,6 +60,7 @@
import org.neo4j.unsafe.impl.batchimport.BatchImporter;
import org.neo4j.unsafe.impl.batchimport.ParallelBatchImporter;
import org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.DuplicateInputIdException;
import org.neo4j.unsafe.impl.batchimport.input.BadCollector;
import org.neo4j.unsafe.impl.batchimport.input.Collector;
import org.neo4j.unsafe.impl.batchimport.input.Input;
import org.neo4j.unsafe.impl.batchimport.input.InputException;
Expand Down Expand Up @@ -99,6 +100,7 @@
*/
public class ImportTool
{
private static final String UNLIMITED = "true";
private static final int UNSPECIFIED = -1;

enum Options
Expand Down Expand Up @@ -171,7 +173,7 @@ enum Options
"<true/false>",
"Enable printing of error stack traces." ),
BAD_TOLERANCE( "bad-tolerance", 1000,
"<max number of bad entries>",
"<max number of bad entries, or " + UNLIMITED + " for unlimited>",
"Number of bad entries before the import is considered failed. This tolerance threshold is "
+ "about relationships refering to missing nodes. Format errors in input data are "
+ "still treated as errors" ),
Expand Down Expand Up @@ -357,8 +359,7 @@ public static void main( String[] incomingArguments, boolean defaultSettingsSuit
processors = args.getNumber( Options.PROCESSORS.key(), null );
IdType idType = args.interpretOption( Options.ID_TYPE.key(),
withDefault( (IdType)Options.ID_TYPE.defaultValue() ), TO_ID_TYPE );
badTolerance = args.getNumber( Options.BAD_TOLERANCE.key(),
(Number) Options.BAD_TOLERANCE.defaultValue() ).intValue();
badTolerance = parseNumberOrUnlimited( args, Options.BAD_TOLERANCE );
inputEncoding = Charset.forName( args.get( Options.INPUT_ENCODING.key(), defaultCharset().name() ) );
skipBadRelationships = args.getBoolean( Options.SKIP_BAD_RELATIONSHIPS.key(),
(Boolean)Options.SKIP_BAD_RELATIONSHIPS.defaultValue(), true );
Expand Down Expand Up @@ -458,6 +459,12 @@ idType, csvConfiguration( args, defaultSettingsSuitableForTests ), badCollector,
}
}

/**
 * Reads an option whose value is either a whole number or the {@link #UNLIMITED} keyword.
 *
 * @param args command line arguments to read the value from.
 * @param option option to look up; its default value is passed to {@code args.get} as the fallback.
 * @return {@link BadCollector#UNLIMITED_TOLERANCE} if the value equals {@link #UNLIMITED},
 * otherwise the value parsed as an integer.
 * @throws NumberFormatException if the value is neither {@link #UNLIMITED} nor a valid integer.
 */
private static Integer parseNumberOrUnlimited( Args args, Options option )
{
    String value = args.get( option.key(), option.defaultValue().toString() );
    if ( UNLIMITED.equals( value ) )
    {
        return BadCollector.UNLIMITED_TOLERANCE;
    }

    try
    {
        return Integer.parseInt( value );
    }
    catch ( NumberFormatException e )
    {
        // Integer.parseInt only reports the offending string. Rethrow the same exception type, but
        // tell the user which option was wrong and which values are accepted.
        NumberFormatException detailed = new NumberFormatException( "Invalid value '" + value +
                "' for option '" + option.key() + "', expected a number or '" + UNLIMITED +
                "' for unlimited" );
        detailed.initCause( e );
        throw detailed;
    }
}

private static Config loadDbConfig( File file ) throws IOException
{
return file != null && file.exists() ? new Config( MapUtil.load( file ) ) : Config.defaults();
Expand Down
Expand Up @@ -29,6 +29,7 @@
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
Expand Down Expand Up @@ -1138,6 +1139,23 @@ public void shouldPrintReferenceLinkOnDataImportErrors() throws Exception
"/import/import-tool-header-format/#import-tool-id-spaces" );
}

@Test
public void shouldCollectUnlimitedNumberOfBadEntries() throws Exception
{
    // GIVEN
    // node data where the very same id is repeated 10 000 times
    List<String> duplicatedIds = Collections.nCopies( 10_000, "A" );
    String storeDir = dbRule.getStoreDirAbsolutePath();
    String nodeFile = nodeData( true, Configuration.COMMAS, duplicatedIds, TRUE ).getAbsolutePath();

    // WHEN
    importTool( "--into", storeDir, "--nodes", nodeFile, "--skip-duplicate-nodes", "--bad-tolerance", "true" );

    // THEN
    // there is nothing to assert: the import is expected to get here without failing, i.e. all those
    // duplicates were accepted since "true" was given as bad tolerance
}

private void shouldPrintReferenceLinkAsPartOfErrorMessage( List<String> nodeIds,
Iterator<RelationshipDataLine> relationshipDataLines, String message ) throws Exception
{
Expand Down
Expand Up @@ -60,6 +60,7 @@ private interface ProblemReporter
// volatile since one importer thread calls collect(), where this value is incremented and later the "main"
// thread calls badEntries() to get a count.
private volatile int badEntries;
public static final int UNLIMITED_TOLERANCE = -1;

public BadCollector( OutputStream out, int tolerance, int collect )
{
Expand Down Expand Up @@ -171,7 +172,7 @@ private void checkTolerance( int bit, ProblemReporter report )
badEntries++;
}

if ( !collect || badEntries > tolerance )
if ( !collect || (tolerance != BadCollector.UNLIMITED_TOLERANCE && badEntries > tolerance) )
{
InputException exception = report.exception();
throw collect
Expand Down
40 changes: 40 additions & 0 deletions community/kernel/src/test/java/org/neo4j/helpers/ArgsTest.java
@@ -0,0 +1,40 @@
/*
* Copyright (c) 2002-2017 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.helpers;

import org.junit.Test;

import static org.junit.Assert.assertEquals;

public class ArgsTest
{
    /**
     * Parses the arguments {@code -test -} and expects the lone dash to come back as the value of "test".
     */
    @Test
    public void shoulInterpretSingleDashAsValue() throws Exception
    {
        // GIVEN / WHEN
        String interpreted = Args.parse( "-test", "-" ).get( "test" );

        // THEN
        assertEquals( "-", interpreted );
    }
}
Expand Up @@ -36,6 +36,8 @@
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import static org.neo4j.unsafe.impl.batchimport.input.BadCollector.COLLECT_ALL;
import static org.neo4j.unsafe.impl.batchimport.input.BadCollector.UNLIMITED_TOLERANCE;
import static org.neo4j.unsafe.impl.batchimport.input.BadCollectorTest.InputRelationshipBuilder.inputRelationship;

public class BadCollectorTest
Expand Down Expand Up @@ -202,6 +204,23 @@ public void shouldProvideNodeIdsSorted() throws Exception
assertArrayEquals( new long[] {8, 10, 12}, nodeIds );
}

@Test
public void shouldCollectUnlimitedNumberOfBadEntriesIfToldTo() throws Exception
{
    // GIVEN
    // a collector created with UNLIMITED_TOLERANCE as its tolerance
    final int duplicates = 10_000;
    BadCollector unlimited = new BadCollector( new NullOutputStream(), UNLIMITED_TOLERANCE, COLLECT_ALL );

    // WHEN
    int id = 0;
    while ( id < duplicates )
    {
        unlimited.collectDuplicateNode( id, id, "group", "first", "other" );
        id++;
    }

    // THEN
    // every single reported duplicate has been counted
    assertEquals( duplicates, unlimited.badEntries() );
}

private OutputStream badOutputFile() throws IOException
{
File badDataPath = new File( "/tmp/foo2" ).getAbsoluteFile();
Expand Down Expand Up @@ -240,4 +259,12 @@ private File badDataFile( FileSystemAbstraction fileSystem, File badDataPath ) t
fileSystem.create( badDataPath );
return badDataPath;
}

/**
 * {@link OutputStream} whose {@link #write(int)} does nothing, so written bytes are dropped.
 */
private static class NullOutputStream extends OutputStream
{
    @Override
    public void write( int b ) throws IOException
    {
        // Intentionally empty: the byte is not stored or forwarded anywhere
    }
}
}

0 comments on commit 49b282a

Please sign in to comment.