-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
CsvImporter.java
127 lines (116 loc) · 6 KB
/
CsvImporter.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/*
* Copyright (c) 2002-2017 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.commandline.dbms;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.Locale;
import org.neo4j.commandline.admin.IncorrectUsage;
import org.neo4j.commandline.admin.OutsideWorld;
import org.neo4j.commandline.dbms.config.WrappedBatchImporterConfigurationForNeo4jAdmin;
import org.neo4j.commandline.dbms.config.WrappedCsvInputConfigurationForNeo4jAdmin;
import org.neo4j.graphdb.factory.GraphDatabaseSettings;
import org.neo4j.helpers.Args;
import org.neo4j.io.fs.FileSystemAbstraction;
import org.neo4j.kernel.configuration.Config;
import org.neo4j.tooling.ImportTool;
import org.neo4j.unsafe.impl.batchimport.Configuration;
import org.neo4j.unsafe.impl.batchimport.input.BadCollector;
import org.neo4j.unsafe.impl.batchimport.input.Collector;
import org.neo4j.unsafe.impl.batchimport.input.csv.CsvInput;
import org.neo4j.unsafe.impl.batchimport.input.csv.IdType;
import static java.nio.charset.Charset.defaultCharset;
import static org.neo4j.kernel.impl.util.Converters.withDefault;
import static org.neo4j.tooling.ImportTool.csvConfiguration;
import static org.neo4j.tooling.ImportTool.extractInputFiles;
import static org.neo4j.tooling.ImportTool.importConfiguration;
import static org.neo4j.tooling.ImportTool.nodeData;
import static org.neo4j.tooling.ImportTool.relationshipData;
import static org.neo4j.tooling.ImportTool.validateInputFiles;
import static org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector;
import static org.neo4j.unsafe.impl.batchimport.input.Collectors.collect;
import static org.neo4j.unsafe.impl.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader;
import static org.neo4j.unsafe.impl.batchimport.input.csv.DataFactories.defaultFormatRelationshipFileHeader;
/**
 * {@link Importer} for CSV input. Every setting is read from the supplied {@link Args} when the
 * instance is created; {@link #doImport()} then assembles a {@link CsvInput} and delegates the
 * import itself to {@link ImportTool}.
 */
class CsvImporter implements Importer
{
    private final Collection<Args.Option<File[]>> nodesFiles;
    private final Collection<Args.Option<File[]>> relationshipsFiles;
    private final IdType idType;
    private final Charset inputEncoding;
    private final Config databaseConfig;
    private final Args args;
    private final OutsideWorld outsideWorld;
    private final String reportFileName;
    // Populated from the "ignore-missing-nodes" option; the field name differs from the option name.
    private final boolean ignoreBadRelationships;
    private final boolean ignoreDuplicateNodes;
    private final boolean ignoreExtraColumns;

    /**
     * Reads all import options from {@code args} and validates the node/relationship input files.
     *
     * @param args parsed command line arguments; the options read here are {@code nodes},
     * {@code relationships}, {@code report-file}, {@code ignore-extra-columns},
     * {@code ignore-duplicate-nodes}, {@code ignore-missing-nodes}, {@code id-type} and
     * {@code input-encoding}.
     * @param databaseConfig configuration that {@link #doImport()} later reads the store and logs
     * directories from.
     * @param outsideWorld supplies the error stream and the file system used by this importer.
     * @throws IncorrectUsage if input file validation fails with an {@link IllegalArgumentException};
     * only the message of that exception is carried over.
     */
    CsvImporter( Args args, Config databaseConfig, OutsideWorld outsideWorld ) throws IncorrectUsage
    {
        this.args = args;
        this.outsideWorld = outsideWorld;
        nodesFiles = extractInputFiles( args, "nodes", outsideWorld.errorStream() );
        relationshipsFiles = extractInputFiles( args, "relationships", outsideWorld.errorStream() );
        reportFileName =
                args.interpretOption( "report-file", withDefault( ImportCommand.DEFAULT_REPORT_FILE_NAME ), s -> s );
        ignoreExtraColumns = args.getBoolean( "ignore-extra-columns", false );
        ignoreDuplicateNodes = args.getBoolean( "ignore-duplicate-nodes", false );
        ignoreBadRelationships = args.getBoolean( "ignore-missing-nodes", false );
        try
        {
            validateInputFiles( nodesFiles, relationshipsFiles );
        }
        catch ( IllegalArgumentException e )
        {
            throw new IncorrectUsage( e.getMessage() );
        }

        // Upper-case with Locale.ROOT: the no-argument toUpperCase() follows the JVM default locale,
        // and e.g. a Turkish locale maps 'i' to a dotted capital 'İ', so "string" or "integer" would
        // no longer match the IdType constant names.
        idType = args.interpretOption( "id-type", withDefault( IdType.STRING ),
                from -> IdType.valueOf( from.toUpperCase( Locale.ROOT ) ) );
        // Charset.forName throws an unchecked exception for an illegal or unsupported charset name;
        // that exception is not translated into IncorrectUsage here.
        inputEncoding = Charset.forName( args.get( "input-encoding", defaultCharset().name() ) );
        this.databaseConfig = databaseConfig;
    }

    /**
     * Runs the import: opens the report file, builds the bad-entry collector, the importer
     * configuration and the {@link CsvInput}, then hands everything to
     * {@link ImportTool#doImport}.
     *
     * @throws IOException declared by the file system and {@link ImportTool} calls made here.
     */
    @Override
    public void doImport() throws IOException
    {
        FileSystemAbstraction fs = outsideWorld.fileSystem();
        File storeDir = databaseConfig.get( GraphDatabaseSettings.database_path );
        File logsDir = databaseConfig.get( GraphDatabaseSettings.logs_directory );
        File reportFile = new File( reportFileName );

        // This method never closes badOutput itself; the stream is passed on to ImportTool.doImport.
        // NOTE(review): presumably ImportTool.doImport takes ownership and closes it - confirm.
        OutputStream badOutput = new BufferedOutputStream( fs.openAsOutputStream( reportFile, false ) );

        // Tolerance is unlimited as soon as any "ignore-*" option is enabled, otherwise zero.
        Collector badCollector = badCollector( badOutput, isIgnoringSomething() ? BadCollector.UNLIMITED_TOLERANCE : 0,
                collect( ignoreBadRelationships, ignoreDuplicateNodes, ignoreExtraColumns ) );

        Configuration configuration = new WrappedBatchImporterConfigurationForNeo4jAdmin( importConfiguration(
                null, false, databaseConfig, storeDir ) );

        // The last CsvInput argument is the negation of the "ignore-missing-nodes" setting.
        CsvInput input = new CsvInput(
                nodeData( inputEncoding, nodesFiles ), defaultFormatNodeFileHeader(),
                relationshipData( inputEncoding, relationshipsFiles ), defaultFormatRelationshipFileHeader(),
                idType,
                new WrappedCsvInputConfigurationForNeo4jAdmin( csvConfiguration( args, false ) ),
                badCollector,
                configuration.maxNumberOfProcessors(), !ignoreBadRelationships );

        // The error stream is passed for both of the first two (stream) arguments.
        ImportTool.doImport( outsideWorld.errorStream(), outsideWorld.errorStream(), storeDir, logsDir, reportFile, fs,
                nodesFiles, relationshipsFiles, false, input, this.databaseConfig, badOutput, configuration );
    }

    /**
     * @return {@code true} if at least one of the three "ignore" options is enabled.
     */
    private boolean isIgnoringSomething()
    {
        return ignoreBadRelationships || ignoreDuplicateNodes || ignoreExtraColumns;
    }
}