-
Notifications
You must be signed in to change notification settings - Fork 2.3k
/
Configuration.java
203 lines (175 loc) · 6.66 KB
/
Configuration.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
/**
* Copyright (c) 2002-2015 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.unsafe.impl.batchimport;
import org.neo4j.graphdb.factory.GraphDatabaseSettings;
import org.neo4j.kernel.configuration.Config;
import static java.lang.Math.max;
import static java.lang.Math.round;
/**
* User controlled configuration for a {@link BatchImporter}.
*/
public interface Configuration extends org.neo4j.unsafe.impl.batchimport.staging.Configuration
{
/**
* Name of the file in which bad entries from the import will end up. This file will be created in the
* database directory of the imported database, i.e. {@code <into>/bad.log}.
*/
String BAD_FILE_NAME = "bad.log";
/**
* Memory dedicated to buffering data to be written to each store file.
*
* @return size of the buffer used for writing to one store file. The unit is not stated here;
* presumably bytes -- TODO confirm against the code allocating these buffers.
*/
int fileChannelBufferSize();
/**
* Some files require a bigger buffer to avoid performance penalties imposed by the OS.
* This is a multiplier for how many times bigger such buffers are compared to {@link #fileChannelBufferSize()}.
*
* @return factor by which {@link #fileChannelBufferSize()} is multiplied to size such bigger buffers.
*/
int bigFileChannelBufferSizeMultiplier();
/**
* Threshold, expressed as a number of relationships, for considering a node dense.
*
* @return the dense node threshold, in number of relationships.
*/
int denseNodeThreshold();
/**
* Max number of I/O threads doing file write operations. The optimal value for this setting is heavily
* dependent on the hard drive. A spinning disk is most likely best off with 1, whereas an SSD may see
* better performance with a handful of threads writing to it simultaneously.
* This value counts against {@link #maxNumberOfProcessors()}: the total number of threads
* used by the importer at any given time is {@link #maxNumberOfProcessors()}, and out of those
* at most this many can be I/O threads.
* "Processor" in the context of the batch importer is different from "thread", since when discovering
* how many processors are fully in use there's a calculation where one thread takes up
* {@code 0 < fraction <= 1} of a processor.
*
* @return max number of processors used for I/O (file write) operations.
*/
int maxNumberOfIoProcessors();
/**
* Rough max number of processors (CPU cores) simultaneously used in total by the importer at any given time.
* This value should be set with {@link #maxNumberOfIoProcessors()} in mind.
* Defaults to the value provided by the {@link Runtime#availableProcessors() jvm}. There's a discrete
* number of threads that needs to be used just to get the very basics of the import working,
* so for that reason there's no lower bound to this value.
* "Processor" in the context of the batch importer is different from "thread", since when discovering
* how many processors are fully in use there's a calculation where one thread takes up
* {@code 0 < fraction <= 1} of a processor.
*
* @return rough max number of processors used in total by the importer.
*/
int maxNumberOfProcessors();
/**
 * Baseline {@link Configuration}: a mix of fixed values and values derived from the machine the
 * importer runs on (see {@link Runtime#availableProcessors()}).
 */
class Default
        extends org.neo4j.unsafe.impl.batchimport.staging.Configuration.Default
        implements Configuration
{
    // Values returned by fileChannelBufferSize() are multiples of this chunk size.
    private static final int OPTIMAL_FILE_CHANNEL_CHUNK_SIZE = 4 * 1024;

    @Override
    public int batchSize()
    {
        return 10_000;
    }

    @Override
    public int workAheadSize()
    {
        return 20;
    }

    @Override
    public int movingAverageSize()
    {
        return 100;
    }

    /**
     * Sizes the buffer so that, if a file channel were regarded as a batch of its own
     * (think asynchronous writing), roughly as many of those would be created as there
     * are batches of the other kinds.
     */
    @Override
    public int fileChannelBufferSize()
    {
        // Some kind of record size average
        final int roughRecordSize = 40;
        final int unrounded = batchSize() * roughRecordSize;
        return closestMultipleOf( OPTIMAL_FILE_CHANNEL_CHUNK_SIZE, unrounded );
    }

    @Override
    public int bigFileChannelBufferSizeMultiplier()
    {
        return 50;
    }

    /**
     * Uses the default value declared for {@link GraphDatabaseSettings#dense_node_threshold}.
     */
    @Override
    public int denseNodeThreshold()
    {
        final String declaredDefault = GraphDatabaseSettings.dense_node_threshold.getDefaultValue();
        return Integer.parseInt( declaredDefault );
    }

    /**
     * A third of the available processors, although never fewer than 2.
     */
    @Override
    public int maxNumberOfIoProcessors()
    {
        final int available = Runtime.getRuntime().availableProcessors();
        return max( 2, available / 3 );
    }

    @Override
    public int maxNumberOfProcessors()
    {
        return Runtime.getRuntime().availableProcessors();
    }

    /**
     * Rounds {@code value} to the closest multiple of {@code factor}.
     */
    private int closestMultipleOf( int factor, int value )
    {
        final int multiples = (int) round( value / (double) factor );
        return factor * multiples;
    }
}
/**
* Shared instance of {@link Default}. Also used as the fallback configuration by
* {@link Overridden#Overridden(Config)}.
*/
Configuration DEFAULT = new Default();
/**
 * {@link Configuration} where the dense node threshold is read from a {@link Config}, whereas the
 * other values overridden in this class are delegated to another {@link Configuration}.
 * Methods not overridden here are left to the superclass.
 */
class Overridden
        extends org.neo4j.unsafe.impl.batchimport.staging.Configuration.Overridden
        implements Configuration
{
    private final Config config;
    private final Configuration defaults;

    /**
     * Uses {@link Configuration#DEFAULT} for everything that isn't read from {@code config}.
     *
     * @param config {@link Config} to read the dense node threshold from.
     */
    public Overridden( Config config )
    {
        this( Configuration.DEFAULT, config );
    }

    /**
     * @param defaults {@link Configuration} to delegate to for values not read from {@code config}.
     * @param config {@link Config} to read the dense node threshold from.
     */
    public Overridden( Configuration defaults, Config config )
    {
        super( defaults );
        this.config = config;
        this.defaults = defaults;
    }

    /**
     * The only value in this class coming from {@link Config}:
     * {@link GraphDatabaseSettings#dense_node_threshold}.
     */
    @Override
    public int denseNodeThreshold()
    {
        return config.get( GraphDatabaseSettings.dense_node_threshold );
    }

    // Everything below is handed straight over to the supplied defaults.

    @Override
    public int fileChannelBufferSize()
    {
        return defaults.fileChannelBufferSize();
    }

    @Override
    public int bigFileChannelBufferSizeMultiplier()
    {
        return defaults.bigFileChannelBufferSizeMultiplier();
    }

    @Override
    public int maxNumberOfIoProcessors()
    {
        return defaults.maxNumberOfIoProcessors();
    }

    @Override
    public int maxNumberOfProcessors()
    {
        return defaults.maxNumberOfProcessors();
    }

    @Override
    public int movingAverageSize()
    {
        return defaults.movingAverageSize();
    }
}
}