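<?xml version="1.0"?>
<!--
  Web-page text that was captured along with this file. It is not part of the
  configuration and is kept only as a comment so that nothing precedes the
  XML declaration:
    Skip to content
    HTTPS clone URL
    Subversion checkout URL
    You can clone with HTTPS or Subversion.
    Download ZIP
    195 lines (183 sloc) 6.764 kb
-->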
<configuration>
<property>
  <!-- Should the job be run verbosely? -->
  <name>mongo.job.verbose</name>
  <value>false</value>
</property>
<!-- it is *strongly* recommended for consistent behavior that you disable speculative execution in hadoop -->
<property>
  <!-- Speculative execution of map tasks: disabled here. -->
  <name>mapred.map.tasks.speculative.execution</name>
  <value>false</value>
</property>
<property>
  <!-- Speculative execution of reduce tasks: disabled here. -->
  <name>mapred.reduce.tasks.speculative.execution</name>
  <value>false</value>
</property>
<property>
  <!--
    Should the job be backgrounded, rather than run in the foreground while
    waiting for a response?
  -->
  <name>mongo.job.background</name>
  <value>false</value>
</property>
<property>
  <!--
    For a sharded input collection: should each sharding chunk be read as a
    Hadoop Input Split?

    'mongo.input.split.create_input_splits' must also be enabled, otherwise
    no chunk reading occurs.

    NOTE: combining this with 'mongo.input.split.read_from_shards' can cause
    erratic/unexpected behavior, because chunks are then forcibly read from
    their last known shard rather than through mongos, which accounts for
    and allows migrations.
  -->
  <name>mongo.input.split.read_shard_chunks</name>
  <value>true</value>
</property>
<property>
  <!--
    Read directly from the shard servers?
      enabled  : connects directly to each configured shard host or
                 replica set.
      disabled : (the default) sends all reads through mongos.
    For most users this should not be enabled. Left mutable for advanced
    users and/or the clinically insane.
  -->
  <name>mongo.input.split.read_from_shards</name>
  <value>false</value>
</property>
<property>
  <!--
    Should mongo-hadoop attempt to calculate InputSplits for you?

    Sharded input collection:
      Also enable 'read_shard_chunks' to read individual chunk data into
      the system. Otherwise enabling this uses the "Unsharded" mode
      described next.

    Unsharded input collection (or chunk reading was not enabled):
      true (the default)
        'mongo.input.split_size' controls the approximate number of
        megabytes assigned to each calculated InputSplit. You will probably
        also want to set 'mongo.input.split.split_key_pattern' to control
        the split point (defaults to {_id: 1}).
      false
        Only one InputSplit is created, which is fairly inefficient
        Hadoopery.
  -->
  <name>mongo.input.split.create_input_splits</name>
  <value>true</value>
</property>
<property>
  <!--
    keyPattern used to calculate the split point when working on an
    unsharded collection with 'create_input_splits' enabled.

    It must be a JSON keyPattern spec that follows *exactly the same rules*
    as choosing a shard key for an existing MongoDB collection that
    contains data. That includes the key being indexed:
    http://www.mongodb.org/display/DOCS/Sharding+Introduction#ShardingIntroduction-ShardKeys

    Defaults to _id.
  -->
  <name>mongo.input.split.split_key_pattern</name>
  <value>{ _id: 1 }</value>
</property>
<property>
  <!-- Field passed to the mapper as its key; defaults to _id when blank. -->
  <name>mongo.input.key</name>
  <value></value>
</property>
<property>
  <!--
    When true, queries for Input Splits do not time out (the
    QUERY_NOTIMEOUT flag is set). Behavior may be erratic.
  -->
  <name>mongo.input.notimeout</name>
  <value>false</value>
</property>
<property>
  <!--
    The URI to read from, if you are reading from mongo.
    Example: mongodb://localhost/db.collection?option=value

    The example is kept in this comment instead of inside <value>, so the
    value element holds no comment node and stays plainly empty until a
    URI is filled in.
  -->
  <name>mongo.input.uri</name>
  <value></value>
</property>
<property>
  <!--
    The URI to write to, if you are writing to mongo.
    Example: mongodb://localhost/db.collection?option=value

    The example is kept in this comment instead of inside <value>, so the
    value element holds no comment node and stays plainly empty until a
    URI is filled in.
  -->
  <name>mongo.output.uri</name>
  <value></value>
</property>
<property>
  <!-- Query to execute, written in JSON [OPTIONAL]. -->
  <name>mongo.input.query</name>
  <value></value>
</property>
<property>
  <!-- Fields to read, written in JSON [OPTIONAL]. -->
  <name>mongo.input.fields</name>
  <value></value>
</property>
<property>
  <!-- Sort specification for the read, written in JSON [OPTIONAL]. -->
  <name>mongo.input.sort</name>
  <value></value>
</property>
<property>
  <!--
    Number of documents to limit the read to [OPTIONAL].
    0 == no limit.
  -->
  <name>mongo.input.limit</name>
  <value>0</value>
</property>
<property>
  <!--
    Number of documents to skip in the read [OPTIONAL].
    0 == no skip.
  -->
  <!-- TODO - Are we running limit() or skip() first? -->
  <name>mongo.input.skip</name>
  <value>0</value>
</property>
<property>
  <!--
    Set this if you want to control the split size for input.
    It affects the number of mappers, so be careful what you do.

    NOTE(review): the unit is documented inconsistently in this file. The
    comment on 'mongo.input.split.create_input_splits' says this setting
    controls the approximate number of megabytes assigned per calculated
    InputSplit, while this property was previously described as "a long
    indicating # of docs per split". Confirm the unit against the
    mongo-hadoop code before choosing a value.

    No <value> element is given here, so this file assigns nothing to the
    property; presumably the library's built-in default then applies
    (TODO confirm). Add a <value> element to override it.
  -->
  <name>mongo.input.split_size</name>
</property>
<!-- These .job.* class defs are optional and only needed if you use the MongoTool baseclass -->
<property>
  <!-- Mapper class. -->
  <name>mongo.job.mapper</name>
  <value></value>
</property>
<property>
  <!-- Class for the reducer. -->
  <name>mongo.job.reducer</name>
  <value></value>
</property>
<property>
  <!-- Class for the InputFormat. -->
  <name>mongo.job.input.format</name>
  <value></value>
  <!-- Disabled alternative: <value>com.mongodb.hadoop.MongoInputFormat</value> -->
</property>
<property>
  <!-- Class for the OutputFormat. -->
  <name>mongo.job.output.format</name>
  <value></value>
  <!-- Disabled alternative: <value>com.mongodb.hadoop.MongoOutputFormat</value> -->
</property>
<property>
  <!-- Key class emitted to the output format. -->
  <name>mongo.job.output.key</name>
  <value></value>
</property>
<property>
  <!-- Value class emitted to the output format. -->
  <name>mongo.job.output.value</name>
  <value></value>
</property>
<property>
  <!-- Key class emitted by the mapper [optional]. -->
  <name>mongo.job.mapper.output.key</name>
  <value></value>
</property>
<property>
  <!-- Value class emitted by the mapper [optional]. -->
  <name>mongo.job.mapper.output.value</name>
  <value></value>
</property>
<property>
  <!-- Combiner class [optional]. -->
  <name>mongo.job.combiner</name>
  <value></value>
</property>
<property>
  <!-- Class for the partitioner [optional]. -->
  <name>mongo.job.partitioner</name>
  <value></value>
</property>
<property>
  <!-- Class for the Sort Comparator [optional]. -->
  <name>mongo.job.sort_comparator</name>
  <value></value>
</property>
</configuration>
<!--
  Web-page text that was captured along with this file (not part of the
  configuration):
    Jump to Line
    Something went wrong with that request. Please try again.
-->