This repository has been archived by the owner on Jan 29, 2022. It is now read-only.
/
MongoInputSplit.java
129 lines (108 loc) · 4.13 KB
/
MongoInputSplit.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
// MongoInputSplit.java
/*
* Copyright 2010 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mongodb.hadoop.mapred.input;
import com.mongodb.*;
import com.mongodb.hadoop.util.*;
import com.mongodb.util.*;
import org.apache.commons.logging.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
import java.io.*;
import java.util.*;
/**
 * An input split for the {@code org.apache.hadoop.mapred} API that describes a MongoDB query:
 * the target {@link MongoURI} plus query, field selection, sort, limit and skip.
 *
 * <p>The split is a {@link Writable}; {@link #write(DataOutput)} and {@link #readFields(DataInput)}
 * must stay in agreement about field order and encoding. The {@link DBCursor} built from these
 * settings is created lazily by {@link #getCursor()} and cached; it is never serialized.
 */
@SuppressWarnings( "deprecation" )
public class MongoInputSplit implements Writable, InputSplit {

    /**
     * Creates a split from its individual query settings and eagerly opens the cursor.
     *
     * @param inputURI URI of the MongoDB collection to read from
     * @param query    query document passed to {@code find}
     * @param fields   field-selection document passed to {@code find}
     * @param sort     sort document applied to the cursor
     * @param limit    limit value; stored and serialized, but not yet applied to the cursor
     * @param skip     skip value; stored and serialized, but not yet applied to the cursor
     */
    public MongoInputSplit( MongoURI inputURI, DBObject query, DBObject fields, DBObject sort, int limit, int skip ){
        log.info( "Creating a new MongoInputSplit for MongoURI '" + inputURI + "', query: '" + query + "', fieldSpec: '"
                  + fields + "', sort: '" + sort + "', limit: " + limit + ", skip: " + skip + " ." );
        _mongoURI = inputURI;
        _querySpec = query;
        _fieldSpec = fields;
        _sortSpec = sort;
        _limit = limit;
        _skip = skip;
        // Opens the cursor right away, exactly as the original constructor did.
        getCursor();
    }

    /**
     * Creates a split by copying the settings of a {@code com.mongodb.hadoop.input.MongoInputSplit}.
     *
     * @param split the split whose URI, query, fields, sort, limit and skip are copied
     */
    public MongoInputSplit( com.mongodb.hadoop.input.MongoInputSplit split ){
        this( split.getMongoURI(), split.getQuerySpec(), split.getFieldSpec(), split.getSortSpec(), split.getLimit(),
              split.getSkip() );
    }

    /**
     * No-argument constructor; the fields are expected to be populated afterwards through
     * {@link #readFields(DataInput)}.
     */
    public MongoInputSplit(){ }

    /**
     * Returns the length of this split. The real size is not computed; a constant
     * placeholder is returned instead.
     *
     * @return always {@link Long#MAX_VALUE}
     */
    @Override
    public long getLength(){
        return Long.MAX_VALUE;
    }

    /**
     * Returns the hosts named in the split's MongoDB URI.
     *
     * @return a new array holding the URI's hosts
     */
    @Override
    public String[] getLocations(){
        // toArray allocates a correctly sized array itself, so getHosts() is only needed once.
        return _mongoURI.getHosts().toArray( new String[0] );
    }

    /**
     * Serialize the Split instance: URI, query, fields and sort as UTF strings (the documents
     * as JSON), followed by limit and skip as ints.
     *
     * @param out the output to write to
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write( DataOutput out ) throws IOException{
        out.writeUTF( _mongoURI.toString() );
        out.writeUTF( JSON.serialize( _querySpec ) );
        out.writeUTF( JSON.serialize( _fieldSpec ) );
        out.writeUTF( JSON.serialize( _sortSpec ) );
        out.writeInt( _limit );
        out.writeInt( _skip );
    }

    /**
     * Deserialize the Split instance, reading the fields in the order {@link #write(DataOutput)}
     * emits them, and then re-create the cursor for the freshly read settings.
     *
     * @param in the input to read from
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields( DataInput in ) throws IOException{
        _mongoURI = new MongoURI( in.readUTF() );
        _querySpec = (DBObject) JSON.parse( in.readUTF() );
        _fieldSpec = (DBObject) JSON.parse( in.readUTF() );
        _sortSpec = (DBObject) JSON.parse( in.readUTF() );
        _limit = in.readInt();
        _skip = in.readInt();

        // Drop any cursor cached for the previous settings; otherwise getCursor() would keep
        // returning a cursor built from the old URI/query when this instance is reused.
        // The old cursor is not closed here because a caller may still hold a reference to it.
        _cursor = null;
        getCursor();

        if ( log.isDebugEnabled() ){
            log.debug( "Deserialized MongoInputSplit ... { length = " + getLength() + ", locations = "
                       + Arrays.toString( getLocations() ) + ", query = " + _querySpec
                       + ", fields = " + _fieldSpec + ", sort = " + _sortSpec + ", limit = " + _limit + ", skip = "
                       + _skip + "}" );
        }
    }

    /**
     * Returns the cursor for this split, with the split's query, field selection and sort
     * already applied. The cursor is created on first use and cached for later calls.
     *
     * @return the cached cursor for this split's settings
     */
    DBCursor getCursor(){
        // todo - support limit/skip (both are stored and serialized but not applied here)
        if ( _cursor == null ){
            _cursor = MongoConfigUtil.getCollection( _mongoURI ).find( _querySpec, _fieldSpec ).sort( _sortSpec );
            _cursor.slaveOk();
        }

        return _cursor;
    }

    /**
     * @return a short description containing the URI and the query
     */
    @Override
    public String toString(){
        return "MongoInputSplit{URI=" + _mongoURI + ", query=" + _querySpec + '}';
    }

    // Settings describing the query; all of them are written by write() and restored by readFields().
    private MongoURI _mongoURI;
    private DBObject _querySpec;
    private DBObject _fieldSpec;
    private DBObject _sortSpec;
    private int _limit = 0;
    private int _skip = 0;

    // Lazily created by getCursor(); never serialized.
    private transient DBCursor _cursor;

    private static final Log log = LogFactory.getLog( MongoInputSplit.class );
}