This repository has been archived by the owner on Jul 6, 2022. It is now read-only.
/
mongoimport.scala
executable file
·223 lines (196 loc) · 7.71 KB
/
mongoimport.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
#!/bin/sh
# Shell preamble: /bin/sh runs the lines up to the closing marker below and
# re-launches this same file ("$0") with the `scala` script runner, forwarding
# all command-line arguments ("$@").
L=`pwd`
# Build the classpath from every entry in ./lib of the current directory.
# Entries are joined with ':' (the classpath separator) rather than the spaces
# that `echo` would produce, and "$L" is quoted so that a working directory
# containing spaces is not split into several words.
cp=""
for jar in "$L"/lib/*; do
  cp="${cp:+$cp:}$jar"
done
exec scala -cp "$cp" "$0" "$@"
!#
/*
* Copyright 2008-present MongoDB, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.concurrent.TimeUnit
import java.util.logging.Level
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future, Promise}
import scala.io.{BufferedSource, Source}
import com.mongodb.ConnectionString
import org.mongodb.scala.bson.collection.immutable.Document
import org.mongodb.scala.{Completed, MongoClient, MongoCollection}
/**
* An example program providing functionality similar to the `mongoimport` program.
*
* As there is no core CSV library for Scala, CSV import is left as an exercise for the reader.
*
* Add mongo-scala-driver-alldep jar to your path or add to ./lib directory and then run as a shell program:
*
* {{{
* ./mongoimport.scala -u mongodb://localhost/test.testData --drop < data/testData.json
* }}}
*
* Alternatively, run the `main` method in an IDE and pass the arguments in.
*
*/
object mongoimport {
  import scala.annotation.tailrec
  import scala.util.Failure
  import scala.util.control.NonFatal

  /** Number of input lines sent to the server per `insertMany` call. */
  private val BatchSize = 1000

  /** How long to wait for the `--drop` of the target collection to finish. */
  private val DropTimeout = Duration(10, TimeUnit.SECONDS)

  /** Help text printed for `--help`, for missing arguments and for unknown options. */
  val usage: String = """
    |Import JSON data into MongoDB using the MongoDB Scala Driver
    |
    |When importing JSON documents, each document must be a separate line of the input file.
    |
    |Example:
    | mongoimport --uri mongodb://localhost/my_db.my_collection < mydocfile.json
    |
    |Options:
    | --help produce help message
    | --quiet silence all non error diagnostic messages
    | -u [ --uri ] arg The connection URI - must contain a collection
    | mongodb://[username:password@]host1[:port1][,host2[:port2]]/database.collection[?options]
    | See: http://docs.mongodb.org/manual/reference/connection-string/
    | --file arg file to import from; if not specified stdin is used
    | --drop drop collection first
    |""".stripMargin

  /**
   * The main import program.
   *
   * Parses the command line, optionally drops the target collection, then inserts every line of
   * the input (a file or stdin) as one JSON document. Exits with status 1 on a usage error or
   * when the import fails.
   *
   * @param args the commandline arguments
   */
  def main(args: Array[String]): Unit = {
    // The time when the execution of this program started, in milliseconds since 1 January 1970 UTC.
    val executionStart: Long = currentTime

    if (args.isEmpty || args.contains("--help")) {
      Console.err.println(usage)
      sys.exit(1)
    }

    // Raise the level of the root logger's handlers to WARNING
    java.util.logging.Logger.getLogger("").getHandlers.foreach(h => h.setLevel(Level.WARNING))

    val options = getOptions(parseArgs(Map(), args.toList))

    val uri = options.uri.getOrElse {
      Console.err.println(s"Missing URI")
      Console.err.println(usage)
      sys.exit(1)
    }

    // Validate the URI before touching the input or opening a connection
    val mongoClientURI = new ConnectionString(uri)
    if (Option(mongoClientURI.getCollection).isEmpty) {
      Console.err.println(s"Missing collection name in the URI eg: mongodb://<hostInformation>/<database>.<collection>[?options]")
      Console.err.println(s"Current URI: $mongoClientURI")
      sys.exit(1)
    }

    // Get source
    val importSource: BufferedSource = options.file match {
      case Some(fileName) => Source.fromFile(fileName)
      case None => Source.stdin
    }

    val mongoClient = MongoClient(mongoClientURI.getURI)
    val outcome = try {
      val collection = mongoClient.getDatabase(mongoClientURI.getDatabase).getCollection(mongoClientURI.getCollection)

      if (options.drop) {
        if (!options.quiet) Console.err.println(s"Dropping: ${mongoClientURI.getCollection}")
        Await.result(collection.drop().head(), DropTimeout)
      }

      if (!options.quiet) Console.err.print("Importing...")
      val importer = importJson(collection, importSource.getLines(), Promise[Completed]()).future
      showPinWheel(importer, options.quiet)
      importer.value
    } finally {
      // Release the input and the client whether the import succeeded, failed or threw
      importSource.close()
      mongoClient.close()
    }

    outcome match {
      case Some(Failure(error)) =>
        // A failed import must not be reported as finished, and must be visible to the caller
        Console.err.println(s"Import failed: ${error.getMessage}")
        sys.exit(1)
      case _ =>
        val total = currentTime - executionStart
        if (!options.quiet) Console.err.println(s"Finished import: $total ms")
    }
  }

  /**
   * Imports JSON into the collection, one batch of up to `BatchSize` lines at a time.
   *
   * Each batch is inserted only after the previous one has been acknowledged. The promise is
   * completed successfully once the input is exhausted, or failed with the first error raised
   * while parsing a line or inserting a batch.
   *
   * @param collection the mongodb collection to insert into
   * @param lines the iterator from the importSource
   * @param promise the promise that is fulfilled on completion or on error
   * @return the promise that was passed in
   */
  private def importJson(collection: MongoCollection[Document], lines: Iterator[String], promise: Promise[Completed]): Promise[Completed] = {
    // Wrap the input once; `lines` itself is never touched again after this call
    val batches = lines.grouped(BatchSize)

    def insertNextBatch(): Unit =
      if (!batches.hasNext) {
        promise.trySuccess(Completed())
      } else {
        try {
          // Parse eagerly so that invalid JSON fails the promise instead of escaping on a driver thread
          val documents = batches.next().map(json => Document(json))
          collection.insertMany(documents).subscribe(
            (_: Completed) => insertNextBatch(),
            (failed: Throwable) => promise.tryFailure(failed)
          )
        } catch {
          case NonFatal(error) => promise.tryFailure(error)
        }
      }

    insertNextBatch()
    promise
  }

  /**
   * Recursively convert the args list into a Map of options.
   *
   * Prints an error followed by the usage text and exits with status 1 when an option is unknown
   * or when an option that takes a value is the last argument.
   *
   * @param map - the option map built so far
   * @param args - the remaining args list
   * @return the parsed OptionMap
   */
  @tailrec
  private def parseArgs(map: Map[String, Any], args: List[String]): Map[String, Any] =
    args match {
      case Nil => map
      case "--quiet" :: tail => parseArgs(map + ("quiet" -> true), tail)
      case ("-u" | "--uri") :: value :: tail => parseArgs(map + ("uri" -> value), tail)
      case "--file" :: value :: tail => parseArgs(map + ("file" -> value), tail)
      case "--drop" :: tail => parseArgs(map + ("drop" -> true), tail)
      case (option @ ("-u" | "--uri" | "--file")) :: Nil =>
        Console.err.println("Missing value for option " + option)
        Console.err.println(usage)
        sys.exit(1)
      case option :: _ =>
        Console.err.println("Unknown option " + option)
        Console.err.println(usage)
        sys.exit(1)
    }

  /**
   * Convert the optionMap to an Options instance.
   *
   * Keys that are absent, or whose value does not have the expected type, fall back to the
   * defaults declared on [[Options]].
   *
   * @param optionMap the parsed args options
   * @return Options instance
   */
  private def getOptions(optionMap: Map[String, _]): Options = {
    val default = Options()

    def flag(name: String, fallback: Boolean): Boolean =
      (optionMap.get(name): Option[Any]) match {
        case Some(value: Boolean) => value
        case _ => fallback
      }

    def text(name: String, fallback: Option[String]): Option[String] =
      (optionMap.get(name): Option[Any]) match {
        case Some(value: String) => Some(value)
        case _ => fallback
      }

    Options(
      quiet = flag("quiet", default.quiet),
      uri = text("uri", default.uri),
      file = text("file", default.file),
      drop = flag("drop", default.drop)
    )
  }

  /**
   * The parsed command line options.
   *
   * @param quiet when true, progress and status messages are not printed
   * @param uri the connection URI given with `-u` / `--uri`, if any
   * @param file the file given with `--file`; stdin is read when it is None
   * @param drop when true, the collection is dropped before importing
   */
  case class Options(quiet: Boolean = false, uri: Option[String] = None, file: Option[String] = None, drop: Boolean = false)

  /** The current wall-clock time in milliseconds, as reported by `System.currentTimeMillis()`. */
  private def currentTime: Long = System.currentTimeMillis()

  /**
   * Blocks until the future is completed, showing a pinWheel in the console.err while waiting.
   *
   * @param someFuture the future we are all waiting for
   * @param quiet when true, wait without printing anything
   */
  private def showPinWheel(someFuture: Future[_], quiet: Boolean = false): Unit = {
    if (quiet) {
      // Await.ready does not rethrow the future's failure; the caller inspects the result itself
      Await.ready(someFuture, Duration.Inf)
    } else {
      // Let the user know something is happening until someFuture isCompleted
      val spinChars = List("|", "/", "-", "\\")
      Iterator.continually(spinChars)
        .flatMap(_.iterator)
        .takeWhile(_ => !someFuture.isCompleted) // re-checked before every frame, not once per cycle
        .foreach { frame =>
          Console.err.print(frame)
          Thread.sleep(200)
          Console.err.print("\b")
        }
      Console.err.println("")
    }
  }
}
// Script entry point: pass the script's `args` to mongoimport.main.
mongoimport.main(args)