-
Notifications
You must be signed in to change notification settings - Fork 23
SWIFT-1072 Improve insertion performance #55
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2be5c65
e5d7c0e
3f51435
d9a8a76
8d2df21
fb1ed82
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -74,12 +74,27 @@ public struct BSONDocument { | |
* - SeeAlso: http://bsonspec.org/ | ||
*/ | ||
public init(fromBSON bson: ByteBuffer) throws { | ||
let storage = BSONDocumentStorage(bson) | ||
let storage = BSONDocumentStorage(bson.slice()) | ||
try storage.validate() | ||
self = BSONDocument(fromUnsafeBSON: storage) | ||
self.storage = storage | ||
} | ||
|
||
/** | ||
* Initialize a new `BSONDocument` from the provided BSON data without validating the elements. The first four | ||
* bytes must accurately reflect the length of the buffer, however. | ||
* | ||
* If invalid BSON data is provided, undefined behavior or server-side errors may occur when using the | ||
* resultant `BSONDocument`. | ||
* | ||
* - Throws: `BSONError.InvalidArgumentError` if the provided BSON's length does not match the encoded length. | ||
*/ | ||
public init(fromBSONWithoutValidatingElements bson: ByteBuffer) throws { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One thing that isn't reflected in this PR is that something like the following could panic: let doc = try BSONDocument(fromBSONWithoutValidatingElements: bson)
for i in 0..<doc.count {
print(doc[i])
} This is because the index-based subscript doesn't return an optional since it just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmmm. I'm not sure we really have any other option besides like returning some placeholder value, right? it seems OK to me, I honestly kind of doubt people are using this subscript much anyway |
||
let storage = BSONDocumentStorage(bson) | ||
try self.init(fromBSONWithoutValidatingElements: storage) | ||
} | ||
|
||
internal init(fromUnsafeBSON storage: BSONDocumentStorage) { | ||
internal init(fromBSONWithoutValidatingElements storage: BSONDocumentStorage) throws { | ||
try storage.validateLength() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since it's basically free to validate the length and it makes the iterator logic a lot safer, it made sense to do it as a bare minimum validation. |
||
self.storage = storage | ||
} | ||
|
||
|
@@ -129,23 +144,15 @@ public struct BSONDocument { | |
|
||
/// The keys in this `BSONDocument`. | ||
public var keys: [String] { | ||
do { | ||
return try BSONDocumentIterator.getKeys(from: self.storage.buffer) | ||
} catch { | ||
fatalError("Failed to retrieve keys for document") | ||
} | ||
BSONDocumentIterator.getKeys(from: self.storage.buffer) | ||
} | ||
|
||
/// The values in this `BSONDocument`. | ||
public var values: [BSON] { self.map { _, val in val } } | ||
|
||
/// The number of (key, value) pairs stored at the top level of this document. | ||
public var count: Int { | ||
do { | ||
return try BSONDocumentIterator.getKeys(from: self.storage.buffer).count | ||
} catch { | ||
return 0 | ||
} | ||
BSONDocumentIterator.getKeys(from: self.storage.buffer).count | ||
} | ||
|
||
/// A copy of the `ByteBuffer` backing this document, containing raw BSON data. As `ByteBuffer`s implement | ||
|
@@ -158,7 +165,8 @@ public struct BSONDocument { | |
|
||
/// Returns a `Boolean` indicating whether this `BSONDocument` contains the provided key. | ||
public func hasKey(_ key: String) -> Bool { | ||
(try? BSONDocumentIterator.find(key: key, in: self)) != nil | ||
let it = self.makeIterator() | ||
return it.findValue(forKey: key) != nil | ||
} | ||
|
||
/** | ||
|
@@ -174,19 +182,10 @@ public struct BSONDocument { | |
*/ | ||
public subscript(key: String) -> BSON? { | ||
get { | ||
do { | ||
return try BSONDocumentIterator.find(key: key, in: self)?.value | ||
} catch { | ||
fatalError("Error looking up key \(key) in document: \(error)") | ||
} | ||
BSONDocumentIterator.find(key: key, in: self)?.value | ||
} | ||
set { | ||
// The only time this would crash is document too big error | ||
do { | ||
return try self.set(key: key, to: newValue) | ||
} catch { | ||
fatalError("Failed to set \(key) to \(String(describing: newValue)): \(error)") | ||
} | ||
self.set(key: key, to: newValue) | ||
} | ||
} | ||
|
||
|
@@ -251,10 +250,9 @@ public struct BSONDocument { | |
) | ||
} | ||
newStorage.buffer.writeBytes(suffix) | ||
newStorage.encodedLength = newSize | ||
|
||
var document = BSONDocument(fromUnsafeBSON: newStorage) | ||
document.storage.encodedLength = newSize | ||
return document | ||
return try BSONDocument(fromBSONWithoutValidatingElements: newStorage) | ||
} | ||
|
||
/// Appends the provided key value pair without checking to see if the key already exists. | ||
|
@@ -271,8 +269,8 @@ public struct BSONDocument { | |
* Sets a BSON element with the corresponding key | ||
* if element.value is nil the element is deleted from the BSON | ||
*/ | ||
internal mutating func set(key: String, to value: BSON?) throws { | ||
guard let range = try BSONDocumentIterator.findByteRange(for: key, in: self) else { | ||
private mutating func set(key: String, to value: BSON?) { | ||
guard let range = BSONDocumentIterator.findByteRange(for: key, in: self) else { | ||
guard let value = value else { | ||
// no-op: key does not exist and the value is nil | ||
return | ||
|
@@ -285,18 +283,18 @@ public struct BSONDocument { | |
let suffixLength = self.storage.encodedLength - range.endIndex | ||
|
||
guard | ||
let prefix = self.storage.buffer.getBytes(at: 0, length: prefixLength), | ||
var prefix = self.storage.buffer.getSlice(at: 0, length: prefixLength), | ||
let suffix = self.storage.buffer.getBytes(at: range.endIndex, length: suffixLength) | ||
else { | ||
throw BSONError.InternalError( | ||
message: "Cannot slice buffer from " + | ||
fatalError( | ||
"Cannot slice buffer from " + | ||
"0 to len \(range.startIndex) and from \(range.endIndex) " + | ||
"to len \(suffixLength) : \(self.storage.buffer)" | ||
) | ||
} | ||
|
||
var newStorage = BSONDocumentStorage() | ||
newStorage.buffer.writeBytes(prefix) | ||
newStorage.buffer.writeBuffer(&prefix) | ||
|
||
var newSize = self.storage.encodedLength - (range.endIndex - range.startIndex) | ||
if let value = value { | ||
|
@@ -305,7 +303,7 @@ public struct BSONDocument { | |
newSize += size | ||
|
||
guard newSize <= BSON_MAX_SIZE else { | ||
throw BSONError.DocumentTooLargeError(value: value.bsonValue, forKey: key) | ||
fatalError(BSONError.DocumentTooLargeError(value: value.bsonValue, forKey: key).message) | ||
} | ||
} | ||
|
||
|
@@ -389,8 +387,12 @@ public struct BSONDocument { | |
return totalBytes | ||
} | ||
|
||
internal func validate() throws { | ||
// Pull apart the underlying binary into [KeyValuePair], should reveal issues | ||
/// Verify that the encoded length matches the actual length of the buffer and that the buffer is | ||
/// isn't too small or too large. | ||
/// | ||
/// - Throws: `BSONError.InvalidArgumentError` if validation fails | ||
/// | ||
internal func validateLength() throws { | ||
guard let encodedLength = self.buffer.getInteger(at: 0, endianness: .little, as: Int32.self) else { | ||
throw BSONError.InvalidArgumentError(message: "Validation Failed: Cannot read encoded length") | ||
} | ||
|
@@ -403,11 +405,16 @@ public struct BSONDocument { | |
|
||
guard encodedLength == self.buffer.readableBytes else { | ||
throw BSONError.InvalidArgumentError( | ||
message: "BSONDocument's encoded byte length is \(encodedLength), however the" + | ||
message: "BSONDocument's encoded byte length is \(encodedLength), however the " + | ||
"buffer has \(self.buffer.readableBytes) readable bytes" | ||
) | ||
} | ||
} | ||
|
||
internal func validate() throws { | ||
try self.validateLength() | ||
|
||
// Pull apart the underlying binary into [KeyValuePair], should reveal issues | ||
var keySet = Set<String>() | ||
let iter = BSONDocumentIterator(over: self.buffer) | ||
do { | ||
|
@@ -528,7 +535,7 @@ extension BSONDocument: BSONValue { | |
throw BSONError.InternalError(message: "Cannot read document contents") | ||
} | ||
|
||
return .document(BSONDocument(fromUnsafeBSON: BSONDocument.BSONDocumentStorage(bytes))) | ||
return .document(try BSONDocument(fromBSONWithoutValidatingElements: BSONDocument.BSONDocumentStorage(bytes))) | ||
} | ||
|
||
internal func write(to buffer: inout ByteBuffer) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this will be the initializer that we use in the driver when dealing with BSON from libmongoc. It has to be a part of the public API since we use it in our other package, but I think that's okay because I can imagine users may find themselves in a similar situation.
I'm open to less verbose / different names for this by the way. I chose this one since I figured we might as well make it as explicit as possible if we don't want users to accidentally use this initializer wrongly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't have an idea for a better name at the moment. however, I wonder if it's worth considering combining this with the previous initializer and adding a
validateElements
or something parameter that defaults totrue
? I don't feel that strongly in either direction.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm yeah that's also an interesting idea. I think I lean slightly towards a separate initializer here since the error outcomes and performance characteristics change a lot depending on the value of the parameter, and having separate initializers may help to communicate this more explicitly. In general though, adding the parameter instead of having a mouthful single parameter seems to make more sense.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
keeping them separate for that reason makes sense to me. I'm fine with keeping this as-is