-
Notifications
You must be signed in to change notification settings - Fork 23
SWIFT-1070 Improve performance of BSONDecoder #54
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,7 +15,7 @@ extension BSONDocument: Sequence { | |
public typealias SubSequence = BSONDocument | ||
|
||
/// Returns a `Bool` indicating whether the document is empty. | ||
public var isEmpty: Bool { self.keySet.isEmpty } | ||
public var isEmpty: Bool { self.storage.encodedLength == BSON_MIN_SIZE } | ||
|
||
/// Returns a `DocumentIterator` over the values in this `Document`. | ||
public func makeIterator() -> BSONDocumentIterator { | ||
|
@@ -29,7 +29,11 @@ extension BSONDocument: Sequence { | |
public func map<T>( | ||
_ transform: (Element) throws -> T | ||
) rethrows -> [T] { | ||
try AnySequence(self).map(transform) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LOL I was looking at the method body but not the actual context of what method you were implementing here and I was like "... why isn't he just calling |
||
var values: [T] = [] | ||
for (k, v) in self { | ||
values.append(try transform((key: k, value: v))) | ||
} | ||
return values | ||
} | ||
|
||
/** | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,28 +15,15 @@ public struct BSONDocument { | |
/// The element type of a document: a tuple containing an individual key-value pair. | ||
public typealias KeyValuePair = (key: String, value: BSON) | ||
|
||
private var storage: BSONDocumentStorage | ||
|
||
/// An unordered set containing the keys in this document. | ||
internal private(set) var keySet: Set<String> | ||
internal var storage: BSONDocumentStorage | ||
|
||
internal init(_ elements: [BSON]) { | ||
self = BSONDocument(keyValuePairs: elements.enumerated().map { i, element in (String(i), element) }) | ||
} | ||
|
||
internal init(keyValuePairs: [(String, BSON)]) { | ||
self.keySet = Set(keyValuePairs.map { $0.0 }) | ||
guard self.keySet.count == keyValuePairs.count else { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we still be making this assertion, even if we don't store the key set? I think without this we might allow a user to do something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch. Since this is primarily a concern of dictionary literal initializers, I've moved the validation into them instead of here, since this initializer is also used elsewhere (on potentially un-validated documents in the future). |
||
fatalError("Dictionary \(keyValuePairs) contains duplicate keys") | ||
} | ||
|
||
self.storage = BSONDocumentStorage() | ||
|
||
guard !self.keySet.isEmpty else { | ||
self = BSONDocument() | ||
return | ||
} | ||
|
||
let start = self.storage.buffer.writerIndex | ||
|
||
// reserve space for our byteLength that will be calculated | ||
|
@@ -58,7 +45,6 @@ public struct BSONDocument { | |
|
||
/// Initializes a new, empty `BSONDocument`. | ||
public init() { | ||
self.keySet = Set() | ||
self.storage = BSONDocumentStorage() | ||
self.storage.buffer.writeInteger(5, endianness: .little, as: Int32.self) | ||
self.storage.buffer.writeBytes([0]) | ||
|
@@ -89,13 +75,12 @@ public struct BSONDocument { | |
*/ | ||
public init(fromBSON bson: ByteBuffer) throws { | ||
let storage = BSONDocumentStorage(bson) | ||
let keys = try storage.validateAndRetrieveKeys() | ||
self = BSONDocument(fromUnsafeBSON: storage, keys: keys) | ||
try storage.validate() | ||
self = BSONDocument(fromUnsafeBSON: storage) | ||
} | ||
|
||
internal init(fromUnsafeBSON storage: BSONDocumentStorage, keys: Set<String>) { | ||
internal init(fromUnsafeBSON storage: BSONDocumentStorage) { | ||
self.storage = storage | ||
self.keySet = keys | ||
} | ||
|
||
/** | ||
|
@@ -155,7 +140,13 @@ public struct BSONDocument { | |
public var values: [BSON] { self.map { _, val in val } } | ||
|
||
/// The number of (key, value) pairs stored at the top level of this document. | ||
public var count: Int { self.keySet.count } | ||
public var count: Int { | ||
do { | ||
return try BSONDocumentIterator.getKeys(from: self.storage.buffer).count | ||
} catch { | ||
return 0 | ||
} | ||
} | ||
|
||
/// A copy of the `ByteBuffer` backing this document, containing raw BSON data. As `ByteBuffer`s implement | ||
/// copy-on-write, this copy will share byte storage with this document until either the document or the returned | ||
|
@@ -166,7 +157,9 @@ public struct BSONDocument { | |
public func toData() -> Data { Data(self.storage.buffer.readableBytesView) } | ||
|
||
/// Returns a `Boolean` indicating whether this `BSONDocument` contains the provided key. | ||
public func hasKey(_ key: String) -> Bool { self.keySet.contains(key) } | ||
public func hasKey(_ key: String) -> Bool { | ||
(try? BSONDocumentIterator.find(key: key, in: self)) != nil | ||
kmahar marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
/** | ||
* Allows getting and setting values on the document via subscript syntax. | ||
|
@@ -236,7 +229,7 @@ public struct BSONDocument { | |
* - SeeAlso: https://docs.mongodb.com/manual/core/document/#the-id-field | ||
*/ | ||
public func withID() throws -> BSONDocument { | ||
guard !self.keySet.contains("_id") else { | ||
guard !self.hasKey("_id") else { | ||
return self | ||
} | ||
|
||
|
@@ -259,37 +252,35 @@ public struct BSONDocument { | |
} | ||
newStorage.buffer.writeBytes(suffix) | ||
|
||
var newKeys = self.keySet | ||
newKeys.insert("_id") | ||
var document = BSONDocument(fromUnsafeBSON: newStorage, keys: newKeys) | ||
var document = BSONDocument(fromUnsafeBSON: newStorage) | ||
document.storage.encodedLength = newSize | ||
return document | ||
} | ||
|
||
/// Appends the provided key value pair without checking to see if the key already exists. | ||
/// Warning: appending two of the same key may result in errors or undefined behavior. | ||
internal mutating func append(key: String, value: BSON) { | ||
// setup to overwrite null terminator | ||
self.storage.buffer.moveWriterIndex(to: self.storage.encodedLength - 1) | ||
let size = self.storage.append(key: key, value: value) | ||
self.storage.buffer.writeInteger(0, endianness: .little, as: UInt8.self) // add back in our null terminator | ||
self.storage.encodedLength += size | ||
} | ||
|
||
/** | ||
* Sets a BSON element with the corresponding key | ||
* if element.value is nil the element is deleted from the BSON | ||
*/ | ||
internal mutating func set(key: String, to value: BSON?) throws { | ||
if !self.keySet.contains(key) { | ||
guard let range = try BSONDocumentIterator.findByteRange(for: key, in: self) else { | ||
guard let value = value else { | ||
// no-op: key does not exist and the value is nil | ||
return | ||
} | ||
// appending new key | ||
self.keySet.insert(key) | ||
// setup to overwrite null terminator | ||
self.storage.buffer.moveWriterIndex(to: self.storage.encodedLength - 1) | ||
let size = self.storage.append(key: key, value: value) | ||
self.storage.buffer.writeInteger(0, endianness: .little, as: UInt8.self) // add back in our null terminator | ||
self.storage.encodedLength += size | ||
self.append(key: key, value: value) | ||
return | ||
} | ||
|
||
guard let range = try BSONDocumentIterator.findByteRange(for: key, in: self) else { | ||
throw BSONError.InternalError(message: "Cannot find \(key) to delete") | ||
} | ||
|
||
let prefixLength = range.startIndex | ||
let suffixLength = self.storage.encodedLength - range.endIndex | ||
|
||
|
@@ -316,9 +307,6 @@ public struct BSONDocument { | |
guard newSize <= BSON_MAX_SIZE else { | ||
throw BSONError.DocumentTooLargeError(value: value.bsonValue, forKey: key) | ||
} | ||
} else { | ||
// Deleting | ||
self.keySet.remove(key) | ||
} | ||
|
||
newStorage.buffer.writeBytes(suffix) | ||
|
@@ -401,8 +389,7 @@ public struct BSONDocument { | |
return totalBytes | ||
} | ||
|
||
@discardableResult | ||
internal func validateAndRetrieveKeys() throws -> Set<String> { | ||
internal func validate() throws { | ||
// Pull apart the underlying binary into [KeyValuePair], should reveal issues | ||
guard let encodedLength = self.buffer.getInteger(at: 0, endianness: .little, as: Int32.self) else { | ||
throw BSONError.InvalidArgumentError(message: "Validation Failed: Cannot read encoded length") | ||
|
@@ -423,7 +410,6 @@ public struct BSONDocument { | |
|
||
var keySet = Set<String>() | ||
let iter = BSONDocumentIterator(over: self.buffer) | ||
// Implicitly validate with iterator | ||
do { | ||
while let (key, value) = try iter.nextThrowing() { | ||
let (inserted, _) = keySet.insert(key) | ||
|
@@ -432,26 +418,13 @@ public struct BSONDocument { | |
message: "Validation Failed: BSON contains multiple values for key \(key)" | ||
) | ||
} | ||
switch value { | ||
case let .document(doc): | ||
try doc.storage.validateAndRetrieveKeys() | ||
case let .array(array): | ||
for item in array { | ||
if let doc = item.documentValue { | ||
try doc.storage.validateAndRetrieveKeys() | ||
} | ||
} | ||
default: | ||
continue | ||
} | ||
try value.bsonValue.validate() | ||
} | ||
} catch let error as BSONError.InternalError { | ||
throw BSONError.InvalidArgumentError( | ||
message: "Validation Failed: \(error.message)" | ||
) | ||
} | ||
|
||
return keySet | ||
} | ||
} | ||
} | ||
|
@@ -469,6 +442,13 @@ extension BSONDocument: ExpressibleByDictionaryLiteral { | |
* - Returns: a new `BSONDocument` | ||
*/ | ||
public init(dictionaryLiteral keyValuePairs: (String, BSON)...) { | ||
self.init(dictionaryLiteral: Array(keyValuePairs)) | ||
} | ||
|
||
internal init(dictionaryLiteral keyValuePairs: [(String, BSON)]) { | ||
guard Set(keyValuePairs.map { $0.0 }).count == keyValuePairs.count else { | ||
fatalError("Dictionary \(keyValuePairs) contains duplicate keys") | ||
} | ||
self.init(keyValuePairs: keyValuePairs) | ||
} | ||
} | ||
|
@@ -548,13 +528,16 @@ extension BSONDocument: BSONValue { | |
throw BSONError.InternalError(message: "Cannot read document contents") | ||
} | ||
|
||
let keys = try BSONDocumentIterator.getKeySet(from: bytes) | ||
return .document(BSONDocument(fromUnsafeBSON: BSONDocument.BSONDocumentStorage(bytes), keys: keys)) | ||
return .document(BSONDocument(fromUnsafeBSON: BSONDocument.BSONDocumentStorage(bytes))) | ||
} | ||
|
||
internal func write(to buffer: inout ByteBuffer) { | ||
buffer.writeBytes(self.storage.buffer.readableBytesView) | ||
} | ||
|
||
internal func validate() throws { | ||
try self.storage.validate() | ||
} | ||
} | ||
|
||
extension BSONDocument: CustomStringConvertible { | ||
|
@@ -575,7 +558,7 @@ extension BSONDocument { | |
* - Returns: a `Bool` indicating whether the two documents are equal. | ||
*/ | ||
public func equalsIgnoreKeyOrder(_ other: BSONDocument) -> Bool { | ||
guard self.count == other.count else { | ||
guard self.storage.encodedLength == other.storage.encodedLength else { | ||
return false | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
since
BSONDocument.read
doesn't validate anymore, we need to usenextThrowing
to ensure we don'tfatalError
. Full validation will be performed inArray.validate
if necessary.