Skip to content

Commit

Permalink
feat(NODE-5958): add BSON iterating API (#656)
Browse files Browse the repository at this point in the history
Co-authored-by: Aditi Khare <106987683+aditi-khare-mongoDB@users.noreply.github.com>
  • Loading branch information
nbbeeken and aditi-khare-mongoDB committed Mar 15, 2024
1 parent 2f0effb commit 269df91
Show file tree
Hide file tree
Showing 8 changed files with 631 additions and 25 deletions.
1 change: 1 addition & 0 deletions .eslintrc.json
Expand Up @@ -64,6 +64,7 @@
"@typescript-eslint/no-unsafe-return": "off",
"@typescript-eslint/no-unsafe-argument": "off",
"@typescript-eslint/no-unsafe-call": "off",
"@typescript-eslint/no-unsafe-enum-comparison": "off",
"@typescript-eslint/consistent-type-imports": [
"error",
{
Expand Down
2 changes: 1 addition & 1 deletion src/bson.ts
Expand Up @@ -54,7 +54,7 @@ export { BSONValue } from './bson_value';
export { BSONError, BSONVersionError, BSONRuntimeError } from './error';
export { BSONType } from './constants';
export { EJSON } from './extended_json';
export { onDemand } from './parser/on_demand/index';
export { onDemand, type OnDemand } from './parser/on_demand/index';

/** @public */
export interface Document {
Expand Down
17 changes: 17 additions & 0 deletions src/parser/on_demand/index.ts
@@ -1,5 +1,6 @@
import { type BSONError, BSONOffsetError } from '../../error';
import { type BSONElement, parseToElements } from './parse_to_elements';
import { type BSONReviver, type Container, parseToStructure } from './parse_to_structure';
/**
* @experimental
* @public
Expand All @@ -12,6 +13,21 @@ export type OnDemand = {
isBSONError(value: unknown): value is BSONError;
};
parseToElements: (this: void, bytes: Uint8Array, startOffset?: number) => Iterable<BSONElement>;
parseToStructure: <
TRoot extends Container = {
dest: Record<string, unknown>;
kind: 'object';
}
>(
bytes: Uint8Array,
startOffset?: number,
root?: TRoot,
reviver?: BSONReviver
) => TRoot extends undefined ? Record<string, unknown> : TRoot['dest'];
// Types
BSONElement: BSONElement;
Container: Container;
BSONReviver: BSONReviver;
};

/**
Expand All @@ -21,6 +37,7 @@ export type OnDemand = {
const onDemand: OnDemand = Object.create(null);

onDemand.parseToElements = parseToElements;
onDemand.parseToStructure = parseToStructure;
onDemand.BSONOffsetError = BSONOffsetError;

Object.freeze(onDemand);
Expand Down
61 changes: 41 additions & 20 deletions src/parser/on_demand/parse_to_elements.ts
@@ -1,4 +1,3 @@
/* eslint-disable @typescript-eslint/no-unsafe-enum-comparison */
import { BSONOffsetError } from '../../error';

/**
Expand All @@ -9,7 +8,7 @@ import { BSONOffsetError } from '../../error';
* - `minKey` is set to 255 so unsigned comparisons succeed
* - Modify with caution, double check the bundle contains literals
*/
const enum t {
const enum BSONElementType {
double = 1,
string = 2,
object = 3,
Expand Down Expand Up @@ -45,8 +44,11 @@ export type BSONElement = [
length: number
];

/** Parses a int32 little-endian at offset, throws if it is negative */
function getSize(source: Uint8Array, offset: number): number {
/**
* @internal
* Parses an int32 little-endian at offset, throws if it is negative
*/
export function getSize(source: Uint8Array, offset: number): number {
if (source[offset + 3] > 127) {
throw new BSONOffsetError('BSON size cannot be negative', offset);
}
Expand Down Expand Up @@ -80,7 +82,12 @@ function findNull(bytes: Uint8Array, offset: number): number {
* @public
* @experimental
*/
export function parseToElements(bytes: Uint8Array, startOffset = 0): Iterable<BSONElement> {
export function parseToElements(
bytes: Uint8Array,
startOffset: number | null = 0
): Iterable<BSONElement> {
startOffset ??= 0;

if (bytes.length < 5) {
throw new BSONOffsetError(
`Input must be at least 5 bytes, got ${bytes.length} bytes`,
Expand Down Expand Up @@ -121,37 +128,51 @@ export function parseToElements(bytes: Uint8Array, startOffset = 0): Iterable<BS

let length: number;

if (type === t.double || type === t.long || type === t.date || type === t.timestamp) {
if (
type === BSONElementType.double ||
type === BSONElementType.long ||
type === BSONElementType.date ||
type === BSONElementType.timestamp
) {
length = 8;
} else if (type === t.int) {
} else if (type === BSONElementType.int) {
length = 4;
} else if (type === t.objectId) {
} else if (type === BSONElementType.objectId) {
length = 12;
} else if (type === t.decimal) {
} else if (type === BSONElementType.decimal) {
length = 16;
} else if (type === t.bool) {
} else if (type === BSONElementType.bool) {
length = 1;
} else if (type === t.null || type === t.undefined || type === t.maxKey || type === t.minKey) {
} else if (
type === BSONElementType.null ||
type === BSONElementType.undefined ||
type === BSONElementType.maxKey ||
type === BSONElementType.minKey
) {
length = 0;
}
// Needs a size calculation
else if (type === t.regex) {
else if (type === BSONElementType.regex) {
length = findNull(bytes, findNull(bytes, offset) + 1) + 1 - offset;
} else if (type === t.object || type === t.array || type === t.javascriptWithScope) {
} else if (
type === BSONElementType.object ||
type === BSONElementType.array ||
type === BSONElementType.javascriptWithScope
) {
length = getSize(bytes, offset);
} else if (
type === t.string ||
type === t.binData ||
type === t.dbPointer ||
type === t.javascript ||
type === t.symbol
type === BSONElementType.string ||
type === BSONElementType.binData ||
type === BSONElementType.dbPointer ||
type === BSONElementType.javascript ||
type === BSONElementType.symbol
) {
length = getSize(bytes, offset) + 4;
if (type === t.binData) {
if (type === BSONElementType.binData) {
// binary subtype
length += 1;
}
if (type === t.dbPointer) {
if (type === BSONElementType.dbPointer) {
// dbPointer's objectId
length += 12;
}
Expand Down
145 changes: 145 additions & 0 deletions src/parser/on_demand/parse_to_structure.ts
@@ -0,0 +1,145 @@
import { type Code } from '../../code';
import { type BSONElement, getSize, parseToElements } from './parse_to_elements';

/**
 * @internal
 * Fallback reviver used by `parseToStructure` when the caller does not supply one.
 *
 * It ignores its arguments and answers `null` for every element, which means
 * no container is ever provided for an embedded document and nothing is
 * written to the root destination.
 */
const DEFAULT_REVIVER: BSONReviver = () => null;

/**
 * @internal
 * Runs `parseToElements` and guarantees the result is a real array.
 *
 * `parseToElements` is typed as returning an `Iterable`; when that iterable
 * already is an array it is handed back untouched, otherwise its items are
 * copied into a new array so callers can index into it.
 *
 * @param bytes - BSON bytes to scan
 * @param offset - forwarded unchanged to `parseToElements`
 * @returns the elements reported by `parseToElements`, as an array
 */
function parseToElementsToArray(bytes: Uint8Array, offset?: number | null): BSONElement[] {
  const elements = parseToElements(bytes, offset);
  if (Array.isArray(elements)) {
    return elements;
  }
  return [...elements];
}

/**
* @internal
* One frame of the explicit stack that parseToStructure uses to walk nested documents.
*/
type ParseContext = {
/** Index into `elements` of the next element to visit; advanced each time an element is read. */
elementOffset: number;
/** The elements of the document this frame is iterating. */
elements: BSONElement[];
/** The container passed to the reviver for every element of this frame. */
container: Container;
/** The frame to resume once `elements` is exhausted; `null` for the root frame. */
previous: ParseContext | null;
};

/**
* @experimental
* @public
* A union of the possible containers for BSON elements.
*
* Depending on kind, a reviver can accurately assign a value to a name on the container.
*
* The `kind` property is the discriminant: it tells the reviver what `dest` is.
*/
export type Container =
// `dest` is a plain object keyed by string
| {
dest: Record<string, unknown>;
kind: 'object';
}
// `dest` is a Map keyed by string
| {
dest: Map<string, unknown>;
kind: 'map';
}
// `dest` is an array
| {
dest: Array<unknown>;
kind: 'array';
}
// `dest` is a Code instance (presumably the target for a javascriptWithScope element; confirm with reviver implementations)
| {
dest: Code;
kind: 'code';
}
// `dest` is unconstrained and any additional string-keyed properties may be attached to the container
| {
kind: 'custom';
// eslint-disable-next-line @typescript-eslint/no-explicit-any
dest: any;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
[key: string]: any;
};

/**
* @experimental
* @public
* Callback invoked by parseToStructure once for each BSON element it visits.
*
* It receives the full input `bytes`, the `container` of the document currently
* being iterated, and the `element` being visited.
*
* The return value only matters when the element is an embedded type
* (object, array or javascriptWithScope): returning a Container makes
* parseToStructure iterate the embedded document's elements and pass that
* Container to the reviver for each of them; returning `null` skips the
* embedded document. For every other element type the return value is ignored.
*/
export type BSONReviver = (
bytes: Uint8Array,
container: Container,
element: BSONElement
) => Container | null;

/**
 * @experimental
 * @public
 *
 * Visits every element of a BSON document and hands each one to a reviver,
 * descending into embedded documents without recursion.
 *
 * Traversal is depth first: when the reviver returns a container for an
 * object, array or javascriptWithScope element, the elements of that embedded
 * document are visited next (with the returned container), after which the
 * walk resumes with the following sibling of the embedded element.
 *
 * @param bytes - the BSON bytes to walk
 * @param startOffset - forwarded to `parseToElements` for the top-level document
 * @param pRoot - container handed to the reviver for top-level elements;
 *   when nullish an `'object'` container with a null-prototype `dest` is created
 * @param pReviver - callback invoked per element; when nullish a reviver that
 *   always returns `null` is used, so nothing is assigned and nothing is descended into
 * @returns the `dest` of the root container
 */
export function parseToStructure<
  TRoot extends Container = {
    dest: Record<string, unknown>;
    kind: 'object';
  }
>(
  bytes: Uint8Array,
  startOffset?: number | null,
  pRoot?: TRoot | null,
  pReviver?: BSONReviver | null
): TRoot extends undefined ? Record<string, unknown> : TRoot['dest'] {
  /** Positions inside a BSONElement tuple: the type byte and the value offset */
  const enum BSONElementOffset {
    type = 0,
    offset = 3
  }

  /** The BSON types that embed a document */
  const enum BSONElementType {
    object = 3,
    array = 4,
    javascriptWithScope = 15
  }

  const reviver = pReviver ?? DEFAULT_REVIVER;
  const root = pRoot ?? {
    kind: 'object',
    dest: Object.create(null) as Record<string, unknown>
  };

  // Explicit stack of documents being iterated; the top frame is `ctx`.
  let ctx: ParseContext | null = {
    elementOffset: 0,
    elements: parseToElementsToArray(bytes, startOffset),
    container: root,
    previous: null
  };

  while (ctx !== null) {
    const bsonElement: BSONElement | undefined = ctx.elements[ctx.elementOffset++];

    if (bsonElement == null) {
      // Current document is exhausted: pop back to the enclosing document.
      ctx = ctx.previous;
      continue;
    }

    const type = bsonElement[BSONElementOffset.type];
    const offset = bsonElement[BSONElementOffset.offset];

    // The reviver sees every element, embedded or not.
    const container = reviver(bytes, ctx.container, bsonElement);
    if (container == null) {
      continue;
    }

    let docOffset: number;
    switch (type) {
      case BSONElementType.object:
      case BSONElementType.array:
        // The embedded document starts right at the value offset.
        docOffset = offset;
        break;
      case BSONElementType.javascriptWithScope:
        // The scope document follows the code string:
        // value offset + codeSize + value int + code int
        docOffset = offset + getSize(bytes, offset + 4) + 4 + 4;
        break;
      default:
        // Not an embedded type: the reviver's return value is ignored.
        continue;
    }

    // Push a frame for the embedded document; its elements are visited next.
    ctx = {
      elementOffset: 0,
      elements: parseToElementsToArray(bytes, docOffset),
      container,
      previous: ctx
    };
  }

  return root.dest;
}

0 comments on commit 269df91

Please sign in to comment.