From 2aaa33c185123275cf0dc38ca3b6f801b00cd46e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 1 Aug 2025 13:59:40 +0000 Subject: [PATCH 1/4] Initial plan From 6791eb673491630ba88b7b6f09801990dd92e5e1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 1 Aug 2025 14:23:29 +0000 Subject: [PATCH 2/4] feat: implement Apache Avro encoder with schema validation and tests Co-authored-by: streamich <9773803+streamich@users.noreply.github.com> --- src/avro/AvroEncoder.ts | 256 ++++++++++ src/avro/AvroSchemaEncoder.ts | 453 ++++++++++++++++++ src/avro/AvroSchemaValidator.ts | 319 ++++++++++++ src/avro/__tests__/AvroEncoder.spec.ts | 287 +++++++++++ src/avro/__tests__/AvroSchemaEncoder.spec.ts | 406 ++++++++++++++++ .../__tests__/AvroSchemaValidator.spec.ts | 360 ++++++++++++++ src/avro/index.ts | 4 + 7 files changed, 2085 insertions(+) create mode 100644 src/avro/AvroEncoder.ts create mode 100644 src/avro/AvroSchemaEncoder.ts create mode 100644 src/avro/AvroSchemaValidator.ts create mode 100644 src/avro/__tests__/AvroEncoder.spec.ts create mode 100644 src/avro/__tests__/AvroSchemaEncoder.spec.ts create mode 100644 src/avro/__tests__/AvroSchemaValidator.spec.ts create mode 100644 src/avro/index.ts diff --git a/src/avro/AvroEncoder.ts b/src/avro/AvroEncoder.ts new file mode 100644 index 00000000..618ff484 --- /dev/null +++ b/src/avro/AvroEncoder.ts @@ -0,0 +1,256 @@ +import type {IWriter, IWriterGrowable} from '@jsonjoy.com/util/lib/buffers'; +import type {BinaryJsonEncoder} from '../types'; + +/** + * Apache Avro binary encoder for basic value encoding. + * Implements the Avro binary encoding specification without schema validation. + * Based on https://avro.apache.org/docs/1.12.0/specification/ + */ +export class AvroEncoder implements BinaryJsonEncoder { + constructor(public readonly writer: IWriter & IWriterGrowable) {} + + public encode(value: unknown): Uint8Array { + const writer = this.writer; + writer.reset(); + this.writeAny(value); + return writer.flush(); + } + + /** + * Called when the encoder encounters a value that it does not know how to encode. + */ + public writeUnknown(value: unknown): void { + this.writeNull(); + } + + public writeAny(value: unknown): void { + switch (typeof value) { + case 'boolean': + return this.writeBoolean(value); + case 'number': + return this.writeNumber(value); + case 'string': + return this.writeStr(value); + case 'object': { + if (value === null) return this.writeNull(); + const constructor = value.constructor; + switch (constructor) { + case Object: + return this.writeObj(value as Record); + case Array: + return this.writeArr(value as unknown[]); + case Uint8Array: + return this.writeBin(value as Uint8Array); + default: + return this.writeUnknown(value); + } + } + case 'bigint': + return this.writeLong(value); + case 'undefined': + return this.writeNull(); + default: + return this.writeUnknown(value); + } + } + + /** + * Writes an Avro null value. + */ + public writeNull(): void { + // Null values are encoded as zero bytes + } + + /** + * Writes an Avro boolean value. + */ + public writeBoolean(bool: boolean): void { + this.writer.u8(bool ? 1 : 0); + } + + /** + * Writes an Avro int value using zigzag encoding. + */ + public writeInt(int: number): void { + this.writeVarIntSigned(this.encodeZigZag32(Math.trunc(int))); + } + + /** + * Writes an Avro long value using zigzag encoding. + */ + public writeLong(long: number | bigint): void { + if (typeof long === 'bigint') { + this.writeVarLong(this.encodeZigZag64(long)); + } else { + this.writeVarLong(this.encodeZigZag64(BigInt(Math.trunc(long)))); + } + } + + /** + * Writes an Avro float value using IEEE 754 single-precision. + */ + public writeFloatAvro(float: number): void { + this.writer.ensureCapacity(4); + this.writer.view.setFloat32(this.writer.x, float, true); // little-endian + this.writer.move(4); + } + + /** + * Writes an Avro double value using IEEE 754 double-precision. + */ + public writeDouble(double: number): void { + this.writer.ensureCapacity(8); + this.writer.view.setFloat64(this.writer.x, double, true); // little-endian + this.writer.move(8); + } + + /** + * Writes an Avro bytes value with length-prefixed encoding. + */ + public writeBin(bytes: Uint8Array): void { + this.writeVarIntUnsigned(bytes.length); + this.writer.buf(bytes, bytes.length); + } + + /** + * Writes an Avro string value with UTF-8 encoding and length prefix. + */ + public writeStr(str: string): void { + const bytes = new TextEncoder().encode(str); + this.writeVarIntUnsigned(bytes.length); + this.writer.buf(bytes, bytes.length); + } + + /** + * Writes an Avro array with length-prefixed encoding. + */ + public writeArr(arr: unknown[]): void { + this.writeVarIntUnsigned(arr.length); + for (const item of arr) { + this.writeAny(item); + } + this.writeVarIntUnsigned(0); // End of array marker + } + + /** + * Writes an Avro map with length-prefixed encoding. + */ + public writeObj(obj: Record): void { + const entries = Object.entries(obj); + this.writeVarIntUnsigned(entries.length); + for (const [key, value] of entries) { + this.writeStr(key); + this.writeAny(value); + } + this.writeVarIntUnsigned(0); // End of map marker + } + + // BinaryJsonEncoder interface methods + + /** + * Generic number writing - determines type based on value + */ + public writeNumber(num: number): void { + if (Number.isInteger(num)) { + if (num >= -2147483648 && num <= 2147483647) { + this.writeInt(num); + } else { + this.writeLong(num); + } + } else { + this.writeDouble(num); + } + } + + /** + * Writes an integer value + */ + public writeInteger(int: number): void { + this.writeInt(int); + } + + /** + * Writes an unsigned integer value + */ + public writeUInteger(uint: number): void { + this.writeInt(uint); + } + + /** + * Writes a float value (interface method) + */ + public writeFloat(float: number): void { + this.writeFloatValue(float); + } + + /** + * Writes a float value using IEEE 754 single-precision. + */ + private writeFloatValue(float: number): void { + this.writer.ensureCapacity(4); + this.writer.view.setFloat32(this.writer.x, float, true); // little-endian + this.writer.move(4); + } + + /** + * Writes an ASCII string (same as regular string in Avro) + */ + public writeAsciiStr(str: string): void { + this.writeStr(str); + } + + // Utility methods for Avro encoding + + /** + * Encodes a variable-length integer (for signed values with zigzag) + */ + private writeVarIntSigned(value: number): void { + let n = value >>> 0; // Convert to unsigned 32-bit + while (n >= 0x80) { + this.writer.u8((n & 0x7f) | 0x80); + n >>>= 7; + } + this.writer.u8(n & 0x7f); + } + + /** + * Encodes a variable-length integer (for unsigned values like lengths) + */ + private writeVarIntUnsigned(value: number): void { + let n = value >>> 0; // Convert to unsigned 32-bit + while (n >= 0x80) { + this.writer.u8((n & 0x7f) | 0x80); + n >>>= 7; + } + this.writer.u8(n & 0x7f); + } + + /** + * Encodes a variable-length long using Avro's encoding + */ + private writeVarLong(value: bigint): void { + let n = value; + const mask = BigInt(0x7f); + const shift = BigInt(7); + + while (n >= BigInt(0x80)) { + this.writer.u8(Number((n & mask) | BigInt(0x80))); + n >>= shift; + } + this.writer.u8(Number(n & mask)); + } + + /** + * Encodes a 32-bit integer using zigzag encoding + */ + private encodeZigZag32(value: number): number { + return (value << 1) ^ (value >> 31); + } + + /** + * Encodes a 64-bit integer using zigzag encoding + */ + private encodeZigZag64(value: bigint): bigint { + return (value << BigInt(1)) ^ (value >> BigInt(63)); + } +} \ No newline at end of file diff --git a/src/avro/AvroSchemaEncoder.ts b/src/avro/AvroSchemaEncoder.ts new file mode 100644 index 00000000..c717c73d --- /dev/null +++ b/src/avro/AvroSchemaEncoder.ts @@ -0,0 +1,453 @@ +import type {IWriter, IWriterGrowable} from '@jsonjoy.com/util/lib/buffers'; +import {AvroEncoder} from './AvroEncoder'; +import {AvroSchemaValidator} from './AvroSchemaValidator'; +import type { + AvroSchema, + AvroRecordSchema, + AvroEnumSchema, + AvroArraySchema, + AvroMapSchema, + AvroUnionSchema, + AvroFixedSchema, + AvroNamedSchema, +} from './types'; + +/** + * Apache Avro binary encoder with schema validation and encoding. + * Encodes values according to provided Avro schemas with proper validation. + * Based on https://avro.apache.org/docs/1.12.0/specification/ + */ +export class AvroSchemaEncoder { + private encoder: AvroEncoder; + private validator: AvroSchemaValidator; + private namedSchemas = new Map(); + + constructor(public readonly writer: IWriter & IWriterGrowable) { + this.encoder = new AvroEncoder(writer); + this.validator = new AvroSchemaValidator(); + } + + /** + * Encodes a value according to the provided schema. + */ + public encode(value: unknown, schema: AvroSchema, selectedIndex?: number): Uint8Array { + this.writer.reset(); + this.namedSchemas.clear(); + + // Validate schema first + if (!this.validator.validateSchema(schema)) { + throw new Error('Invalid Avro schema'); + } + + // Validate value against schema + if (!this.validator.validateValue(value, schema)) { + throw new Error('Value does not conform to schema'); + } + + this.collectNamedSchemas(schema); + + if (Array.isArray(schema) && selectedIndex !== undefined) { + this.writeUnion(value, schema, selectedIndex); + } else { + this.writeValue(value, schema); + } + + return this.writer.flush(); + } + + /** + * Writes a null value with schema validation. + */ + public writeNull(schema: AvroSchema): void { + this.validateSchemaType(schema, 'null'); + this.encoder.writeNull(); + } + + /** + * Writes a boolean value with schema validation. + */ + public writeBoolean(value: boolean, schema: AvroSchema): void { + this.validateSchemaType(schema, 'boolean'); + this.encoder.writeBoolean(value); + } + + /** + * Writes an int value with schema validation. + */ + public writeInt(value: number, schema: AvroSchema): void { + this.validateSchemaType(schema, 'int'); + if (!Number.isInteger(value) || value < -2147483648 || value > 2147483647) { + throw new Error('Value is not a valid 32-bit integer'); + } + this.encoder.writeInt(value); + } + + /** + * Writes a long value with schema validation. + */ + public writeLong(value: number | bigint, schema: AvroSchema): void { + this.validateSchemaType(schema, 'long'); + this.encoder.writeLong(value); + } + + /** + * Writes a float value with schema validation. + */ + public writeFloat(value: number, schema: AvroSchema): void { + this.validateSchemaType(schema, 'float'); + this.encoder.writeFloat(value); + } + + /** + * Writes a double value with schema validation. + */ + public writeDouble(value: number, schema: AvroSchema): void { + this.validateSchemaType(schema, 'double'); + this.encoder.writeDouble(value); + } + + /** + * Writes a bytes value with schema validation. + */ + public writeBytes(value: Uint8Array, schema: AvroSchema): void { + this.validateSchemaType(schema, 'bytes'); + this.encoder.writeBin(value); + } + + /** + * Writes a string value with schema validation. + */ + public writeString(value: string, schema: AvroSchema): void { + this.validateSchemaType(schema, 'string'); + this.encoder.writeStr(value); + } + + /** + * Writes a record value with schema validation. + */ + public writeRecord(value: Record, schema: AvroRecordSchema): void { + if (typeof schema === 'object' && schema.type !== 'record') { + throw new Error('Schema is not a record schema'); + } + + const recordSchema = this.resolveSchema(schema) as AvroRecordSchema; + if (recordSchema.type !== 'record') { + throw new Error('Schema is not a record schema'); + } + + for (const field of recordSchema.fields) { + const fieldValue = value[field.name]; + if (fieldValue !== undefined) { + this.writeValue(fieldValue, field.type); + } else if (field.default !== undefined) { + this.writeValue(field.default, field.type); + } else { + throw new Error(`Missing required field: ${field.name}`); + } + } + } + + /** + * Writes an enum value with schema validation. + */ + public writeEnum(value: string, schema: AvroEnumSchema): void { + if (typeof schema === 'object' && schema.type !== 'enum') { + throw new Error('Schema is not an enum schema'); + } + + const enumSchema = this.resolveSchema(schema) as AvroEnumSchema; + if (enumSchema.type !== 'enum') { + throw new Error('Schema is not an enum schema'); + } + + const index = enumSchema.symbols.indexOf(value); + if (index === -1) { + throw new Error(`Invalid enum value: ${value}`); + } + + this.writeVarIntSigned(this.encodeZigZag32(index)); + } + + /** + * Writes an array value with schema validation. + */ + public writeArray(value: unknown[], schema: AvroArraySchema): void { + if (typeof schema === 'object' && schema.type !== 'array') { + throw new Error('Schema is not an array schema'); + } + + const arraySchema = this.resolveSchema(schema) as AvroArraySchema; + if (arraySchema.type !== 'array') { + throw new Error('Schema is not an array schema'); + } + + // Write array length + this.writeVarIntUnsigned(value.length); + + // Write array items + for (const item of value) { + this.writeValue(item, arraySchema.items); + } + + // Write end-of-array marker + this.writeVarIntUnsigned(0); + } + + /** + * Writes a map value with schema validation. + */ + public writeMap(value: Record, schema: AvroMapSchema): void { + if (typeof schema === 'object' && schema.type !== 'map') { + throw new Error('Schema is not a map schema'); + } + + const mapSchema = this.resolveSchema(schema) as AvroMapSchema; + if (mapSchema.type !== 'map') { + throw new Error('Schema is not a map schema'); + } + + const entries = Object.entries(value); + + // Write map length + this.writeVarIntUnsigned(entries.length); + + // Write map entries + for (const [key, val] of entries) { + this.encoder.writeStr(key); + this.writeValue(val, mapSchema.values); + } + + // Write end-of-map marker + this.writeVarIntUnsigned(0); + } + + /** + * Writes a union value with schema validation. + */ + public writeUnion(value: unknown, schema: AvroUnionSchema, selectedIndex?: number): void { + if (!Array.isArray(schema)) { + throw new Error('Schema is not a union schema'); + } + + let index = selectedIndex; + if (index === undefined) { + // Find the first matching schema in the union + index = schema.findIndex(subSchema => this.validator.validateValue(value, subSchema)); + if (index === -1) { + throw new Error('Value does not match any schema in the union'); + } + } + + if (index < 0 || index >= schema.length) { + throw new Error('Invalid union index'); + } + + // Write union index + this.writeVarIntSigned(this.encodeZigZag32(index)); + + // Write the value according to the selected schema + this.writeValue(value, schema[index]); + } + + /** + * Writes a fixed value with schema validation. + */ + public writeFixed(value: Uint8Array, schema: AvroFixedSchema): void { + if (typeof schema === 'object' && schema.type !== 'fixed') { + throw new Error('Schema is not a fixed schema'); + } + + const fixedSchema = this.resolveSchema(schema) as AvroFixedSchema; + if (fixedSchema.type !== 'fixed') { + throw new Error('Schema is not a fixed schema'); + } + + if (value.length !== fixedSchema.size) { + throw new Error(`Fixed value length ${value.length} does not match schema size ${fixedSchema.size}`); + } + + this.writer.buf(value, value.length); + } + + /** + * Generic number writing with schema validation. + */ + public writeNumber(value: number, schema: AvroSchema): void { + const resolvedSchema = this.resolveSchema(schema); + const schemaType = typeof resolvedSchema === 'string' + ? resolvedSchema + : Array.isArray(resolvedSchema) + ? 'union' + : resolvedSchema.type; + + switch (schemaType) { + case 'int': + this.writeInt(value, schema); + break; + case 'long': + this.writeLong(value, schema); + break; + case 'float': + this.writeFloat(value, schema); + break; + case 'double': + this.writeDouble(value, schema); + break; + default: + throw new Error(`Schema type ${schemaType} is not a numeric type`); + } + } + + /** + * Writes a value according to its schema. + */ + private writeValue(value: unknown, schema: AvroSchema): void { + const resolvedSchema = this.resolveSchema(schema); + + if (typeof resolvedSchema === 'string') { + switch (resolvedSchema) { + case 'null': + this.encoder.writeNull(); + break; + case 'boolean': + this.encoder.writeBoolean(value as boolean); + break; + case 'int': + this.encoder.writeInt(value as number); + break; + case 'long': + this.encoder.writeLong(value as number | bigint); + break; + case 'float': + this.encoder.writeFloat(value as number); + break; + case 'double': + this.encoder.writeDouble(value as number); + break; + case 'bytes': + this.encoder.writeBin(value as Uint8Array); + break; + case 'string': + this.encoder.writeStr(value as string); + break; + default: + throw new Error(`Unknown primitive type: ${resolvedSchema}`); + } + return; + } + + if (Array.isArray(resolvedSchema)) { + this.writeUnion(value, resolvedSchema); + return; + } + + switch (resolvedSchema.type) { + case 'record': + this.writeRecord(value as Record, resolvedSchema); + break; + case 'enum': + this.writeEnum(value as string, resolvedSchema); + break; + case 'array': + this.writeArray(value as unknown[], resolvedSchema); + break; + case 'map': + this.writeMap(value as Record, resolvedSchema); + break; + case 'fixed': + this.writeFixed(value as Uint8Array, resolvedSchema); + break; + default: + throw new Error(`Unknown schema type: ${(resolvedSchema as any).type}`); + } + } + + private validateSchemaType(schema: AvroSchema, expectedType: string): void { + const resolvedSchema = this.resolveSchema(schema); + const actualType = typeof resolvedSchema === 'string' + ? resolvedSchema + : Array.isArray(resolvedSchema) + ? 'union' + : resolvedSchema.type; + + if (actualType !== expectedType) { + throw new Error(`Expected schema type ${expectedType}, got ${actualType}`); + } + } + + private resolveSchema(schema: AvroSchema): AvroSchema { + if (typeof schema === 'string') { + const namedSchema = this.namedSchemas.get(schema); + return namedSchema || schema; + } + return schema; + } + + private collectNamedSchemas(schema: AvroSchema): void { + if (typeof schema === 'string' || Array.isArray(schema)) { + return; + } + + if (typeof schema === 'object' && schema !== null) { + switch (schema.type) { + case 'record': + const recordSchema = schema as AvroRecordSchema; + const recordFullName = this.getFullName(recordSchema.name, recordSchema.namespace); + this.namedSchemas.set(recordFullName, recordSchema); + recordSchema.fields.forEach(field => this.collectNamedSchemas(field.type)); + break; + case 'enum': + const enumSchema = schema as AvroEnumSchema; + const enumFullName = this.getFullName(enumSchema.name, enumSchema.namespace); + this.namedSchemas.set(enumFullName, enumSchema); + break; + case 'fixed': + const fixedSchema = schema as AvroFixedSchema; + const fixedFullName = this.getFullName(fixedSchema.name, fixedSchema.namespace); + this.namedSchemas.set(fixedFullName, fixedSchema); + break; + case 'array': + this.collectNamedSchemas((schema as AvroArraySchema).items); + break; + case 'map': + this.collectNamedSchemas((schema as AvroMapSchema).values); + break; + } + } + } + + private getFullName(name: string, namespace?: string): string { + return namespace ? `${namespace}.${name}` : name; + } + + /** + * Writes a variable-length integer using Avro's encoding (for lengths) + */ + private writeVarIntUnsigned(value: number): void { + let n = value >>> 0; // Convert to unsigned 32-bit + while (n >= 0x80) { + this.writer.u8((n & 0x7f) | 0x80); + n >>>= 7; + } + this.writer.u8(n & 0x7f); + } + + /** + * Writes a variable-length integer using Avro's encoding (for signed values with zigzag) + */ + private writeVarIntSigned(value: number): void { + let n = value >>> 0; // Convert to unsigned 32-bit + while (n >= 0x80) { + this.writer.u8((n & 0x7f) | 0x80); + n >>>= 7; + } + this.writer.u8(n & 0x7f); + } + + /** + * Encodes a 32-bit integer using zigzag encoding + */ + private encodeZigZag32(value: number): number { + return (value << 1) ^ (value >> 31); + } +} \ No newline at end of file diff --git a/src/avro/AvroSchemaValidator.ts b/src/avro/AvroSchemaValidator.ts new file mode 100644 index 00000000..10bc016d --- /dev/null +++ b/src/avro/AvroSchemaValidator.ts @@ -0,0 +1,319 @@ +import type { + AvroSchema, + AvroNullSchema, + AvroBooleanSchema, + AvroIntSchema, + AvroLongSchema, + AvroFloatSchema, + AvroDoubleSchema, + AvroBytesSchema, + AvroStringSchema, + AvroRecordSchema, + AvroEnumSchema, + AvroArraySchema, + AvroMapSchema, + AvroUnionSchema, + AvroFixedSchema, + AvroRecordField, + AvroNamedSchema, +} from './types'; + +/** + * Validates Apache Avro schemas according to the specification. + * Based on https://avro.apache.org/docs/1.12.0/specification/ + */ +export class AvroSchemaValidator { + private namedSchemas = new Map(); + + /** + * Validates an Avro schema and resolves named schema references. + */ + public validateSchema(schema: AvroSchema): boolean { + this.namedSchemas.clear(); + return this.validateSchemaInternal(schema); + } + + /** + * Validates that a value conforms to the given Avro schema. + */ + public validateValue(value: unknown, schema: AvroSchema): boolean { + this.namedSchemas.clear(); + this.validateSchemaInternal(schema); + return this.validateValueAgainstSchema(value, schema); + } + + private validateSchemaInternal(schema: AvroSchema): boolean { + if (typeof schema === 'string') { + // String schema references (either primitive type or named type) + return this.validateStringSchema(schema); + } + + if (Array.isArray(schema)) { + // Union schema + return this.validateUnionSchema(schema); + } + + if (typeof schema === 'object' && schema !== null) { + switch (schema.type) { + case 'null': + return this.validateNullSchema(schema as AvroNullSchema); + case 'boolean': + return this.validateBooleanSchema(schema as AvroBooleanSchema); + case 'int': + return this.validateIntSchema(schema as AvroIntSchema); + case 'long': + return this.validateLongSchema(schema as AvroLongSchema); + case 'float': + return this.validateFloatSchema(schema as AvroFloatSchema); + case 'double': + return this.validateDoubleSchema(schema as AvroDoubleSchema); + case 'bytes': + return this.validateBytesSchema(schema as AvroBytesSchema); + case 'string': + return this.validateStringTypeSchema(schema as AvroStringSchema); + case 'record': + return this.validateRecordSchema(schema as AvroRecordSchema); + case 'enum': + return this.validateEnumSchema(schema as AvroEnumSchema); + case 'array': + return this.validateArraySchema(schema as AvroArraySchema); + case 'map': + return this.validateMapSchema(schema as AvroMapSchema); + case 'fixed': + return this.validateFixedSchema(schema as AvroFixedSchema); + default: + return false; + } + } + + return false; + } + + private validateStringSchema(schema: string): boolean { + const primitiveTypes = ['null', 'boolean', 'int', 'long', 'float', 'double', 'bytes', 'string']; + return primitiveTypes.includes(schema) || this.namedSchemas.has(schema); + } + + private validateUnionSchema(schema: AvroUnionSchema): boolean { + if (schema.length === 0) return false; + const typeSet = new Set(); + + for (const subSchema of schema) { + if (!this.validateSchemaInternal(subSchema)) return false; + + // Union types must be unique + const typeName = this.getSchemaTypeName(subSchema); + if (typeSet.has(typeName)) return false; + typeSet.add(typeName); + } + + return true; + } + + private validateNullSchema(schema: AvroNullSchema): boolean { + return schema.type === 'null'; + } + + private validateBooleanSchema(schema: AvroBooleanSchema): boolean { + return schema.type === 'boolean'; + } + + private validateIntSchema(schema: AvroIntSchema): boolean { + return schema.type === 'int'; + } + + private validateLongSchema(schema: AvroLongSchema): boolean { + return schema.type === 'long'; + } + + private validateFloatSchema(schema: AvroFloatSchema): boolean { + return schema.type === 'float'; + } + + private validateDoubleSchema(schema: AvroDoubleSchema): boolean { + return schema.type === 'double'; + } + + private validateBytesSchema(schema: AvroBytesSchema): boolean { + return schema.type === 'bytes'; + } + + private validateStringTypeSchema(schema: AvroStringSchema): boolean { + return schema.type === 'string'; + } + + private validateRecordSchema(schema: AvroRecordSchema): boolean { + if (schema.type !== 'record' || !schema.name || !Array.isArray(schema.fields)) return false; + + const fullName = this.getFullName(schema.name, schema.namespace); + if (this.namedSchemas.has(fullName)) return false; + this.namedSchemas.set(fullName, schema); + + const fieldNames = new Set(); + for (const field of schema.fields) { + if (!this.validateRecordField(field)) return false; + if (fieldNames.has(field.name)) return false; + fieldNames.add(field.name); + } + + return true; + } + + private validateRecordField(field: AvroRecordField): boolean { + return ( + typeof field.name === 'string' && + field.name.length > 0 && + this.validateSchemaInternal(field.type) + ); + } + + private validateEnumSchema(schema: AvroEnumSchema): boolean { + if (schema.type !== 'enum' || !schema.name || !Array.isArray(schema.symbols)) return false; + + const fullName = this.getFullName(schema.name, schema.namespace); + if (this.namedSchemas.has(fullName)) return false; + this.namedSchemas.set(fullName, schema); + + if (schema.symbols.length === 0) return false; + const symbolSet = new Set(); + for (const symbol of schema.symbols) { + if (typeof symbol !== 'string' || symbolSet.has(symbol)) return false; + symbolSet.add(symbol); + } + + // Default symbol must be in symbols array if provided + if (schema.default !== undefined && !schema.symbols.includes(schema.default)) return false; + + return true; + } + + private validateArraySchema(schema: AvroArraySchema): boolean { + return schema.type === 'array' && this.validateSchemaInternal(schema.items); + } + + private validateMapSchema(schema: AvroMapSchema): boolean { + return schema.type === 'map' && this.validateSchemaInternal(schema.values); + } + + private validateFixedSchema(schema: AvroFixedSchema): boolean { + if (schema.type !== 'fixed' || !schema.name || typeof schema.size !== 'number') return false; + if (schema.size < 0) return false; + + const fullName = this.getFullName(schema.name, schema.namespace); + if (this.namedSchemas.has(fullName)) return false; + this.namedSchemas.set(fullName, schema); + + return true; + } + + private validateValueAgainstSchema(value: unknown, schema: AvroSchema): boolean { + if (typeof schema === 'string') { + return this.validateValueAgainstStringSchema(value, schema); + } + + if (Array.isArray(schema)) { + // Union - value must match one of the schemas + return schema.some(subSchema => this.validateValueAgainstSchema(value, subSchema)); + } + + if (typeof schema === 'object' && schema !== null) { + switch (schema.type) { + case 'null': + return value === null; + case 'boolean': + return typeof value === 'boolean'; + case 'int': + return typeof value === 'number' && Number.isInteger(value) && value >= -2147483648 && value <= 2147483647; + case 'long': + return (typeof value === 'number' && Number.isInteger(value)) || typeof value === 'bigint'; + case 'float': + case 'double': + return typeof value === 'number'; + case 'bytes': + return value instanceof Uint8Array; + case 'string': + return typeof value === 'string'; + case 'record': + return this.validateValueAgainstRecord(value, schema as AvroRecordSchema); + case 'enum': + return this.validateValueAgainstEnum(value, schema as AvroEnumSchema); + case 'array': + return this.validateValueAgainstArray(value, schema as AvroArraySchema); + case 'map': + return this.validateValueAgainstMap(value, schema as AvroMapSchema); + case 'fixed': + return this.validateValueAgainstFixed(value, schema as AvroFixedSchema); + default: + return false; + } + } + + return false; + } + + private validateValueAgainstStringSchema(value: unknown, schema: string): boolean { + switch (schema) { + case 'null': + return value === null; + case 'boolean': + return typeof value === 'boolean'; + case 'int': + return typeof value === 'number' && Number.isInteger(value) && value >= -2147483648 && value <= 2147483647; + case 'long': + return (typeof value === 'number' && Number.isInteger(value)) || typeof value === 'bigint'; + case 'float': + case 'double': + return typeof value === 'number'; + case 'bytes': + return value instanceof Uint8Array; + case 'string': + return typeof value === 'string'; + default: + // Named schema reference + const namedSchema = this.namedSchemas.get(schema); + return namedSchema ? this.validateValueAgainstSchema(value, namedSchema) : false; + } + } + + private validateValueAgainstRecord(value: unknown, schema: AvroRecordSchema): boolean { + if (typeof value !== 'object' || value === null) return false; + const obj = value as Record; + + for (const field of schema.fields) { + const fieldValue = obj[field.name]; + if (fieldValue === undefined && field.default === undefined) return false; + if (fieldValue !== undefined && !this.validateValueAgainstSchema(fieldValue, field.type)) return false; + } + + return true; + } + + private validateValueAgainstEnum(value: unknown, schema: AvroEnumSchema): boolean { + return typeof value === 'string' && schema.symbols.includes(value); + } + + private validateValueAgainstArray(value: unknown, schema: AvroArraySchema): boolean { + if (!Array.isArray(value)) return false; + return value.every(item => this.validateValueAgainstSchema(item, schema.items)); + } + + private validateValueAgainstMap(value: unknown, schema: AvroMapSchema): boolean { + if (typeof value !== 'object' || value === null) return false; + const obj = value as Record; + return Object.values(obj).every(val => this.validateValueAgainstSchema(val, schema.values)); + } + + private validateValueAgainstFixed(value: unknown, schema: AvroFixedSchema): boolean { + return value instanceof Uint8Array && value.length === schema.size; + } + + private getSchemaTypeName(schema: AvroSchema): string { + if (typeof schema === 'string') return schema; + if (Array.isArray(schema)) return 'union'; + return schema.type; + } + + private getFullName(name: string, namespace?: string): string { + return namespace ? `${namespace}.${name}` : name; + } +} \ No newline at end of file diff --git a/src/avro/__tests__/AvroEncoder.spec.ts b/src/avro/__tests__/AvroEncoder.spec.ts new file mode 100644 index 00000000..fcd77787 --- /dev/null +++ b/src/avro/__tests__/AvroEncoder.spec.ts @@ -0,0 +1,287 @@ +import {Writer} from '@jsonjoy.com/util/lib/buffers/Writer'; +import {AvroEncoder} from '../AvroEncoder'; + +describe('AvroEncoder', () => { + let writer: Writer; + let encoder: AvroEncoder; + + beforeEach(() => { + writer = new Writer(); + encoder = new AvroEncoder(writer); + }); + + describe('primitive types', () => { + test('encodes null', () => { + encoder.writeNull(); + const result = writer.flush(); + expect(result.length).toBe(0); + }); + + test('encodes boolean true', () => { + encoder.writeBoolean(true); + const result = writer.flush(); + expect(result).toEqual(new Uint8Array([1])); + }); + + test('encodes boolean false', () => { + encoder.writeBoolean(false); + const result = writer.flush(); + expect(result).toEqual(new Uint8Array([0])); + }); + + test('encodes positive int', () => { + encoder.writeInt(42); + const result = writer.flush(); + // 42 in zigzag is 84, which is 0x54, encoded as single byte + expect(result).toEqual(new Uint8Array([84])); + }); + + test('encodes negative int', () => { + encoder.writeInt(-1); + const result = writer.flush(); + // -1 in zigzag is 1, encoded as single byte + expect(result).toEqual(new Uint8Array([1])); + }); + + test('encodes int with multiple bytes', () => { + encoder.writeInt(300); + const result = writer.flush(); + // 300 zigzag encoded should use multiple bytes + expect(result.length).toBeGreaterThan(1); + }); + + test('encodes long from number', () => { + encoder.writeLong(123456789); + const result = writer.flush(); + expect(result.length).toBeGreaterThan(0); + }); + + test('encodes long from bigint', () => { + encoder.writeLong(BigInt('123456789012345')); + const result = writer.flush(); + expect(result.length).toBeGreaterThan(0); + }); + + test('encodes float', () => { + encoder.writeFloatAvro(3.14); + const result = writer.flush(); + expect(result.length).toBe(4); // IEEE 754 single precision + }); + + test('encodes double', () => { + encoder.writeDouble(3.14159265359); + const result = writer.flush(); + expect(result.length).toBe(8); // IEEE 754 double precision + }); + + test('encodes bytes', () => { + const bytes = new Uint8Array([1, 2, 3, 4]); + encoder.writeBin(bytes); + const result = writer.flush(); + // Length-prefixed: length + data + expect(result[0]).toBe(4); // length 4 (not zigzag for lengths) + expect(result.slice(1)).toEqual(bytes); + }); + + test('encodes string', () => { + encoder.writeStr('hello'); + const result = writer.flush(); + // Length-prefixed UTF-8 + expect(result[0]).toBe(5); // length 5 (not zigzag for lengths) + expect(result.slice(1)).toEqual(new TextEncoder().encode('hello')); + }); + + test('encodes empty string', () => { + encoder.writeStr(''); + const result = writer.flush(); + expect(result).toEqual(new Uint8Array([0])); // length 0 + }); + + test('encodes UTF-8 string', () => { + encoder.writeStr('héllo'); + const result = writer.flush(); + const utf8Bytes = new TextEncoder().encode('héllo'); + expect(result[0]).toBe(utf8Bytes.length); // length (not zigzag) + expect(result.slice(1)).toEqual(utf8Bytes); + }); + }); + + describe('arrays', () => { + test('encodes empty array', () => { + encoder.writeArr([]); + const result = writer.flush(); + expect(result).toEqual(new Uint8Array([0, 0])); // length 0, end marker 0 + }); + + test('encodes array of integers', () => { + encoder.writeArr([1, 2, 3]); + const result = writer.flush(); + expect(result[0]).toBe(3); // length 3 (not zigzag) + // Followed by encoded integers and end marker + expect(result[result.length - 1]).toBe(0); // end marker + }); + + test('encodes array of mixed types', () => { + encoder.writeArr([1, 'hello', true]); + const result = writer.flush(); + expect(result[0]).toBe(3); // length 3 (not zigzag) + expect(result[result.length - 1]).toBe(0); // end marker + }); + }); + + describe('objects/maps', () => { + test('encodes empty object', () => { + encoder.writeObj({}); + const result = writer.flush(); + expect(result).toEqual(new Uint8Array([0, 0])); // length 0, end marker 0 + }); + + test('encodes simple object', () => { + encoder.writeObj({key: 'value'}); + const result = writer.flush(); + expect(result[0]).toBe(1); // length 1 (not zigzag) + expect(result[result.length - 1]).toBe(0); // end marker + }); + + test('encodes object with multiple keys', () => { + encoder.writeObj({a: 1, b: 'test'}); + const result = writer.flush(); + expect(result[0]).toBe(2); // length 2 (not zigzag) + expect(result[result.length - 1]).toBe(0); // end marker + }); + }); + + describe('encode method', () => { + test('encodes various types through encode method', () => { + const data = { + nullValue: null, + boolValue: true, + intValue: 42, + stringValue: 'test', + arrayValue: [1, 2, 3], + }; + + const result = encoder.encode(data); + expect(result.length).toBeGreaterThan(0); + }); + + test('handles unknown types', () => { + const result = encoder.encode(new Date()); + expect(result.length).toBe(0); // writeUnknown calls writeNull + }); + + test('handles undefined', () => { + const result = encoder.encode(undefined); + expect(result.length).toBe(0); // writeNull + }); + + test('handles bigint', () => { + const result = encoder.encode(BigInt(123)); + expect(result.length).toBeGreaterThan(0); + }); + + test('handles Uint8Array', () => { + const bytes = new Uint8Array([1, 2, 3]); + const result = encoder.encode(bytes); + expect(result[0]).toBe(3); // length 3 (not zigzag) + expect(result.slice(1, 4)).toEqual(bytes); + }); + }); + + describe('BinaryJsonEncoder interface methods', () => { + test('writeNumber chooses appropriate type', () => { + // Integer in int range + encoder.writeNumber(42); + let result = writer.flush(); + expect(result).toEqual(new Uint8Array([84])); // 42 zigzag encoded + + writer.reset(); + + // Integer outside int range + encoder.writeNumber(3000000000); + result = writer.flush(); + expect(result.length).toBeGreaterThan(1); + + writer.reset(); + + // Float + encoder.writeNumber(3.14); + result = writer.flush(); + expect(result.length).toBe(8); // double precision + }); + + test('writeInteger', () => { + encoder.writeInteger(63); // 63 zigzag = 126, which fits in one byte + const result = writer.flush(); + expect(result).toEqual(new Uint8Array([126])); // 63 zigzag encoded is 126 + }); + + test('writeUInteger', () => { + encoder.writeUInteger(63); // same as writeInteger in our implementation + const result = writer.flush(); + expect(result).toEqual(new Uint8Array([126])); // same as writeInteger + }); + + test('writeFloat interface method', () => { + encoder.writeFloat(3.14); + const result = writer.flush(); + expect(result.length).toBe(4); // float precision through interface + }); + + test('writeAsciiStr', () => { + encoder.writeAsciiStr('test'); + const result = writer.flush(); + expect(result[0]).toBe(4); // length 4 (not zigzag) + expect(result.slice(1)).toEqual(new TextEncoder().encode('test')); + }); + }); + + describe('edge cases', () => { + test('encodes very large numbers', () => { + const largeInt = 2147483647; // max int32 + encoder.writeInt(largeInt); + const result = writer.flush(); + expect(result.length).toBeGreaterThan(0); + }); + + test('encodes negative numbers correctly', () => { + encoder.writeInt(-2147483648); // min int32 + const result = writer.flush(); + expect(result.length).toBeGreaterThan(0); + }); + + test('encodes special float values', () => { + writer.reset(); + encoder.writeFloatAvro(Infinity); + let result = writer.flush(); + expect(result.length).toBe(4); + + writer.reset(); + encoder.writeFloatAvro(-Infinity); + result = writer.flush(); + expect(result.length).toBe(4); + + writer.reset(); + encoder.writeFloatAvro(NaN); + result = writer.flush(); + expect(result.length).toBe(4); + }); + + test('encodes special double values', () => { + writer.reset(); + encoder.writeDouble(Infinity); + let result = writer.flush(); + expect(result.length).toBe(8); + + writer.reset(); + encoder.writeDouble(-Infinity); + result = writer.flush(); + expect(result.length).toBe(8); + + writer.reset(); + encoder.writeDouble(NaN); + result = writer.flush(); + expect(result.length).toBe(8); + }); + }); +}); \ No newline at end of file diff --git a/src/avro/__tests__/AvroSchemaEncoder.spec.ts b/src/avro/__tests__/AvroSchemaEncoder.spec.ts new file mode 100644 index 00000000..9e47d557 --- /dev/null +++ b/src/avro/__tests__/AvroSchemaEncoder.spec.ts @@ -0,0 +1,406 @@ +import {Writer} from '@jsonjoy.com/util/lib/buffers/Writer'; +import {AvroSchemaEncoder} from '../AvroSchemaEncoder'; +import type { + AvroRecordSchema, + AvroEnumSchema, + AvroArraySchema, + AvroMapSchema, + AvroUnionSchema, + AvroFixedSchema, +} from '../types'; + +describe('AvroSchemaEncoder', () => { + let writer: Writer; + let encoder: AvroSchemaEncoder; + + beforeEach(() => { + writer = new Writer(); + encoder = new AvroSchemaEncoder(writer); + }); + + describe('primitive types with schema validation', () => { + test('encodes null with null schema', () => { + const result = encoder.encode(null, 'null'); + expect(result.length).toBe(0); + }); + + test('throws on null with non-null schema', () => { + expect(() => encoder.encode(null, 'string')).toThrow(); + }); + + test('encodes boolean with boolean schema', () => { + const result = encoder.encode(true, 'boolean'); + expect(result).toEqual(new Uint8Array([1])); + }); + + test('throws on boolean with non-boolean schema', () => { + expect(() => encoder.encode(true, 'string')).toThrow(); + }); + + test('encodes int with int schema', () => { + const result = encoder.encode(42, 'int'); + expect(result).toEqual(new Uint8Array([84])); // 42 zigzag encoded + }); + + test('throws on int out of range', () => { + expect(() => encoder.encode(3000000000, 'int')).toThrow(); + expect(() => encoder.encode(3.14, 'int')).toThrow(); + }); + + test('encodes long with long schema', () => { + const result = encoder.encode(123456789, 'long'); + expect(result.length).toBeGreaterThan(0); + }); + + test('encodes bigint long with long schema', () => { + const result = encoder.encode(BigInt('123456789012345'), 'long'); + expect(result.length).toBeGreaterThan(0); + }); + + test('encodes float with float schema', () => { + const result = encoder.encode(3.14, 'float'); + expect(result.length).toBe(4); + }); + + test('encodes double with double schema', () => { + const result = encoder.encode(3.14159, 'double'); + expect(result.length).toBe(8); + }); + + test('encodes bytes with bytes schema', () => { + const bytes = new Uint8Array([1, 2, 3]); + const result = encoder.encode(bytes, 'bytes'); + expect(result[0]).toBe(3); // length 3 (not zigzag) + expect(result.slice(1)).toEqual(bytes); + }); + + test('encodes string with string schema', () => { + const result = encoder.encode('hello', 'string'); + expect(result[0]).toBe(5); // length 5 (not zigzag) + expect(result.slice(1)).toEqual(new TextEncoder().encode('hello')); + }); + }); + + describe('record schemas', () => { + test('encodes simple record', () => { + const schema: AvroRecordSchema = { + type: 'record', + name: 'User', + fields: [ + {name: 'id', type: 'int'}, + {name: 'name', type: 'string'}, + ], + }; + + const value = {id: 42, name: 'John'}; + const result = encoder.encode(value, schema); + expect(result.length).toBeGreaterThan(0); + }); + + test('encodes record with default values', () => { + const schema: AvroRecordSchema = { + type: 'record', + name: 'User', + fields: [ + {name: 'id', type: 'int'}, + {name: 'name', type: 'string', default: 'Unknown'}, + ], + }; + + const value = {id: 42}; // name is missing but has default + const result = encoder.encode(value, schema); + expect(result.length).toBeGreaterThan(0); + }); + + test('throws on missing required field', () => { + const schema: AvroRecordSchema = { + type: 'record', + name: 'User', + fields: [ + {name: 'id', type: 'int'}, + {name: 'name', type: 'string'}, + ], + }; + + const value = {id: 42}; // name is missing and required + expect(() => encoder.encode(value, schema)).toThrow(); + }); + + test('throws on wrong field type', () => { + const schema: AvroRecordSchema = { + type: 'record', + name: 'User', + fields: [ + {name: 'id', type: 'int'}, + {name: 'name', type: 'string'}, + ], + }; + + const value = {id: '42', name: 'John'}; // id should be int + expect(() => encoder.encode(value, schema)).toThrow(); + }); + }); + + describe('enum schemas', () => { + test('encodes valid enum value', () => { + const schema: AvroEnumSchema = { + type: 'enum', + name: 'Color', + symbols: ['RED', 'GREEN', 'BLUE'], + }; + + const result = encoder.encode('GREEN', schema); + expect(result).toEqual(new Uint8Array([2])); // index 1 zigzag encoded is 2 + }); + + test('throws on invalid enum value', () => { + const schema: AvroEnumSchema = { + type: 'enum', + name: 'Color', + symbols: ['RED', 'GREEN', 'BLUE'], + }; + + expect(() => encoder.encode('YELLOW', schema)).toThrow(); + }); + }); + + describe('array schemas', () => { + test('encodes array of primitives', () => { + const schema: AvroArraySchema = { + type: 'array', + items: 'string', + }; + + const value = ['hello', 'world']; + const result = encoder.encode(value, schema); + expect(result[0]).toBe(2); // length 2 (not zigzag) + expect(result[result.length - 1]).toBe(0); // end marker + }); + + test('encodes empty array', () => { + const schema: AvroArraySchema = { + type: 'array', + items: 'int', + }; + + const result = encoder.encode([], schema); + expect(result).toEqual(new Uint8Array([0, 0])); // length 0, end marker + }); + + test('throws on wrong item type', () => { + const schema: AvroArraySchema = { + type: 'array', + items: 'int', + }; + + expect(() => encoder.encode([1, 'two', 3], schema)).toThrow(); + }); + }); + + describe('map schemas', () => { + test('encodes map of primitives', () => { + const schema: AvroMapSchema = { + type: 'map', + values: 'int', + }; + + const value = {a: 1, b: 2}; + const result = encoder.encode(value, schema); + expect(result[0]).toBe(2); // length 2 (not zigzag) + expect(result[result.length - 1]).toBe(0); // end marker + }); + + test('encodes empty map', () => { + const schema: AvroMapSchema = { + type: 'map', + values: 'string', + }; + + const result = encoder.encode({}, schema); + expect(result).toEqual(new Uint8Array([0, 0])); // length 0, end marker + }); + + test('throws on wrong value type', () => { + const schema: AvroMapSchema = { + type: 'map', + values: 'int', + }; + + expect(() => encoder.encode({a: 1, b: 'two'}, schema)).toThrow(); + }); + }); + + describe('union schemas', () => { + test('encodes union value with automatic type detection', () => { + const schema: AvroUnionSchema = ['null', 'string', 'int']; + + // String value + let result = encoder.encode('hello', schema); + expect(result[0]).toBe(2); // index 1 zigzag (string is at index 1) + + // Null value + result = encoder.encode(null, schema); + expect(result[0]).toBe(0); // index 0 zigzag (null is at index 0) + + // Int value + result = encoder.encode(42, schema); + expect(result[0]).toBe(4); // index 2 zigzag (int is at index 2) + }); + + test('encodes union value with explicit index', () => { + const schema: AvroUnionSchema = ['null', 'string']; + + const result = encoder.encode('hello', schema, 1); + expect(result[0]).toBe(2); // index 1 zigzag encoded is 2 + }); + + test('throws on value not matching any union type', () => { + const schema: AvroUnionSchema = ['null', 'string']; + + expect(() => encoder.encode(42, schema)).toThrow(); + }); + + test('throws on invalid union index', () => { + const schema: AvroUnionSchema = ['null', 'string']; + + expect(() => encoder.encode('hello', schema, 5)).toThrow(); + }); + }); + + describe('fixed schemas', () => { + test('encodes fixed-length data', () => { + const schema: AvroFixedSchema = { + type: 'fixed', + name: 'Hash', + size: 4, + }; + + const value = new Uint8Array([1, 2, 3, 4]); + const result = encoder.encode(value, schema); + expect(result).toEqual(value); + }); + + test('throws on wrong fixed length', () => { + const schema: AvroFixedSchema = { + type: 'fixed', + name: 'Hash', + size: 4, + }; + + expect(() => encoder.encode(new Uint8Array([1, 2, 3]), schema)).toThrow(); + expect(() => encoder.encode(new Uint8Array([1, 2, 3, 4, 5]), schema)).toThrow(); + }); + }); + + describe('schema validation', () => { + test('throws on invalid schema', () => { + const invalidSchema = {type: 'invalid'} as any; + expect(() => encoder.encode('test', invalidSchema)).toThrow('Invalid Avro schema'); + }); + + test('throws on value not conforming to schema', () => { + // This should be caught by value validation + expect(() => encoder.encode(42, 'string')).toThrow(); + }); + }); + + describe('typed write methods', () => { + test('writeNull with schema validation', () => { + encoder.writeNull('null'); + const result = writer.flush(); + expect(result.length).toBe(0); + + expect(() => encoder.writeNull('string')).toThrow(); + }); + + test('writeBoolean with schema validation', () => { + encoder.writeBoolean(true, 'boolean'); + const result = writer.flush(); + expect(result).toEqual(new Uint8Array([1])); + + expect(() => encoder.writeBoolean(true, 'string')).toThrow(); + }); + + test('writeInt with schema validation', () => { + encoder.writeInt(42, 'int'); + const result = writer.flush(); + expect(result).toEqual(new Uint8Array([84])); + + expect(() => encoder.writeInt(42, 'string')).toThrow(); + expect(() => encoder.writeInt(3000000000, 'int')).toThrow(); + }); + + test('writeNumber with different schemas', () => { + writer.reset(); + encoder.writeNumber(42, 'int'); + let result = writer.flush(); + expect(result).toEqual(new Uint8Array([84])); + + writer.reset(); + encoder.writeNumber(42, 'long'); + result = writer.flush(); + expect(result.length).toBeGreaterThan(0); + + writer.reset(); + encoder.writeNumber(3.14, 'float'); + result = writer.flush(); + expect(result.length).toBe(4); + + writer.reset(); + encoder.writeNumber(3.14, 'double'); + result = writer.flush(); + expect(result.length).toBe(8); + + expect(() => encoder.writeNumber(42, 'string')).toThrow(); + }); + }); + + describe('complex nested schemas', () => { + test('encodes nested record with arrays and maps', () => { + const schema: AvroRecordSchema = { + type: 'record', + name: 'ComplexRecord', + fields: [ + {name: 'id', type: 'int'}, + {name: 'tags', type: {type: 'array', items: 'string'}}, + {name: 'metadata', type: {type: 'map', values: 'string'}}, + { + name: 'status', + type: {type: 'enum', name: 'Status', symbols: ['ACTIVE', 'INACTIVE']}, + }, + ], + }; + + const value = { + id: 123, + tags: ['tag1', 'tag2'], + metadata: {key1: 'value1', key2: 'value2'}, + status: 'ACTIVE', + }; + + const result = encoder.encode(value, schema); + expect(result.length).toBeGreaterThan(0); + }); + + test('encodes record with union fields', () => { + const schema: AvroRecordSchema = { + type: 'record', + name: 'RecordWithUnion', + fields: [ + {name: 'id', type: 'int'}, + {name: 'optionalField', type: ['null', 'string']}, + ], + }; + + // With null value + let value: {id: number; optionalField: null | string} = {id: 1, optionalField: null}; + let result = encoder.encode(value, schema); + expect(result.length).toBeGreaterThan(0); + + // With string value + value = {id: 1, optionalField: 'test'}; + result = encoder.encode(value, schema); + expect(result.length).toBeGreaterThan(0); + }); + }); +}); \ No newline at end of file diff --git a/src/avro/__tests__/AvroSchemaValidator.spec.ts b/src/avro/__tests__/AvroSchemaValidator.spec.ts new file mode 100644 index 00000000..b63ab5e6 --- /dev/null +++ b/src/avro/__tests__/AvroSchemaValidator.spec.ts @@ -0,0 +1,360 @@ +import {AvroSchemaValidator} from '../AvroSchemaValidator'; +import type { + AvroSchema, + AvroRecordSchema, + AvroEnumSchema, + AvroArraySchema, + AvroMapSchema, + AvroUnionSchema, + AvroFixedSchema, +} from '../types'; + +describe('AvroSchemaValidator', () => { + let validator: AvroSchemaValidator; + + beforeEach(() => { + validator = new AvroSchemaValidator(); + }); + + describe('primitive schemas', () => { + test('validates null schema', () => { + expect(validator.validateSchema('null')).toBe(true); + expect(validator.validateSchema({type: 'null'})).toBe(true); + }); + + test('validates boolean schema', () => { + expect(validator.validateSchema('boolean')).toBe(true); + expect(validator.validateSchema({type: 'boolean'})).toBe(true); + }); + + test('validates int schema', () => { + expect(validator.validateSchema('int')).toBe(true); + expect(validator.validateSchema({type: 'int'})).toBe(true); + }); + + test('validates long schema', () => { + expect(validator.validateSchema('long')).toBe(true); + expect(validator.validateSchema({type: 'long'})).toBe(true); + }); + + test('validates float schema', () => { + expect(validator.validateSchema('float')).toBe(true); + expect(validator.validateSchema({type: 'float'})).toBe(true); + }); + + test('validates double schema', () => { + expect(validator.validateSchema('double')).toBe(true); + expect(validator.validateSchema({type: 'double'})).toBe(true); + }); + + test('validates bytes schema', () => { + expect(validator.validateSchema('bytes')).toBe(true); + expect(validator.validateSchema({type: 'bytes'})).toBe(true); + }); + + test('validates string schema', () => { + expect(validator.validateSchema('string')).toBe(true); + expect(validator.validateSchema({type: 'string'})).toBe(true); + }); + }); + + describe('record schemas', () => { + test('validates simple record schema', () => { + const schema: AvroRecordSchema = { + type: 'record', + name: 'User', + fields: [ + {name: 'id', type: 'int'}, + {name: 'name', type: 'string'}, + ], + }; + expect(validator.validateSchema(schema)).toBe(true); + }); + + test('validates record with default values', () => { + const schema: AvroRecordSchema = { + type: 'record', + name: 'User', + fields: [ + {name: 'id', type: 'int'}, + {name: 'name', type: 'string', default: 'Unknown'}, + ], + }; + expect(validator.validateSchema(schema)).toBe(true); + }); + + test('rejects record without name', () => { + const schema = { + type: 'record', + fields: [{name: 'id', type: 'int'}], + } as any; + expect(validator.validateSchema(schema)).toBe(false); + }); + + test('rejects record with duplicate field names', () => { + const schema: AvroRecordSchema = { + type: 'record', + name: 'User', + fields: [ + {name: 'id', type: 'int'}, + {name: 'id', type: 'string'}, + ], + }; + expect(validator.validateSchema(schema)).toBe(false); + }); + }); + + describe('enum schemas', () => { + test('validates simple enum schema', () => { + const schema: AvroEnumSchema = { + type: 'enum', + name: 'Color', + symbols: ['RED', 'GREEN', 'BLUE'], + }; + expect(validator.validateSchema(schema)).toBe(true); + }); + + test('validates enum with default', () => { + const schema: AvroEnumSchema = { + type: 'enum', + name: 'Color', + symbols: ['RED', 'GREEN', 'BLUE'], + default: 'RED', + }; + expect(validator.validateSchema(schema)).toBe(true); + }); + + test('rejects enum without symbols', () => { + const schema: AvroEnumSchema = { + type: 'enum', + name: 'Color', + symbols: [], + }; + expect(validator.validateSchema(schema)).toBe(false); + }); + + test('rejects enum with duplicate symbols', () => { + const schema: AvroEnumSchema = { + type: 'enum', + name: 'Color', + symbols: ['RED', 'GREEN', 'RED'], + }; + expect(validator.validateSchema(schema)).toBe(false); + }); + + test('rejects enum with invalid default', () => { + const schema: AvroEnumSchema = { + type: 'enum', + name: 'Color', + symbols: ['RED', 'GREEN', 'BLUE'], + default: 'YELLOW', + }; + expect(validator.validateSchema(schema)).toBe(false); + }); + }); + + describe('array schemas', () => { + test('validates simple array schema', () => { + const schema: AvroArraySchema = { + type: 'array', + items: 'string', + }; + expect(validator.validateSchema(schema)).toBe(true); + }); + + test('validates nested array schema', () => { + const schema: AvroArraySchema = { + type: 'array', + items: { + type: 'array', + items: 'int', + }, + }; + expect(validator.validateSchema(schema)).toBe(true); + }); + }); + + describe('map schemas', () => { + test('validates simple map schema', () => { + const schema: AvroMapSchema = { + type: 'map', + values: 'string', + }; + expect(validator.validateSchema(schema)).toBe(true); + }); + + test('validates complex map schema', () => { + const schema: AvroMapSchema = { + type: 'map', + values: { + type: 'record', + name: 'Value', + fields: [{name: 'data', type: 'string'}], + }, + }; + expect(validator.validateSchema(schema)).toBe(true); + }); + }); + + describe('union schemas', () => { + test('validates simple union schema', () => { + const schema: AvroUnionSchema = ['null', 'string']; + expect(validator.validateSchema(schema)).toBe(true); + }); + + test('validates complex union schema', () => { + const schema: AvroUnionSchema = [ + 'null', + 'string', + {type: 'record', name: 'User', fields: [{name: 'id', type: 'int'}]}, + ]; + expect(validator.validateSchema(schema)).toBe(true); + }); + + test('rejects empty union', () => { + const schema: AvroUnionSchema = []; + expect(validator.validateSchema(schema)).toBe(false); + }); + + test('rejects union with duplicate types', () => { + const schema: AvroUnionSchema = ['string', 'string']; + expect(validator.validateSchema(schema)).toBe(false); + }); + }); + + describe('fixed schemas', () => { + test('validates simple fixed schema', () => { + const schema: AvroFixedSchema = { + type: 'fixed', + name: 'Hash', + size: 16, + }; + expect(validator.validateSchema(schema)).toBe(true); + }); + + test('rejects fixed with negative size', () => { + const schema: AvroFixedSchema = { + type: 'fixed', + name: 'Hash', + size: -1, + }; + expect(validator.validateSchema(schema)).toBe(false); + }); + }); + + describe('value validation', () => { + test('validates null values', () => { + expect(validator.validateValue(null, 'null')).toBe(true); + expect(validator.validateValue(undefined, 'null')).toBe(false); + }); + + test('validates boolean values', () => { + expect(validator.validateValue(true, 'boolean')).toBe(true); + expect(validator.validateValue(false, 'boolean')).toBe(true); + expect(validator.validateValue('true', 'boolean')).toBe(false); + }); + + test('validates int values', () => { + expect(validator.validateValue(42, 'int')).toBe(true); + expect(validator.validateValue(-42, 'int')).toBe(true); + expect(validator.validateValue(2147483647, 'int')).toBe(true); + expect(validator.validateValue(-2147483648, 'int')).toBe(true); + expect(validator.validateValue(2147483648, 'int')).toBe(false); + expect(validator.validateValue(3.14, 'int')).toBe(false); + }); + + test('validates long values', () => { + expect(validator.validateValue(42, 'long')).toBe(true); + expect(validator.validateValue(BigInt(42), 'long')).toBe(true); + expect(validator.validateValue(3.14, 'long')).toBe(false); + }); + + test('validates float and double values', () => { + expect(validator.validateValue(3.14, 'float')).toBe(true); + expect(validator.validateValue(42, 'float')).toBe(true); + expect(validator.validateValue(3.14, 'double')).toBe(true); + expect(validator.validateValue('3.14', 'float')).toBe(false); + }); + + test('validates bytes values', () => { + expect(validator.validateValue(new Uint8Array([1, 2, 3]), 'bytes')).toBe(true); + expect(validator.validateValue([1, 2, 3], 'bytes')).toBe(false); + }); + + test('validates string values', () => { + expect(validator.validateValue('hello', 'string')).toBe(true); + expect(validator.validateValue('', 'string')).toBe(true); + expect(validator.validateValue(42, 'string')).toBe(false); + }); + + test('validates record values', () => { + const schema: AvroRecordSchema = { + type: 'record', + name: 'User', + fields: [ + {name: 'id', type: 'int'}, + {name: 'name', type: 'string'}, + ], + }; + + expect(validator.validateValue({id: 1, name: 'John'}, schema)).toBe(true); + expect(validator.validateValue({id: 1}, schema)).toBe(false); // missing required field + expect(validator.validateValue({id: '1', name: 'John'}, schema)).toBe(false); // wrong type + }); + + test('validates enum values', () => { + const schema: AvroEnumSchema = { + type: 'enum', + name: 'Color', + symbols: ['RED', 'GREEN', 'BLUE'], + }; + + expect(validator.validateValue('RED', schema)).toBe(true); + expect(validator.validateValue('YELLOW', schema)).toBe(false); + expect(validator.validateValue(0, schema)).toBe(false); + }); + + test('validates array values', () => { + const schema: AvroArraySchema = { + type: 'array', + items: 'string', + }; + + expect(validator.validateValue(['a', 'b', 'c'], schema)).toBe(true); + expect(validator.validateValue([], schema)).toBe(true); + expect(validator.validateValue(['a', 1, 'c'], schema)).toBe(false); + }); + + test('validates map values', () => { + const schema: AvroMapSchema = { + type: 'map', + values: 'int', + }; + + expect(validator.validateValue({a: 1, b: 2}, schema)).toBe(true); + expect(validator.validateValue({}, schema)).toBe(true); + expect(validator.validateValue({a: 1, b: 'two'}, schema)).toBe(false); + }); + + test('validates union values', () => { + const schema: AvroUnionSchema = ['null', 'string', 'int']; + + expect(validator.validateValue(null, schema)).toBe(true); + expect(validator.validateValue('hello', schema)).toBe(true); + expect(validator.validateValue(42, schema)).toBe(true); + expect(validator.validateValue(3.14, schema)).toBe(false); + }); + + test('validates fixed values', () => { + const schema: AvroFixedSchema = { + type: 'fixed', + name: 'Hash', + size: 4, + }; + + expect(validator.validateValue(new Uint8Array([1, 2, 3, 4]), schema)).toBe(true); + expect(validator.validateValue(new Uint8Array([1, 2, 3]), schema)).toBe(false); + expect(validator.validateValue(new Uint8Array([1, 2, 3, 4, 5]), schema)).toBe(false); + }); + }); +}); \ No newline at end of file diff --git a/src/avro/index.ts b/src/avro/index.ts new file mode 100644 index 00000000..a820c5aa --- /dev/null +++ b/src/avro/index.ts @@ -0,0 +1,4 @@ +export * from './types'; +export * from './AvroSchemaValidator'; +export * from './AvroEncoder'; +export * from './AvroSchemaEncoder'; \ No newline at end of file From d6842c42aefe09d2e868a5324a998565d4f63686 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 1 Aug 2025 20:09:59 +0000 Subject: [PATCH 3/4] perf: optimize Avro encoder for better performance Co-authored-by: streamich <9773803+streamich@users.noreply.github.com> --- src/avro/AvroEncoder.ts | 84 ++++++++++++++++++++++++----------- src/avro/AvroSchemaEncoder.ts | 29 +++++++----- 2 files changed, 77 insertions(+), 36 deletions(-) diff --git a/src/avro/AvroEncoder.ts b/src/avro/AvroEncoder.ts index 618ff484..9b11ca6a 100644 --- a/src/avro/AvroEncoder.ts +++ b/src/avro/AvroEncoder.ts @@ -90,18 +90,20 @@ export class AvroEncoder implements BinaryJsonEncoder { * Writes an Avro float value using IEEE 754 single-precision. */ public writeFloatAvro(float: number): void { - this.writer.ensureCapacity(4); - this.writer.view.setFloat32(this.writer.x, float, true); // little-endian - this.writer.move(4); + const writer = this.writer; + writer.ensureCapacity(4); + writer.view.setFloat32(writer.x, float, true); // little-endian + writer.move(4); } /** * Writes an Avro double value using IEEE 754 double-precision. */ public writeDouble(double: number): void { - this.writer.ensureCapacity(8); - this.writer.view.setFloat64(this.writer.x, double, true); // little-endian - this.writer.move(8); + const writer = this.writer; + writer.ensureCapacity(8); + writer.view.setFloat64(writer.x, double, true); // little-endian + writer.move(8); } /** @@ -116,9 +118,32 @@ export class AvroEncoder implements BinaryJsonEncoder { * Writes an Avro string value with UTF-8 encoding and length prefix. */ public writeStr(str: string): void { - const bytes = new TextEncoder().encode(str); - this.writeVarIntUnsigned(bytes.length); - this.writer.buf(bytes, bytes.length); + const writer = this.writer; + const maxSize = str.length * 4; // Max UTF-8 bytes for string + writer.ensureCapacity(5 + maxSize); // 5 bytes max for varint length + + // Reserve space for length (we'll come back to fill this) + const lengthOffset = writer.x; + writer.x += 5; // Max varint size + + // Write the string and get actual byte count + const bytesWritten = writer.utf8(str); + const endPos = writer.x; + + // Go back to encode the actual length + writer.x = lengthOffset; + this.writeVarIntUnsigned(bytesWritten); + const actualLengthSize = writer.x - lengthOffset; + + // If we reserved more space than needed, shift the string data + if (actualLengthSize < 5) { + const stringStart = lengthOffset + 5; + const stringData = writer.uint8.slice(stringStart, endPos); + writer.x = lengthOffset + actualLengthSize; + writer.buf(stringData, stringData.length); + } else { + writer.x = endPos; + } } /** @@ -126,8 +151,9 @@ export class AvroEncoder implements BinaryJsonEncoder { */ public writeArr(arr: unknown[]): void { this.writeVarIntUnsigned(arr.length); - for (const item of arr) { - this.writeAny(item); + const length = arr.length; + for (let i = 0; i < length; i++) { + this.writeAny(arr[i]); } this.writeVarIntUnsigned(0); // End of array marker } @@ -137,10 +163,12 @@ export class AvroEncoder implements BinaryJsonEncoder { */ public writeObj(obj: Record): void { const entries = Object.entries(obj); - this.writeVarIntUnsigned(entries.length); - for (const [key, value] of entries) { - this.writeStr(key); - this.writeAny(value); + const length = entries.length; + this.writeVarIntUnsigned(length); + for (let i = 0; i < length; i++) { + const entry = entries[i]; + this.writeStr(entry[0]); + this.writeAny(entry[1]); } this.writeVarIntUnsigned(0); // End of map marker } @@ -187,16 +215,19 @@ export class AvroEncoder implements BinaryJsonEncoder { * Writes a float value using IEEE 754 single-precision. */ private writeFloatValue(float: number): void { - this.writer.ensureCapacity(4); - this.writer.view.setFloat32(this.writer.x, float, true); // little-endian - this.writer.move(4); + const writer = this.writer; + writer.ensureCapacity(4); + writer.view.setFloat32(writer.x, float, true); // little-endian + writer.move(4); } /** * Writes an ASCII string (same as regular string in Avro) */ public writeAsciiStr(str: string): void { - this.writeStr(str); + const writer = this.writer; + this.writeVarIntUnsigned(str.length); + writer.ascii(str); } // Utility methods for Avro encoding @@ -205,39 +236,42 @@ export class AvroEncoder implements BinaryJsonEncoder { * Encodes a variable-length integer (for signed values with zigzag) */ private writeVarIntSigned(value: number): void { + const writer = this.writer; let n = value >>> 0; // Convert to unsigned 32-bit while (n >= 0x80) { - this.writer.u8((n & 0x7f) | 0x80); + writer.u8((n & 0x7f) | 0x80); n >>>= 7; } - this.writer.u8(n & 0x7f); + writer.u8(n & 0x7f); } /** * Encodes a variable-length integer (for unsigned values like lengths) */ private writeVarIntUnsigned(value: number): void { + const writer = this.writer; let n = value >>> 0; // Convert to unsigned 32-bit while (n >= 0x80) { - this.writer.u8((n & 0x7f) | 0x80); + writer.u8((n & 0x7f) | 0x80); n >>>= 7; } - this.writer.u8(n & 0x7f); + writer.u8(n & 0x7f); } /** * Encodes a variable-length long using Avro's encoding */ private writeVarLong(value: bigint): void { + const writer = this.writer; let n = value; const mask = BigInt(0x7f); const shift = BigInt(7); while (n >= BigInt(0x80)) { - this.writer.u8(Number((n & mask) | BigInt(0x80))); + writer.u8(Number((n & mask) | BigInt(0x80))); n >>= shift; } - this.writer.u8(Number(n & mask)); + writer.u8(Number(n & mask)); } /** diff --git a/src/avro/AvroSchemaEncoder.ts b/src/avro/AvroSchemaEncoder.ts index c717c73d..d2663453 100644 --- a/src/avro/AvroSchemaEncoder.ts +++ b/src/avro/AvroSchemaEncoder.ts @@ -10,6 +10,7 @@ import type { AvroUnionSchema, AvroFixedSchema, AvroNamedSchema, + AvroNullSchema, } from './types'; /** @@ -58,7 +59,7 @@ export class AvroSchemaEncoder { /** * Writes a null value with schema validation. */ - public writeNull(schema: AvroSchema): void { + public writeNull(schema: AvroNullSchema | AvroSchema): void { this.validateSchemaType(schema, 'null'); this.encoder.writeNull(); } @@ -135,7 +136,8 @@ export class AvroSchemaEncoder { throw new Error('Schema is not a record schema'); } - for (const field of recordSchema.fields) { + for (let i = 0; i < recordSchema.fields.length; i++) { + const field = recordSchema.fields[i]; const fieldValue = value[field.name]; if (fieldValue !== undefined) { this.writeValue(fieldValue, field.type); @@ -185,8 +187,9 @@ export class AvroSchemaEncoder { this.writeVarIntUnsigned(value.length); // Write array items - for (const item of value) { - this.writeValue(item, arraySchema.items); + const length = value.length; + for (let i = 0; i < length; i++) { + this.writeValue(value[i], arraySchema.items); } // Write end-of-array marker @@ -212,9 +215,11 @@ export class AvroSchemaEncoder { this.writeVarIntUnsigned(entries.length); // Write map entries - for (const [key, val] of entries) { - this.encoder.writeStr(key); - this.writeValue(val, mapSchema.values); + const length = entries.length; + for (let i = 0; i < length; i++) { + const entry = entries[i]; + this.encoder.writeStr(entry[0]); + this.writeValue(entry[1], mapSchema.values); } // Write end-of-map marker @@ -424,24 +429,26 @@ export class AvroSchemaEncoder { * Writes a variable-length integer using Avro's encoding (for lengths) */ private writeVarIntUnsigned(value: number): void { + const writer = this.writer; let n = value >>> 0; // Convert to unsigned 32-bit while (n >= 0x80) { - this.writer.u8((n & 0x7f) | 0x80); + writer.u8((n & 0x7f) | 0x80); n >>>= 7; } - this.writer.u8(n & 0x7f); + writer.u8(n & 0x7f); } /** * Writes a variable-length integer using Avro's encoding (for signed values with zigzag) */ private writeVarIntSigned(value: number): void { + const writer = this.writer; let n = value >>> 0; // Convert to unsigned 32-bit while (n >= 0x80) { - this.writer.u8((n & 0x7f) | 0x80); + writer.u8((n & 0x7f) | 0x80); n >>>= 7; } - this.writer.u8(n & 0x7f); + writer.u8(n & 0x7f); } /** From b2ab5bf86bbcbccaf4956c59c119e0a2d1fd48b7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 1 Aug 2025 20:16:09 +0000 Subject: [PATCH 4/4] style: fix code formatting with prettier Co-authored-by: streamich <9773803+streamich@users.noreply.github.com> --- src/avro/AvroEncoder.ts | 12 ++-- src/avro/AvroSchemaEncoder.ts | 68 ++++++++++--------- src/avro/AvroSchemaValidator.ts | 26 +++---- src/avro/__tests__/AvroEncoder.spec.ts | 6 +- src/avro/__tests__/AvroSchemaEncoder.spec.ts | 4 +- .../__tests__/AvroSchemaValidator.spec.ts | 2 +- src/avro/index.ts | 2 +- 7 files changed, 59 insertions(+), 61 deletions(-) diff --git a/src/avro/AvroEncoder.ts b/src/avro/AvroEncoder.ts index 9b11ca6a..27bda9b7 100644 --- a/src/avro/AvroEncoder.ts +++ b/src/avro/AvroEncoder.ts @@ -121,20 +121,20 @@ export class AvroEncoder implements BinaryJsonEncoder { const writer = this.writer; const maxSize = str.length * 4; // Max UTF-8 bytes for string writer.ensureCapacity(5 + maxSize); // 5 bytes max for varint length - + // Reserve space for length (we'll come back to fill this) const lengthOffset = writer.x; writer.x += 5; // Max varint size - + // Write the string and get actual byte count const bytesWritten = writer.utf8(str); const endPos = writer.x; - + // Go back to encode the actual length writer.x = lengthOffset; this.writeVarIntUnsigned(bytesWritten); const actualLengthSize = writer.x - lengthOffset; - + // If we reserved more space than needed, shift the string data if (actualLengthSize < 5) { const stringStart = lengthOffset + 5; @@ -266,7 +266,7 @@ export class AvroEncoder implements BinaryJsonEncoder { let n = value; const mask = BigInt(0x7f); const shift = BigInt(7); - + while (n >= BigInt(0x80)) { writer.u8(Number((n & mask) | BigInt(0x80))); n >>= shift; @@ -287,4 +287,4 @@ export class AvroEncoder implements BinaryJsonEncoder { private encodeZigZag64(value: bigint): bigint { return (value << BigInt(1)) ^ (value >> BigInt(63)); } -} \ No newline at end of file +} diff --git a/src/avro/AvroSchemaEncoder.ts b/src/avro/AvroSchemaEncoder.ts index d2663453..0455039a 100644 --- a/src/avro/AvroSchemaEncoder.ts +++ b/src/avro/AvroSchemaEncoder.ts @@ -34,25 +34,25 @@ export class AvroSchemaEncoder { public encode(value: unknown, schema: AvroSchema, selectedIndex?: number): Uint8Array { this.writer.reset(); this.namedSchemas.clear(); - + // Validate schema first if (!this.validator.validateSchema(schema)) { throw new Error('Invalid Avro schema'); } - + // Validate value against schema if (!this.validator.validateValue(value, schema)) { throw new Error('Value does not conform to schema'); } - + this.collectNamedSchemas(schema); - + if (Array.isArray(schema) && selectedIndex !== undefined) { this.writeUnion(value, schema, selectedIndex); } else { this.writeValue(value, schema); } - + return this.writer.flush(); } @@ -130,7 +130,7 @@ export class AvroSchemaEncoder { if (typeof schema === 'object' && schema.type !== 'record') { throw new Error('Schema is not a record schema'); } - + const recordSchema = this.resolveSchema(schema) as AvroRecordSchema; if (recordSchema.type !== 'record') { throw new Error('Schema is not a record schema'); @@ -156,7 +156,7 @@ export class AvroSchemaEncoder { if (typeof schema === 'object' && schema.type !== 'enum') { throw new Error('Schema is not an enum schema'); } - + const enumSchema = this.resolveSchema(schema) as AvroEnumSchema; if (enumSchema.type !== 'enum') { throw new Error('Schema is not an enum schema'); @@ -166,7 +166,7 @@ export class AvroSchemaEncoder { if (index === -1) { throw new Error(`Invalid enum value: ${value}`); } - + this.writeVarIntSigned(this.encodeZigZag32(index)); } @@ -177,7 +177,7 @@ export class AvroSchemaEncoder { if (typeof schema === 'object' && schema.type !== 'array') { throw new Error('Schema is not an array schema'); } - + const arraySchema = this.resolveSchema(schema) as AvroArraySchema; if (arraySchema.type !== 'array') { throw new Error('Schema is not an array schema'); @@ -185,13 +185,13 @@ export class AvroSchemaEncoder { // Write array length this.writeVarIntUnsigned(value.length); - + // Write array items const length = value.length; for (let i = 0; i < length; i++) { this.writeValue(value[i], arraySchema.items); } - + // Write end-of-array marker this.writeVarIntUnsigned(0); } @@ -203,17 +203,17 @@ export class AvroSchemaEncoder { if (typeof schema === 'object' && schema.type !== 'map') { throw new Error('Schema is not a map schema'); } - + const mapSchema = this.resolveSchema(schema) as AvroMapSchema; if (mapSchema.type !== 'map') { throw new Error('Schema is not a map schema'); } const entries = Object.entries(value); - + // Write map length this.writeVarIntUnsigned(entries.length); - + // Write map entries const length = entries.length; for (let i = 0; i < length; i++) { @@ -221,7 +221,7 @@ export class AvroSchemaEncoder { this.encoder.writeStr(entry[0]); this.writeValue(entry[1], mapSchema.values); } - + // Write end-of-map marker this.writeVarIntUnsigned(0); } @@ -237,7 +237,7 @@ export class AvroSchemaEncoder { let index = selectedIndex; if (index === undefined) { // Find the first matching schema in the union - index = schema.findIndex(subSchema => this.validator.validateValue(value, subSchema)); + index = schema.findIndex((subSchema) => this.validator.validateValue(value, subSchema)); if (index === -1) { throw new Error('Value does not match any schema in the union'); } @@ -249,7 +249,7 @@ export class AvroSchemaEncoder { // Write union index this.writeVarIntSigned(this.encodeZigZag32(index)); - + // Write the value according to the selected schema this.writeValue(value, schema[index]); } @@ -261,7 +261,7 @@ export class AvroSchemaEncoder { if (typeof schema === 'object' && schema.type !== 'fixed') { throw new Error('Schema is not a fixed schema'); } - + const fixedSchema = this.resolveSchema(schema) as AvroFixedSchema; if (fixedSchema.type !== 'fixed') { throw new Error('Schema is not a fixed schema'); @@ -270,7 +270,7 @@ export class AvroSchemaEncoder { if (value.length !== fixedSchema.size) { throw new Error(`Fixed value length ${value.length} does not match schema size ${fixedSchema.size}`); } - + this.writer.buf(value, value.length); } @@ -279,12 +279,13 @@ export class AvroSchemaEncoder { */ public writeNumber(value: number, schema: AvroSchema): void { const resolvedSchema = this.resolveSchema(schema); - const schemaType = typeof resolvedSchema === 'string' - ? resolvedSchema - : Array.isArray(resolvedSchema) - ? 'union' - : resolvedSchema.type; - + const schemaType = + typeof resolvedSchema === 'string' + ? resolvedSchema + : Array.isArray(resolvedSchema) + ? 'union' + : resolvedSchema.type; + switch (schemaType) { case 'int': this.writeInt(value, schema); @@ -369,12 +370,13 @@ export class AvroSchemaEncoder { private validateSchemaType(schema: AvroSchema, expectedType: string): void { const resolvedSchema = this.resolveSchema(schema); - const actualType = typeof resolvedSchema === 'string' - ? resolvedSchema - : Array.isArray(resolvedSchema) - ? 'union' - : resolvedSchema.type; - + const actualType = + typeof resolvedSchema === 'string' + ? resolvedSchema + : Array.isArray(resolvedSchema) + ? 'union' + : resolvedSchema.type; + if (actualType !== expectedType) { throw new Error(`Expected schema type ${expectedType}, got ${actualType}`); } @@ -399,7 +401,7 @@ export class AvroSchemaEncoder { const recordSchema = schema as AvroRecordSchema; const recordFullName = this.getFullName(recordSchema.name, recordSchema.namespace); this.namedSchemas.set(recordFullName, recordSchema); - recordSchema.fields.forEach(field => this.collectNamedSchemas(field.type)); + recordSchema.fields.forEach((field) => this.collectNamedSchemas(field.type)); break; case 'enum': const enumSchema = schema as AvroEnumSchema; @@ -457,4 +459,4 @@ export class AvroSchemaEncoder { private encodeZigZag32(value: number): number { return (value << 1) ^ (value >> 31); } -} \ No newline at end of file +} diff --git a/src/avro/AvroSchemaValidator.ts b/src/avro/AvroSchemaValidator.ts index 10bc016d..ffb53f2e 100644 --- a/src/avro/AvroSchemaValidator.ts +++ b/src/avro/AvroSchemaValidator.ts @@ -97,16 +97,16 @@ export class AvroSchemaValidator { private validateUnionSchema(schema: AvroUnionSchema): boolean { if (schema.length === 0) return false; const typeSet = new Set(); - + for (const subSchema of schema) { if (!this.validateSchemaInternal(subSchema)) return false; - + // Union types must be unique const typeName = this.getSchemaTypeName(subSchema); if (typeSet.has(typeName)) return false; typeSet.add(typeName); } - + return true; } @@ -144,7 +144,7 @@ export class AvroSchemaValidator { private validateRecordSchema(schema: AvroRecordSchema): boolean { if (schema.type !== 'record' || !schema.name || !Array.isArray(schema.fields)) return false; - + const fullName = this.getFullName(schema.name, schema.namespace); if (this.namedSchemas.has(fullName)) return false; this.namedSchemas.set(fullName, schema); @@ -160,16 +160,12 @@ export class AvroSchemaValidator { } private validateRecordField(field: AvroRecordField): boolean { - return ( - typeof field.name === 'string' && - field.name.length > 0 && - this.validateSchemaInternal(field.type) - ); + return typeof field.name === 'string' && field.name.length > 0 && this.validateSchemaInternal(field.type); } private validateEnumSchema(schema: AvroEnumSchema): boolean { if (schema.type !== 'enum' || !schema.name || !Array.isArray(schema.symbols)) return false; - + const fullName = this.getFullName(schema.name, schema.namespace); if (this.namedSchemas.has(fullName)) return false; this.namedSchemas.set(fullName, schema); @@ -198,7 +194,7 @@ export class AvroSchemaValidator { private validateFixedSchema(schema: AvroFixedSchema): boolean { if (schema.type !== 'fixed' || !schema.name || typeof schema.size !== 'number') return false; if (schema.size < 0) return false; - + const fullName = this.getFullName(schema.name, schema.namespace); if (this.namedSchemas.has(fullName)) return false; this.namedSchemas.set(fullName, schema); @@ -213,7 +209,7 @@ export class AvroSchemaValidator { if (Array.isArray(schema)) { // Union - value must match one of the schemas - return schema.some(subSchema => this.validateValueAgainstSchema(value, subSchema)); + return schema.some((subSchema) => this.validateValueAgainstSchema(value, subSchema)); } if (typeof schema === 'object' && schema !== null) { @@ -294,13 +290,13 @@ export class AvroSchemaValidator { private validateValueAgainstArray(value: unknown, schema: AvroArraySchema): boolean { if (!Array.isArray(value)) return false; - return value.every(item => this.validateValueAgainstSchema(item, schema.items)); + return value.every((item) => this.validateValueAgainstSchema(item, schema.items)); } private validateValueAgainstMap(value: unknown, schema: AvroMapSchema): boolean { if (typeof value !== 'object' || value === null) return false; const obj = value as Record; - return Object.values(obj).every(val => this.validateValueAgainstSchema(val, schema.values)); + return Object.values(obj).every((val) => this.validateValueAgainstSchema(val, schema.values)); } private validateValueAgainstFixed(value: unknown, schema: AvroFixedSchema): boolean { @@ -316,4 +312,4 @@ export class AvroSchemaValidator { private getFullName(name: string, namespace?: string): string { return namespace ? `${namespace}.${name}` : name; } -} \ No newline at end of file +} diff --git a/src/avro/__tests__/AvroEncoder.spec.ts b/src/avro/__tests__/AvroEncoder.spec.ts index fcd77787..8ee29aa5 100644 --- a/src/avro/__tests__/AvroEncoder.spec.ts +++ b/src/avro/__tests__/AvroEncoder.spec.ts @@ -196,14 +196,14 @@ describe('AvroEncoder', () => { expect(result).toEqual(new Uint8Array([84])); // 42 zigzag encoded writer.reset(); - + // Integer outside int range encoder.writeNumber(3000000000); result = writer.flush(); expect(result.length).toBeGreaterThan(1); writer.reset(); - + // Float encoder.writeNumber(3.14); result = writer.flush(); @@ -284,4 +284,4 @@ describe('AvroEncoder', () => { expect(result.length).toBe(8); }); }); -}); \ No newline at end of file +}); diff --git a/src/avro/__tests__/AvroSchemaEncoder.spec.ts b/src/avro/__tests__/AvroSchemaEncoder.spec.ts index 9e47d557..90b95b02 100644 --- a/src/avro/__tests__/AvroSchemaEncoder.spec.ts +++ b/src/avro/__tests__/AvroSchemaEncoder.spec.ts @@ -249,7 +249,7 @@ describe('AvroSchemaEncoder', () => { test('encodes union value with explicit index', () => { const schema: AvroUnionSchema = ['null', 'string']; - + const result = encoder.encode('hello', schema, 1); expect(result[0]).toBe(2); // index 1 zigzag encoded is 2 }); @@ -403,4 +403,4 @@ describe('AvroSchemaEncoder', () => { expect(result.length).toBeGreaterThan(0); }); }); -}); \ No newline at end of file +}); diff --git a/src/avro/__tests__/AvroSchemaValidator.spec.ts b/src/avro/__tests__/AvroSchemaValidator.spec.ts index b63ab5e6..a937f128 100644 --- a/src/avro/__tests__/AvroSchemaValidator.spec.ts +++ b/src/avro/__tests__/AvroSchemaValidator.spec.ts @@ -357,4 +357,4 @@ describe('AvroSchemaValidator', () => { expect(validator.validateValue(new Uint8Array([1, 2, 3, 4, 5]), schema)).toBe(false); }); }); -}); \ No newline at end of file +}); diff --git a/src/avro/index.ts b/src/avro/index.ts index a820c5aa..698c207e 100644 --- a/src/avro/index.ts +++ b/src/avro/index.ts @@ -1,4 +1,4 @@ export * from './types'; export * from './AvroSchemaValidator'; export * from './AvroEncoder'; -export * from './AvroSchemaEncoder'; \ No newline at end of file +export * from './AvroSchemaEncoder';