From fc41b9596d18bb96e1ca1762c0c7f034dc2e9c3f Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 8 Oct 2025 13:22:50 +0100 Subject: [PATCH 1/2] fix(otel): prevent unpaired unicode surrogate pairs from causing insert errors --- apps/webapp/app/v3/otlpExporter.server.ts | 119 +++++++++++++----- .../hello-world/src/trigger/telemetry.ts | 14 +++ 2 files changed, 102 insertions(+), 31 deletions(-) diff --git a/apps/webapp/app/v3/otlpExporter.server.ts b/apps/webapp/app/v3/otlpExporter.server.ts index b36ad1a939..b2cd62f181 100644 --- a/apps/webapp/app/v3/otlpExporter.server.ts +++ b/apps/webapp/app/v3/otlpExporter.server.ts @@ -29,6 +29,8 @@ import type { import { startSpan } from "./tracing.server"; import { enrichCreatableEvents } from "./utils/enrichCreatableEvents.server"; import { env } from "~/env.server"; +import { detectBadJsonStrings } from "~/utils/detectBadJsonStrings"; +import { singleton } from "~/utils/singleton"; class OTLPExporter { private _tracer: Tracer; @@ -221,18 +223,16 @@ function convertLogsToCreateableEvents( ); const properties = - convertKeyValueItemsToMap( - truncateAttributes(log.attributes ?? [], spanAttributeValueLengthLimit), - [], - undefined, - [ + truncateAttributes( + convertKeyValueItemsToMap(log.attributes ?? [], [], undefined, [ SemanticInternalAttributes.USAGE, SemanticInternalAttributes.SPAN, SemanticInternalAttributes.METADATA, SemanticInternalAttributes.STYLE, SemanticInternalAttributes.METRIC_EVENTS, SemanticInternalAttributes.TRIGGER, - ] + ]), + spanAttributeValueLengthLimit ) ?? {}; return { @@ -304,18 +304,16 @@ function convertSpansToCreateableEvents( ); const properties = - convertKeyValueItemsToMap( - truncateAttributes(span.attributes ?? [], spanAttributeValueLengthLimit), - [], - undefined, - [ + truncateAttributes( + convertKeyValueItemsToMap(span.attributes ?? [], [], undefined, [ SemanticInternalAttributes.USAGE, SemanticInternalAttributes.SPAN, SemanticInternalAttributes.METADATA, SemanticInternalAttributes.STYLE, SemanticInternalAttributes.METRIC_EVENTS, SemanticInternalAttributes.TRIGGER, - ] + ]), + spanAttributeValueLengthLimit ) ?? {}; return { @@ -774,24 +772,83 @@ function binaryToHex(buffer: Buffer | string | undefined): string | undefined { return Buffer.from(Array.from(buffer)).toString("hex"); } -function truncateAttributes(attributes: KeyValue[], maximumLength: number = 1024): KeyValue[] { - return attributes.map((attribute) => { - return isStringValue(attribute.value) - ? { - key: attribute.key, - value: { - stringValue: attribute.value.stringValue.slice(0, maximumLength), - }, - } - : attribute; - }); +function truncateAttributes( + attributes: Record | undefined, + maximumLength: number = 1024 +): Record | undefined { + if (!attributes) return undefined; + + const truncatedAttributes: Record = {}; + + for (const [key, value] of Object.entries(attributes)) { + if (!key) continue; + + if (typeof value === "string") { + truncatedAttributes[key] = truncateAndDetectUnpairedSurrogate(value, maximumLength); + } else { + truncatedAttributes[key] = value; + } + } + + return truncatedAttributes; } -export const otlpExporter = new OTLPExporter( - eventRepository, - clickhouseEventRepository, - process.env.OTLP_EXPORTER_VERBOSE === "1", - process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT - ? parseInt(process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT, 10) - : 8192 -); +function truncateAndDetectUnpairedSurrogate(str: string, maximumLength: number): string { + const truncatedString = smartTruncateString(str, maximumLength); + + if (hasUnpairedSurrogateAtEnd(truncatedString)) { + return smartTruncateString(truncatedString, [...truncatedString].length - 1); + } + + return truncatedString; +} + +const ASCII_ONLY_REGEX = /^[\x00-\x7F]*$/; + +function smartTruncateString(str: string, maximumLength: number): string { + if (!str) return ""; + if (str.length <= maximumLength) return str; + + if (ASCII_ONLY_REGEX.test(str)) { + return str.slice(0, maximumLength); + } + + return [...str].slice(0, maximumLength).join(""); +} + +function hasUnpairedSurrogateAtEnd(str: string): boolean { + if (str.length === 0) return false; + + const lastCode = str.charCodeAt(str.length - 1); + + // Check if last character is an unpaired high surrogate + if (lastCode >= 0xd800 && lastCode <= 0xdbff) { + return true; // High surrogate at end = unpaired + } + + // Check if last character is an unpaired low surrogate + if (lastCode >= 0xdc00 && lastCode <= 0xdfff) { + // Low surrogate is only valid if preceded by high surrogate + if (str.length === 1) return true; // Single low surrogate + + const secondLastCode = str.charCodeAt(str.length - 2); + if (secondLastCode < 0xd800 || secondLastCode > 0xdbff) { + return true; // Low surrogate not preceded by high surrogate + } + } + + return false; +} + +export const otlpExporter = singleton("otlpExporter", initializeOTLPExporter); + +function initializeOTLPExporter() { + return new OTLPExporter( + eventRepository, + clickhouseEventRepository, + process.env.OTLP_EXPORTER_VERBOSE === "1", + process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT + ? parseInt(process.env.SERVER_OTEL_SPAN_ATTRIBUTE_VALUE_LENGTH_LIMIT, 10) + : 8192 + ); +} diff --git a/references/hello-world/src/trigger/telemetry.ts b/references/hello-world/src/trigger/telemetry.ts index 5dbecb8963..9acf5a6ce6 100644 --- a/references/hello-world/src/trigger/telemetry.ts +++ b/references/hello-world/src/trigger/telemetry.ts @@ -51,6 +51,20 @@ export const taskWithChildTasks = task({ }, }); +export const taskWithBadLogString = task({ + id: "otel/task-with-bad-log-string", + run: async (payload: any, { ctx }) => { + logger.log("Hello, world!", { + myString: "šŸ‘‹šŸ½ I’m Shelby, of Defense.\n\nš‹šžš­'š¬ š›š®š¢š„š š­š”šž \ud835", + }); + + logger.log("Hello, world!", { + myString: + "šŸ‘‹šŸ½ I’m Shelby, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, MIT-trained mathematician, and AI researcher, engineer, and speaker.\n\nI drive clarity, vision, and execution at the frontier of AI, empowering teams to build breakthrough technologies with real-world, enterprise impact. šŸ’„\n\nšŸ”¹ 35+ influential AI research publications across AI agents, LLMs, SLMs, and ML (see š˜—š˜¶š˜£š˜­š˜Ŗš˜¤š˜¢š˜µš˜Ŗš˜°š˜Æš˜“ below)\nšŸ”¹ 8+ years developing applied AI for Fortune 500 use cases\nšŸ”¹ 10+ years hands-on engineering • 16+ years teaching & speaking with clarity\nšŸ”¹ Featured in VentureBeat, ZDNET, and more (see š˜”š˜¦š˜„š˜Ŗš˜¢ š˜Šš˜°š˜·š˜¦š˜³š˜¢š˜Øš˜¦ below)\nšŸ”¹ 30+ AI keynotes, talks, podcasts, and panels (see š˜’š˜¦š˜ŗš˜Æš˜°š˜µš˜¦š˜“ below)\n\nCurrently, I lead and manage a growing team of AI researchers and engineers at Salesforce. We push the boundaries of agentic AI, multi-agent systems, on-device AI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research and engineering at Intel, IBM Research, MITRE, and the Department of Defense.\n\nš‹šžš­'š¬ š›š®š¢š„š š­š”šž \ud835", + }); + }, +}); + export const generateLogsParentTask = task({ id: "otel/generate-logs-parent", run: async (payload: any) => { From b02bfc236d4f3182ebe7cf0bfc988bb8e0b7345a Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 8 Oct 2025 14:30:09 +0100 Subject: [PATCH 2/2] only check parts of the string that are not going to get truncated remove unnecessary taks --- apps/webapp/app/v3/otlpExporter.server.ts | 8 +++++--- references/hello-world/src/trigger/telemetry.ts | 14 -------------- 2 files changed, 5 insertions(+), 17 deletions(-) diff --git a/apps/webapp/app/v3/otlpExporter.server.ts b/apps/webapp/app/v3/otlpExporter.server.ts index b2cd62f181..cb05b375fc 100644 --- a/apps/webapp/app/v3/otlpExporter.server.ts +++ b/apps/webapp/app/v3/otlpExporter.server.ts @@ -803,17 +803,19 @@ function truncateAndDetectUnpairedSurrogate(str: string, maximumLength: number): return truncatedString; } -const ASCII_ONLY_REGEX = /^[\x00-\x7F]*$/; +const ASCII_ONLY_REGEX = /^[\p{ASCII}]*$/u; function smartTruncateString(str: string, maximumLength: number): string { if (!str) return ""; if (str.length <= maximumLength) return str; - if (ASCII_ONLY_REGEX.test(str)) { + const checkLength = Math.min(str.length, maximumLength * 2 + 2); + + if (ASCII_ONLY_REGEX.test(str.slice(0, checkLength))) { return str.slice(0, maximumLength); } - return [...str].slice(0, maximumLength).join(""); + return [...str.slice(0, checkLength)].slice(0, maximumLength).join(""); } function hasUnpairedSurrogateAtEnd(str: string): boolean { diff --git a/references/hello-world/src/trigger/telemetry.ts b/references/hello-world/src/trigger/telemetry.ts index 9acf5a6ce6..5dbecb8963 100644 --- a/references/hello-world/src/trigger/telemetry.ts +++ b/references/hello-world/src/trigger/telemetry.ts @@ -51,20 +51,6 @@ export const taskWithChildTasks = task({ }, }); -export const taskWithBadLogString = task({ - id: "otel/task-with-bad-log-string", - run: async (payload: any, { ctx }) => { - logger.log("Hello, world!", { - myString: "šŸ‘‹šŸ½ I’m Shelby, of Defense.\n\nš‹šžš­'š¬ š›š®š¢š„š š­š”šž \ud835", - }); - - logger.log("Hello, world!", { - myString: - "šŸ‘‹šŸ½ I’m Shelby, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, an award-winning people leader, MIT-trained mathematician, and AI researcher, engineer, and speaker.\n\nI drive clarity, vision, and execution at the frontier of AI, empowering teams to build breakthrough technologies with real-world, enterprise impact. šŸ’„\n\nšŸ”¹ 35+ influential AI research publications across AI agents, LLMs, SLMs, and ML (see š˜—š˜¶š˜£š˜­š˜Ŗš˜¤š˜¢š˜µš˜Ŗš˜°š˜Æš˜“ below)\nšŸ”¹ 8+ years developing applied AI for Fortune 500 use cases\nšŸ”¹ 10+ years hands-on engineering • 16+ years teaching & speaking with clarity\nšŸ”¹ Featured in VentureBeat, ZDNET, and more (see š˜”š˜¦š˜„š˜Ŗš˜¢ š˜Šš˜°š˜·š˜¦š˜³š˜¢š˜Øš˜¦ below)\nšŸ”¹ 30+ AI keynotes, talks, podcasts, and panels (see š˜’š˜¦š˜ŗš˜Æš˜°š˜µš˜¦š˜“ below)\n\nCurrently, I lead and manage a growing team of AI researchers and engineers at Salesforce. We push the boundaries of agentic AI, multi-agent systems, on-device AI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research andAI, and efficient models.\n\nPreviously, I spent time in research and engineering at Intel, IBM Research, MITRE, and the Department of Defense.\n\nš‹šžš­'š¬ š›š®š¢š„š š­š”šž \ud835", - }); - }, -}); - export const generateLogsParentTask = task({ id: "otel/generate-logs-parent", run: async (payload: any) => {