Skip to content

Commit 6bc474e

Browse files
zxhyc131 and dinwwwh
authored
fix(node-adapter): handle utf-8 characters split across stream chunks (#1496)
## Summary - use `StringDecoder` to decode stream content safely across chunk boundaries - add tests for JSON and text bodies with UTF-8 characters split between chunks ## Problem Direct `chunk.toString()` decoding can corrupt UTF-8 characters when multi-byte sequences are split across stream chunks. ## Testing - added unit tests for chunk-boundary UTF-8 decoding in JSON body - added unit tests for chunk-boundary UTF-8 decoding in text body <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **Bug Fixes** * Improved UTF‑8 decoding for streamed request bodies so multi‑byte characters split across chunks are decoded correctly and incomplete final bytes are handled gracefully (replacement character used). * **Tests** * Added tests validating JSON and plain‑text payloads with multi‑byte UTF‑8 characters split across stream boundaries, including an incomplete final byte scenario. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: zxhyc131 <zxhyc131@mercallure.com> Co-authored-by: Dinh Le <dinwwwh@gmail.com>
1 parent f30210a commit 6bc474e

File tree

2 files changed

+66
-4
lines changed

2 files changed

+66
-4
lines changed

packages/standard-server-node/src/body.test.ts

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,29 @@ beforeEach(() => {
1616
vi.clearAllMocks()
1717
})
1818

19+
function createChunkedRequest(contentType: string, chunks: Buffer[]): IncomingMessage {
20+
const request = Readable.from(chunks) as IncomingMessage
21+
request.headers = {
22+
'content-type': contentType,
23+
}
24+
return request
25+
}
26+
27+
function splitBufferInsideCharacter(text: string, splitCharacter: string): Buffer[] {
28+
const buffer = Buffer.from(text)
29+
const splitBytes = Buffer.from(splitCharacter)
30+
const splitIndex = buffer.indexOf(splitBytes)
31+
32+
if (splitIndex === -1) {
33+
throw new Error(`split character not found: ${splitCharacter}`)
34+
}
35+
36+
return [
37+
buffer.subarray(0, splitIndex + 1),
38+
buffer.subarray(splitIndex + 1),
39+
]
40+
}
41+
1942
describe('toStandardBody', () => {
2043
it('undefined', async () => {
2144
let standardBody: StandardBody = {} as any
@@ -51,6 +74,42 @@ describe('toStandardBody', () => {
5174
expect(standardBody).toEqual({ foo: 'bar' })
5275
})
5376

77+
it('json with utf-8 characters split across chunk boundaries', async () => {
78+
const original = {
79+
json: {
80+
text: '滚滚长江东逝水',
81+
},
82+
}
83+
84+
const chunks = splitBufferInsideCharacter(JSON.stringify(original), '江')
85+
const request = createChunkedRequest('application/json', chunks)
86+
87+
const standardBody = await toStandardBody(request)
88+
89+
expect(standardBody).toEqual(original)
90+
})
91+
92+
it('text with utf-8 characters split across chunk boundaries', async () => {
93+
const original = '海内存知己,天涯若比邻'
94+
const chunks = splitBufferInsideCharacter(original, '存')
95+
const request = createChunkedRequest('text/plain', chunks)
96+
97+
const standardBody = await toStandardBody(request)
98+
99+
expect(standardBody).toBe(original)
100+
})
101+
102+
it('text with utf-8 characters split across chunk boundaries end with incomplete utf8', async () => {
103+
const original = '海内存知己,天涯若比邻'
104+
const chunks = splitBufferInsideCharacter(original, '存')
105+
const incompleteUtf8 = Buffer.from([230, 181])
106+
const request = createChunkedRequest('text/plain', [...chunks, incompleteUtf8])
107+
108+
const standardBody = await toStandardBody(request)
109+
110+
expect(standardBody).toBe(`${original}�`)
111+
})
112+
54113
it('json but empty body', async () => {
55114
let standardBody: StandardBody = {} as any
56115

packages/standard-server-node/src/body.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import type { StandardBody, StandardHeaders } from '@orpc/standard-server'
2-
import type { Buffer } from 'node:buffer'
32
import type { ToEventIteratorOptions, ToEventStreamOptions } from './event-iterator'
43
import type { NodeHttpRequest } from './types'
4+
import { Buffer } from 'node:buffer'
55
import { Readable } from 'node:stream'
66
import { isAsyncIteratorObject, parseEmptyableJSON, runWithSpan, stringifyJSON } from '@orpc/shared'
77
import { flattenHeader, generateContentDisposition, getFilenameFromContentDisposition } from '@orpc/standard-server'
@@ -114,13 +114,16 @@ function _streamToFormData(stream: Readable, contentType: string): Promise<FormD
114114
}
115115

116116
async function _streamToString(stream: Readable): Promise<string> {
117-
let string = ''
117+
const decoder = new TextDecoder()
118+
let text = ''
118119

119120
for await (const chunk of stream) {
120-
string += chunk.toString()
121+
text += decoder.decode(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk), { stream: true })
121122
}
122123

123-
return string
124+
text += decoder.decode()
125+
126+
return text
124127
}
125128

126129
async function _streamToFile(stream: Readable, fileName: string, contentType: string): Promise<File> {

0 commit comments

Comments
 (0)