Skip to content

Commit f5fe28e

Browse files
committed
Don't set array capacity in repeat-expr (costly in invalid files)
Cc @armijnhemel Although pre-allocating the array can speed up parsing of the repeated field by a few percent (assuming that the parsed file is completely valid), it poses a serious issue for invalid files in which some random group of high bytes is interpreted as the number of entries. That allocates a ridiculously large array (for example, 2 million entries or more), which is costly and unnecessary (because parsing is likely to fail long before all 2 million records have been read). This change brings `repeat: expr` in line with `repeat: eos` and `repeat: until`, which made it possible to move the code that creates the array to a common place - the `condRepeatCommonInit()` method, which is called for all types of repeats.
1 parent 5773e73 commit f5fe28e

14 files changed

+145
-239
lines changed

shared/src/main/scala/io/kaitai/struct/languages/CSharpCompiler.scala

Lines changed: 9 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -274,15 +274,17 @@ class CSharpCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
274274

275275
override def condIfFooter(expr: expr): Unit = fileFooter(null)
276276

277-
override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw): Unit = {
277+
override def condRepeatCommonInit(id: Identifier, dataType: DataType, needRaw: NeedRaw): Unit = {
278278
importList.add("System.Collections.Generic")
279279

280280
if (needRaw.level >= 1)
281281
out.puts(s"${privateMemberName(RawIdentifier(id))} = new List<byte[]>();")
282282
if (needRaw.level >= 2)
283283
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new List<byte[]>();")
284-
285284
out.puts(s"${privateMemberName(id)} = new ${kaitaiType2NativeType(ArrayTypeInStream(dataType))}();")
285+
}
286+
287+
override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType): Unit = {
286288
out.puts("{")
287289
out.inc
288290
out.puts("var i = 0;")
@@ -302,33 +304,18 @@ class CSharpCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
302304
out.puts("}")
303305
}
304306

305-
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, repeatExpr: expr): Unit = {
306-
importList.add("System.Collections.Generic")
307-
308-
if (needRaw.level >= 1)
309-
out.puts(s"${privateMemberName(RawIdentifier(id))} = new List<byte[]>((int) (${expression(repeatExpr)}));")
310-
if (needRaw.level >= 2)
311-
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new List<byte[]>((int) (${expression(repeatExpr)}));")
312-
out.puts(s"${privateMemberName(id)} = new ${kaitaiType2NativeType(ArrayTypeInStream(dataType))}((int) (${expression(repeatExpr)}));")
307+
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, repeatExpr: expr): Unit = {
313308
out.puts(s"for (var i = 0; i < ${expression(repeatExpr)}; i++)")
314309
out.puts("{")
315310
out.inc
316311
}
317312

318-
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit = {
319-
out.puts(s"${privateMemberName(id)}.Add($expr);")
320-
}
313+
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit =
314+
handleAssignmentRepeatEos(id, expr)
321315

322316
override def condRepeatExprFooter: Unit = fileFooter(null)
323317

324-
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
325-
importList.add("System.Collections.Generic")
326-
327-
if (needRaw.level >= 1)
328-
out.puts(s"${privateMemberName(RawIdentifier(id))} = new List<byte[]>();")
329-
if (needRaw.level >= 2)
330-
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new List<byte[]>();")
331-
out.puts(s"${privateMemberName(id)} = new ${kaitaiType2NativeType(ArrayTypeInStream(dataType))}();")
318+
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
332319
out.puts("{")
333320
out.inc
334321
out.puts("var i = 0;")
@@ -347,7 +334,7 @@ class CSharpCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
347334
out.puts(s"${privateMemberName(id)}.Add($tempVar);")
348335
}
349336

350-
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
337+
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
351338
typeProvider._currentIteratorType = Some(dataType)
352339
out.puts("i++;")
353340
out.dec

shared/src/main/scala/io/kaitai/struct/languages/CppCompiler.scala

Lines changed: 10 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,7 @@ class CppCompiler(
619619
outSrc.puts("}")
620620
}
621621

622-
override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw): Unit = {
622+
override def condRepeatCommonInit(id: Identifier, dataType: DataType, needRaw: NeedRaw): Unit = {
623623
importListHdr.addSystem("vector")
624624

625625
if (needRaw.level >= 1) {
@@ -632,6 +632,9 @@ class CppCompiler(
632632
outSrc.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = ${newVector(CalcBytesType)};")
633633
}
634634
outSrc.puts(s"${privateMemberName(id)} = ${newVector(dataType)};")
635+
}
636+
637+
override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType): Unit = {
635638
outSrc.puts("{")
636639
outSrc.inc
637640
outSrc.puts("int i = 0;")
@@ -651,54 +654,22 @@ class CppCompiler(
651654
outSrc.puts("}")
652655
}
653656

654-
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, repeatExpr: Ast.expr): Unit = {
655-
importListHdr.addSystem("vector")
656-
657+
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, repeatExpr: Ast.expr): Unit = {
657658
val lenVar = s"l_${idToStr(id)}"
658-
outSrc.puts(s"int $lenVar = ${expression(repeatExpr)};")
659-
if (needRaw.level >= 1) {
660-
val rawId = privateMemberName(RawIdentifier(id))
661-
outSrc.puts(s"$rawId = ${newVector(CalcBytesType)};")
662-
outSrc.puts(s"$rawId->reserve($lenVar);")
663-
if (needRaw.hasIO) {
664-
val ioId = privateMemberName(IoStorageIdentifier(RawIdentifier(id)))
665-
outSrc.puts(s"$ioId = ${newVector(OwnedKaitaiStreamType)};")
666-
outSrc.puts(s"$ioId->reserve($lenVar);")
667-
}
668-
}
669-
if (needRaw.level >= 2) {
670-
val rawId = privateMemberName(RawIdentifier(RawIdentifier(id)))
671-
outSrc.puts(s"$rawId = ${newVector(CalcBytesType)};")
672-
outSrc.puts(s"$rawId->reserve($lenVar);")
673-
}
674-
outSrc.puts(s"${privateMemberName(id)} = ${newVector(dataType)};")
675-
outSrc.puts(s"${privateMemberName(id)}->reserve($lenVar);")
659+
outSrc.puts(s"const int $lenVar = ${expression(repeatExpr)};")
676660
outSrc.puts(s"for (int i = 0; i < $lenVar; i++) {")
677661
outSrc.inc
678662
}
679663

680-
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit = {
681-
outSrc.puts(s"${privateMemberName(id)}->push_back(${stdMoveWrap(expr)});")
682-
}
664+
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit =
665+
handleAssignmentRepeatEos(id, expr)
683666

684667
override def condRepeatExprFooter: Unit = {
685668
outSrc.dec
686669
outSrc.puts("}")
687670
}
688671

689-
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
690-
importListHdr.addSystem("vector")
691-
692-
if (needRaw.level >= 1) {
693-
outSrc.puts(s"${privateMemberName(RawIdentifier(id))} = ${newVector(CalcBytesType)};")
694-
if (needRaw.hasIO) {
695-
outSrc.puts(s"${privateMemberName(IoStorageIdentifier(RawIdentifier(id)))} = ${newVector(OwnedKaitaiStreamType)};")
696-
}
697-
}
698-
if (needRaw.level >= 2) {
699-
outSrc.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = ${newVector(CalcBytesType)};")
700-
}
701-
outSrc.puts(s"${privateMemberName(id)} = ${newVector(dataType)};")
672+
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
702673
outSrc.puts("{")
703674
outSrc.inc
704675
outSrc.puts("int i = 0;")
@@ -732,7 +703,7 @@ class CppCompiler(
732703
outSrc.puts(s"${privateMemberName(id)}->push_back($wrappedTempVar);")
733704
}
734705

735-
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
706+
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
736707
typeProvider._currentIteratorType = Some(dataType)
737708
outSrc.puts("i++;")
738709
outSrc.dec

shared/src/main/scala/io/kaitai/struct/languages/GoCompiler.scala

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -293,12 +293,12 @@ class GoCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
293293
out.inc
294294
}
295295

296-
override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw): Unit = {
297-
if (needRaw.level >= 1)
298-
out.puts(s"${privateMemberName(RawIdentifier(id))} = make([][]byte, 0);")
299-
if (needRaw.level >= 2)
300-
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = make([][]byte, 0);")
301-
//out.puts(s"${privateMemberName(id)} = make(${kaitaiType2NativeType(ArrayType(dataType))})")
296+
override def condRepeatCommonInit(id: Identifier, dataType: DataType, needRaw: NeedRaw): Unit = {
297+
// slices don't have to be manually initialized in Go: the built-in append()
298+
// function works even on `nil` slices (https://go.dev/tour/moretypes/15)
299+
}
300+
301+
override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType): Unit = {
302302
out.puts(s"for i := 1;; i++ {")
303303
out.inc
304304

@@ -318,27 +318,21 @@ class GoCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
318318
out.puts(s"$name = append($name, $expr)")
319319
}
320320

321-
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, repeatExpr: Ast.expr): Unit = {
322-
if (needRaw.level >= 1)
323-
out.puts(s"${privateMemberName(RawIdentifier(id))} = make([][]byte, ${expression(repeatExpr)})")
324-
if (needRaw.level >= 2)
325-
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = make([][]byte, ${expression(repeatExpr)})")
326-
out.puts(s"${privateMemberName(id)} = make(${kaitaiType2NativeType(ArrayTypeInStream(dataType))}, ${expression(repeatExpr)})")
327-
out.puts(s"for i := range ${privateMemberName(id)} {")
321+
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, repeatExpr: Ast.expr): Unit = {
322+
out.puts(s"for i := 0; i < int(${expression(repeatExpr)}); i++ {")
328323
out.inc
324+
// FIXME: Go throws a fatal compile error when the `i` variable is not used (unused variables
325+
// can only use the blank identifier `_`, see https://go.dev/doc/effective_go#blank), so we have
326+
// to silence it like this. It would be nice to be able to analyze all expressions that appear
327+
// in the loop body to decide whether to generate `for _ := range` or `for i := range` here, but
328+
// that would be really difficult to do properly in KSC with the current architecture.
329+
out.puts("_ = i")
329330
}
330331

331-
override def handleAssignmentRepeatExpr(id: Identifier, r: TranslatorResult): Unit = {
332-
val name = privateMemberName(id)
333-
val expr = translator.resToStr(r)
334-
out.puts(s"$name[i] = $expr")
335-
}
332+
override def handleAssignmentRepeatExpr(id: Identifier, r: TranslatorResult): Unit =
333+
handleAssignmentRepeatEos(id, r)
336334

337-
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: Ast.expr): Unit = {
338-
if (needRaw.level >= 1)
339-
out.puts(s"${privateMemberName(RawIdentifier(id))} = make([][]byte, 0);")
340-
if (needRaw.level >= 2)
341-
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = make([][]byte, 0);")
335+
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, untilExpr: Ast.expr): Unit = {
342336
out.puts(s"for i := 1;; i++ {")
343337
out.inc
344338
}
@@ -350,7 +344,7 @@ class GoCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
350344
out.puts(s"${privateMemberName(id)} = append(${privateMemberName(id)}, $tempVar)")
351345
}
352346

353-
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: Ast.expr): Unit = {
347+
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, untilExpr: Ast.expr): Unit = {
354348
typeProvider._currentIteratorType = Some(dataType)
355349
out.puts(s"if ${expression(untilExpr)} {")
356350
out.inc

shared/src/main/scala/io/kaitai/struct/languages/JavaCompiler.scala

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -356,12 +356,15 @@ class JavaCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
356356
out.inc
357357
}
358358

359-
override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw): Unit = {
359+
override def condRepeatCommonInit(id: Identifier, dataType: DataType, needRaw: NeedRaw): Unit = {
360360
if (needRaw.level >= 1)
361361
out.puts(s"${privateMemberName(RawIdentifier(id))} = new ArrayList<byte[]>();")
362362
if (needRaw.level >= 2)
363363
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new ArrayList<byte[]>();")
364364
out.puts(s"${privateMemberName(id)} = new ${kaitaiType2JavaType(ArrayTypeInStream(dataType))}();")
365+
}
366+
367+
override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType): Unit = {
365368
out.puts("{")
366369
out.inc
367370
out.puts("int i = 0;")
@@ -383,28 +386,17 @@ class JavaCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
383386
out.puts("}")
384387
}
385388

386-
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, repeatExpr: expr): Unit = {
387-
if (needRaw.level >= 1)
388-
out.puts(s"${privateMemberName(RawIdentifier(id))} = new ArrayList<byte[]>(((Number) (${expression(repeatExpr)})).intValue());")
389-
if (needRaw.level >= 2)
390-
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new ArrayList<byte[]>(((Number) (${expression(repeatExpr)})).intValue());")
391-
out.puts(s"${idToStr(id)} = new ${kaitaiType2JavaType(ArrayTypeInStream(dataType))}(((Number) (${expression(repeatExpr)})).intValue());")
389+
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, repeatExpr: expr): Unit = {
392390
out.puts(s"for (int i = 0; i < ${expression(repeatExpr)}; i++) {")
393391
out.inc
394392

395393
importList.add("java.util.ArrayList")
396394
}
397395

398-
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit = {
399-
out.puts(s"${privateMemberName(id)}.add($expr);")
400-
}
396+
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit =
397+
handleAssignmentRepeatEos(id, expr)
401398

402-
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
403-
if (needRaw.level >= 1)
404-
out.puts(s"${privateMemberName(RawIdentifier(id))} = new ArrayList<byte[]>();")
405-
if (needRaw.level >= 2)
406-
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new ArrayList<byte[]>();")
407-
out.puts(s"${privateMemberName(id)} = new ${kaitaiType2JavaType(ArrayTypeInStream(dataType))}();")
399+
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
408400
out.puts("{")
409401
out.inc
410402
out.puts(s"${kaitaiType2JavaType(dataType)} ${translator.doName("_")};")
@@ -425,7 +417,7 @@ class JavaCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
425417
out.puts(s"${privateMemberName(id)}.add($tempVar);")
426418
}
427419

428-
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
420+
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
429421
typeProvider._currentIteratorType = Some(dataType)
430422
out.puts("i++;")
431423
out.dec

shared/src/main/scala/io/kaitai/struct/languages/JavaScriptCompiler.scala

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -291,19 +291,23 @@ class JavaScriptCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
291291
out.inc
292292
}
293293

294+
// TODO: replace this with UniversalFooter
294295
override def condIfFooter(expr: expr): Unit = {
295296
out.dec
296297
out.puts("}")
297298
}
298299

299-
override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw): Unit = {
300+
override def condRepeatCommonInit(id: Identifier, dataType: DataType, needRaw: NeedRaw): Unit = {
300301
if (needRaw.level >= 1)
301302
out.puts(s"${privateMemberName(RawIdentifier(id))} = [];")
302303
if (needRaw.level >= 2)
303304
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = [];")
304305
out.puts(s"${privateMemberName(id)} = [];")
305306
if (config.readStoresPos)
306307
out.puts(s"this._debug.${idToStr(id)}.arr = [];")
308+
}
309+
310+
override def condRepeatEosHeader(id: Identifier, io: String, dataType: DataType): Unit = {
307311
out.puts("var i = 0;")
308312
out.puts(s"while (!$io.isEof()) {")
309313
out.inc
@@ -319,35 +323,20 @@ class JavaScriptCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
319323
out.puts("}")
320324
}
321325

322-
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, repeatExpr: expr): Unit = {
323-
if (needRaw.level >= 1)
324-
out.puts(s"${privateMemberName(RawIdentifier(id))} = new Array(${expression(repeatExpr)});")
325-
if (needRaw.level >= 2)
326-
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = new Array(${expression(repeatExpr)});")
327-
out.puts(s"${privateMemberName(id)} = new Array(${expression(repeatExpr)});")
328-
if (config.readStoresPos)
329-
out.puts(s"this._debug.${idToStr(id)}.arr = new Array(${expression(repeatExpr)});")
326+
override def condRepeatExprHeader(id: Identifier, io: String, dataType: DataType, repeatExpr: expr): Unit = {
330327
out.puts(s"for (var i = 0; i < ${expression(repeatExpr)}; i++) {")
331328
out.inc
332329
}
333330

334-
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit = {
335-
out.puts(s"${privateMemberName(id)}[i] = $expr;")
336-
}
331+
override def handleAssignmentRepeatExpr(id: Identifier, expr: String): Unit =
332+
handleAssignmentRepeatEos(id, expr)
337333

338334
override def condRepeatExprFooter: Unit = {
339335
out.dec
340336
out.puts("}")
341337
}
342338

343-
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
344-
if (needRaw.level >= 1)
345-
out.puts(s"${privateMemberName(RawIdentifier(id))} = []")
346-
if (needRaw.level >= 2)
347-
out.puts(s"${privateMemberName(RawIdentifier(RawIdentifier(id)))} = [];")
348-
out.puts(s"${privateMemberName(id)} = []")
349-
if (config.readStoresPos)
350-
out.puts(s"this._debug.${idToStr(id)}.arr = [];")
339+
override def condRepeatUntilHeader(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
351340
out.puts("var i = 0;")
352341
out.puts("do {")
353342
out.inc
@@ -359,7 +348,7 @@ class JavaScriptCompiler(typeProvider: ClassTypeProvider, config: RuntimeConfig)
359348
out.puts(s"${privateMemberName(id)}.push($tmpName);")
360349
}
361350

362-
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, needRaw: NeedRaw, untilExpr: expr): Unit = {
351+
override def condRepeatUntilFooter(id: Identifier, io: String, dataType: DataType, untilExpr: expr): Unit = {
363352
typeProvider._currentIteratorType = Some(dataType)
364353
out.puts("i++;")
365354
out.dec

0 commit comments

Comments
 (0)