Error on non-ASCII digits in numbers instead of emulating the buggy W…

…in32 behavior There's basically no chance that anyone is using the Win32 behavior around non-ASCII digits in numbers intentionally since it leads to totally arbitrary results, and would not even be usable within a shared `.h` file.
squeek502 · Aug 6, 2023 · 98a39f8 · 98a39f8
1 parent f9654e1
commit 98a39f8
Show file tree

Hide file tree

Showing 8 changed files with 269 additions and 159 deletions.
diff --git a/README.md b/README.md
@@ -140,6 +140,8 @@ The plan is to use fuzz testing with the `rc` tool as an oracle to ensure that `
   + The Win32 RC compiler will not error and instead the number of controls will overflow and wrap back around to 0. This leads to an incorrect dialog resource.
 - `resinator` will error if a control within a `DIALOG`/`DIALOGEX` resource contains more extra data than the max of a `u16`, since the dialog needs to be able to specify the number of extra data bytes of each control as a `u16`.
   + The Win32 RC compiler will not error and instead the extra data length of the control will overflow and wrap back around to 0. This leads to an incorrect dialog resource.
+- `resinator` will error if non-ASCII digit characters are used in a number literal or resource id/type ordinal
+  + The Win32 RC compiler allows non-ASCII digit characters in base 10 number literals and subtracts the value of the ASCII `'0'` character from their codepoint value to get their 'digit' value, which leads to totally arbitrary results (e.g. the character `²` has a codepoint of `178`, and `178 - '0' = 130`, so a number literal like `1²3` would end up as the number value `1403` when evaluated). This is the case for any UTF-16 code unit for which the Windows implementation of `iswdigit` returns true.
 
 ### Unavoidable divergences from the MSVC++ `rc` tool
 

diff --git a/src/compile.zig b/src/compile.zig
@@ -364,24 +364,8 @@ pub const Compiler = struct {
     }
 
     pub fn writeResourceExternal(self: *Compiler, node: *Node.ResourceExternal, writer: anytype) !void {
-        const id_bytes = SourceBytes{
-            .slice = node.id.slice(self.source),
-            .code_page = self.input_code_pages.getForToken(node.id),
-        };
-        const type_bytes = SourceBytes{
-            .slice = node.type.slice(self.source),
-            .code_page = self.input_code_pages.getForToken(node.type),
-        };
         // Init header with data size zero for now, will need to fill it in later
-        var header = try ResourceHeader.init(
-            self.allocator,
-            id_bytes,
-            type_bytes,
-            0,
-            self.state.language,
-            self.state.version,
-            self.state.characteristics,
-        );
+        var header = try self.resourceHeader(node.id, node.type, .{});
         defer header.deinit(self.allocator);
 
         const maybe_predefined_type = header.predefinedResourceType();
@@ -1161,23 +1145,10 @@ pub const Compiler = struct {
     }
 
     pub fn writeResourceHeader(self: *Compiler, writer: anytype, id_token: Token, type_token: Token, data_size: u32, common_resource_attributes: []Token, language: res.Language) !void {
-        const id_bytes = SourceBytes{
-            .slice = id_token.slice(self.source),
-            .code_page = self.input_code_pages.getForToken(id_token),
-        };
-        const type_bytes = SourceBytes{
-            .slice = type_token.slice(self.source),
-            .code_page = self.input_code_pages.getForToken(type_token),
-        };
-        var header = try ResourceHeader.init(
-            self.allocator,
-            id_bytes,
-            type_bytes,
-            data_size,
-            language,
-            self.state.version,
-            self.state.characteristics,
-        );
+        var header = try self.resourceHeader(id_token, type_token, .{
+            .language = language,
+            .data_size = data_size,
+        });
         defer header.deinit(self.allocator);
 
         header.applyMemoryFlags(common_resource_attributes, self.source);
@@ -1247,23 +1218,9 @@ pub const Compiler = struct {
         // This intCast can't fail because the limitedWriter above guarantees that
         // we will never write more than maxInt(u32) bytes.
         const data_size: u32 = @intCast(data_buffer.items.len);
-        const id_bytes = SourceBytes{
-            .slice = node.id.slice(self.source),
-            .code_page = self.input_code_pages.getForToken(node.id),
-        };
-        const type_bytes = SourceBytes{
-            .slice = node.type.slice(self.source),
-            .code_page = self.input_code_pages.getForToken(node.type),
-        };
-        var header = try ResourceHeader.init(
-            self.allocator,
-            id_bytes,
-            type_bytes,
-            data_size,
-            self.state.language,
-            self.state.version,
-            self.state.characteristics,
-        );
+        var header = try self.resourceHeader(node.id, node.type, .{
+            .data_size = data_size,
+        });
         defer header.deinit(self.allocator);
 
         header.applyMemoryFlags(node.common_resource_attributes, self.source);
@@ -1438,6 +1395,23 @@ pub const Compiler = struct {
                             };
                             optional_statement_values.menu = try NameOrOrdinal.fromString(self.allocator, bytes);
 
+                            if (optional_statement_values.menu.? == .name) {
+                                if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(bytes)) |win32_rc_ordinal| {
+                                    try self.addErrorDetails(.{
+                                        .err = .invalid_digit_character_in_ordinal,
+                                        .type = .err,
+                                        .token = literal_node.token,
+                                    });
+                                    return self.addErrorDetailsAndFail(.{
+                                        .err = .win32_non_ascii_ordinal,
+                                        .type = .note,
+                                        .token = literal_node.token,
+                                        .print_source_line = false,
+                                        .extra = .{ .number = win32_rc_ordinal.ordinal },
+                                    });
+                                }
+                            }
+
                             // Need to keep track of some properties of the value
                             // in order to emit the appropriate warning(s) later on.
                             // See where the warning are emitted below (outside this loop)
@@ -1650,23 +1624,9 @@ pub const Compiler = struct {
         }
 
         const data_size: u32 = @intCast(data_buffer.items.len);
-        const id_bytes = SourceBytes{
-            .slice = node.id.slice(self.source),
-            .code_page = self.input_code_pages.getForToken(node.id),
-        };
-        const type_bytes = SourceBytes{
-            .slice = node.type.slice(self.source),
-            .code_page = self.input_code_pages.getForToken(node.type),
-        };
-        var header = try ResourceHeader.init(
-            self.allocator,
-            id_bytes,
-            type_bytes,
-            data_size,
-            self.state.language,
-            self.state.version,
-            self.state.characteristics,
-        );
+        var header = try self.resourceHeader(node.id, node.type, .{
+            .data_size = data_size,
+        });
         defer header.deinit(self.allocator);
 
         header.applyMemoryFlags(node.common_resource_attributes, self.source);
@@ -1978,23 +1938,9 @@ pub const Compiler = struct {
         }
 
         const data_size: u32 = @intCast(data_buffer.items.len);
-        const id_bytes = SourceBytes{
-            .slice = node.id.slice(self.source),
-            .code_page = self.input_code_pages.getForToken(node.id),
-        };
-        const type_bytes = SourceBytes{
-            .slice = node.type.slice(self.source),
-            .code_page = self.input_code_pages.getForToken(node.type),
-        };
-        var header = try ResourceHeader.init(
-            self.allocator,
-            id_bytes,
-            type_bytes,
-            data_size,
-            self.state.language,
-            self.state.version,
-            self.state.characteristics,
-        );
+        var header = try self.resourceHeader(node.id, node.type, .{
+            .data_size = data_size,
+        });
         defer header.deinit(self.allocator);
 
         header.applyMemoryFlags(node.common_resource_attributes, self.source);
@@ -2066,19 +2012,9 @@ pub const Compiler = struct {
         // This intCast can't fail because the limitedWriter above guarantees that
         // we will never write more than maxInt(u32) bytes.
         const data_size: u32 = @intCast(data_buffer.items.len);
-        const id_bytes = SourceBytes{
-            .slice = node.id.slice(self.source),
-            .code_page = self.input_code_pages.getForToken(node.id),
-        };
-        var header = try ResourceHeader.init(
-            self.allocator,
-            id_bytes,
-            type_bytes,
-            data_size,
-            self.state.language,
-            self.state.version,
-            self.state.characteristics,
-        );
+        var header = try self.resourceHeader(node.id, node.type, .{
+            .data_size = data_size,
+        });
         defer header.deinit(self.allocator);
 
         header.applyMemoryFlags(node.common_resource_attributes, self.source);
@@ -2342,17 +2278,9 @@ pub const Compiler = struct {
         // And now that we know the full size of this node (including its children), set its size
         std.mem.writeIntLittle(u16, data_buffer.items[0..2], data_size);
 
-        const type_bytes = self.sourceBytesForToken(node.versioninfo);
-        const id_bytes = self.sourceBytesForToken(node.id);
-        var header = try ResourceHeader.init(
-            self.allocator,
-            id_bytes,
-            type_bytes,
-            data_size,
-            self.state.language,
-            self.state.version,
-            self.state.characteristics,
-        );
+        var header = try self.resourceHeader(node.id, node.versioninfo, .{
+            .data_size = data_size,
+        });
         defer header.deinit(self.allocator);
 
         header.applyMemoryFlags(node.common_resource_attributes, self.source);
@@ -2527,6 +2455,57 @@ pub const Compiler = struct {
         }
     }
 
+    pub const ResourceHeaderOptions = struct {
+        language: ?res.Language = null,
+        data_size: DWORD = 0,
+    };
+
+    pub fn resourceHeader(self: *Compiler, id_token: Token, type_token: Token, options: ResourceHeaderOptions) !ResourceHeader {
+        const id_bytes = self.sourceBytesForToken(id_token);
+        const type_bytes = self.sourceBytesForToken(type_token);
+        return ResourceHeader.init(
+            self.allocator,
+            id_bytes,
+            type_bytes,
+            options.data_size,
+            options.language orelse self.state.language,
+            self.state.version,
+            self.state.characteristics,
+        ) catch |err| switch (err) {
+            error.OutOfMemory => |e| return e,
+            error.TypeNonAsciiOrdinal => {
+                const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes).?;
+                try self.addErrorDetails(.{
+                    .err = .invalid_digit_character_in_ordinal,
+                    .type = .err,
+                    .token = type_token,
+                });
+                return self.addErrorDetailsAndFail(.{
+                    .err = .win32_non_ascii_ordinal,
+                    .type = .note,
+                    .token = type_token,
+                    .print_source_line = false,
+                    .extra = .{ .number = win32_rc_ordinal.ordinal },
+                });
+            },
+            error.IdNonAsciiOrdinal => {
+                const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes).?;
+                try self.addErrorDetails(.{
+                    .err = .invalid_digit_character_in_ordinal,
+                    .type = .err,
+                    .token = id_token,
+                });
+                return self.addErrorDetailsAndFail(.{
+                    .err = .win32_non_ascii_ordinal,
+                    .type = .note,
+                    .token = id_token,
+                    .print_source_line = false,
+                    .extra = .{ .number = win32_rc_ordinal.ordinal },
+                });
+            },
+        };
+    }
+
     pub const ResourceHeader = struct {
         name_value: NameOrOrdinal,
         type_value: NameOrOrdinal,
@@ -2537,7 +2516,9 @@ pub const Compiler = struct {
         characteristics: DWORD,
         data_version: DWORD = 0,
 
-        pub fn init(allocator: Allocator, id_bytes: SourceBytes, type_bytes: SourceBytes, data_size: DWORD, language: res.Language, version: DWORD, characteristics: DWORD) !ResourceHeader {
+        pub const InitError = error{ OutOfMemory, IdNonAsciiOrdinal, TypeNonAsciiOrdinal };
+
+        pub fn init(allocator: Allocator, id_bytes: SourceBytes, type_bytes: SourceBytes, data_size: DWORD, language: res.Language, version: DWORD, characteristics: DWORD) InitError!ResourceHeader {
             const type_value = type: {
                 const resource_type = Resource.fromString(type_bytes);
                 if (res.RT.fromResource(resource_type)) |rt_constant| {
@@ -2547,9 +2528,19 @@ pub const Compiler = struct {
                 }
             };
             errdefer type_value.deinit(allocator);
+            if (type_value == .name) {
+                if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes)) |_| {
+                    return error.TypeNonAsciiOrdinal;
+                }
+            }
 
             const name_value = try NameOrOrdinal.fromString(allocator, id_bytes);
             errdefer name_value.deinit(allocator);
+            if (name_value == .name) {
+                if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes)) |_| {
+                    return error.IdNonAsciiOrdinal;
+                }
+            }
 
             const predefined_resource_type = type_value.predefinedResourceType();
 
@@ -4461,10 +4452,10 @@ test "filename evaluation" {
         null,
         tmp_dir.dir,
     );
-    // Same with number literals (0xB2 is ² in Windows-1252)
+    // Number literals are accepted
     try testCompileErrorDetailsWithDir(
-        &.{.{ .type = .err, .str = "unable to open file '-1²': FileNotFound" }},
-        "#pragma code_page(1252)\n1 RCDATA -1\xB2",
+        &.{.{ .type = .err, .str = "unable to open file '-12': FileNotFound" }},
+        "#pragma code_page(1252)\n1 RCDATA -12",
         null,
         tmp_dir.dir,
     );

diff --git a/src/errors.zig b/src/errors.zig
@@ -279,6 +279,7 @@ pub const ErrorDetails = struct {
         unfinished_string_literal,
         string_literal_too_long,
         invalid_number_with_exponent,
+        invalid_digit_character_in_number_literal,
         illegal_byte,
         illegal_byte_outside_string_literals,
         illegal_codepoint_outside_string_literals,
@@ -381,13 +382,18 @@ pub const ErrorDetails = struct {
         dialog_menu_id_was_uppercased,
         /// `menu_or_class` is populated and contains the type of the parameter statement
         duplicate_menu_or_class_skipped,
+        invalid_digit_character_in_ordinal,
 
         // Literals
         /// `number` is populated
         rc_would_miscompile_codepoint_byte_swap,
         /// `number` is populated
         rc_would_miscompile_codepoint_skip,
         tab_converted_to_spaces,
+
+        // General (used in various places)
+        /// `number` is populated and contains the value that the ordinal would have in the Win32 RC compiler implementation
+        win32_non_ascii_ordinal,
     };
 
     pub fn render(self: ErrorDetails, writer: anytype, source: []const u8, strings: []const []const u8) !void {
@@ -401,6 +407,11 @@ pub const ErrorDetails = struct {
             .invalid_number_with_exponent => {
                 return writer.print("base 10 number literal with exponent is not allowed: {s}", .{self.token.slice(source)});
             },
+            .invalid_digit_character_in_number_literal => switch (self.type) {
+                .err, .warning => return writer.writeAll("non-ASCII digit characters are not allowed in number literals"),
+                .note => return writer.writeAll("the Win32 RC compiler allows non-ASCII digit characters, but will miscompile them"),
+                .hint => return,
+            },
             .illegal_byte => {
                 return writer.print("character '{s}' is not allowed", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))});
             },
@@ -720,6 +731,9 @@ pub const ErrorDetails = struct {
                     @tagName(self.extra.menu_or_class),
                 });
             },
+            .invalid_digit_character_in_ordinal => {
+                return writer.writeAll("non-ASCII digit characters are not allowed in ordinal (number) values");
+            },
             .rc_would_miscompile_codepoint_byte_swap => switch (self.type) {
                 .err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the bytes of the UTF-16 code unit would be swapped)", .{self.extra.number}),
                 .note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}),
@@ -735,6 +749,11 @@ pub const ErrorDetails = struct {
                 .note => return writer.writeAll("to include the tab character itself in a string, the escape sequence \\t should be used"),
                 .hint => return,
             },
+            .win32_non_ascii_ordinal => switch (self.type) {
+                .err, .warning => unreachable,
+                .note => return writer.print("the Win32 RC compiler would accept this as an ordinal but its value would be {}", .{self.extra.number}),
+                .hint => return,
+            },
         }
     }