Skip to content

Commit

Permalink
Error on non-ASCII digits in numbers instead of emulating the buggy W…
Browse files Browse the repository at this point in the history
…in32 behavior

There's basically no chance that anyone is using the Win32 behavior around non-ASCII digits in numbers intentionally since it leads to totally arbitrary results, and would not even be usable within a shared `.h` file.
  • Loading branch information
squeek502 committed Aug 6, 2023
1 parent f9654e1 commit 98a39f8
Show file tree
Hide file tree
Showing 8 changed files with 269 additions and 159 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ The plan is to use fuzz testing with the `rc` tool as an oracle to ensure that `
+ The Win32 RC compiler will not error and instead the number of controls will overflow and wrap back around to 0. This leads to an incorrect dialog resource.
- `resinator` will error if a control within a `DIALOG`/`DIALOGEX` resource contains more extra data than the max of a `u16`, since the dialog needs to be able to specify the number of extra data bytes of each control as a `u16`.
+ The Win32 RC compiler will not error and instead the extra data length of the control will overflow and wrap back around to 0. This leads to an incorrect dialog resource.
- `resinator` will error if non-ASCII digit characters are used in a number literal or resource id/type ordinal
+ The Win32 RC compiler allows non-ASCII digit characters in base 10 number literals and subtracts the value of the ASCII `'0'` character from their codepoint value to get their 'digit' value, which leads to totally arbitrary results (e.g. the character `²` has a codepoint of `178`, and `178 - '0' = 130`, so a number literal like `1²3` would end up as the number value `1403` when evaluated). This is the case for any UTF-16 code unit for which the Windows implementation of `iswdigit` returns true.

### Unavoidable divergences from the MSVC++ `rc` tool

Expand Down
217 changes: 104 additions & 113 deletions src/compile.zig
Original file line number Diff line number Diff line change
Expand Up @@ -364,24 +364,8 @@ pub const Compiler = struct {
}

pub fn writeResourceExternal(self: *Compiler, node: *Node.ResourceExternal, writer: anytype) !void {
const id_bytes = SourceBytes{
.slice = node.id.slice(self.source),
.code_page = self.input_code_pages.getForToken(node.id),
};
const type_bytes = SourceBytes{
.slice = node.type.slice(self.source),
.code_page = self.input_code_pages.getForToken(node.type),
};
// Init header with data size zero for now, will need to fill it in later
var header = try ResourceHeader.init(
self.allocator,
id_bytes,
type_bytes,
0,
self.state.language,
self.state.version,
self.state.characteristics,
);
var header = try self.resourceHeader(node.id, node.type, .{});
defer header.deinit(self.allocator);

const maybe_predefined_type = header.predefinedResourceType();
Expand Down Expand Up @@ -1161,23 +1145,10 @@ pub const Compiler = struct {
}

pub fn writeResourceHeader(self: *Compiler, writer: anytype, id_token: Token, type_token: Token, data_size: u32, common_resource_attributes: []Token, language: res.Language) !void {
const id_bytes = SourceBytes{
.slice = id_token.slice(self.source),
.code_page = self.input_code_pages.getForToken(id_token),
};
const type_bytes = SourceBytes{
.slice = type_token.slice(self.source),
.code_page = self.input_code_pages.getForToken(type_token),
};
var header = try ResourceHeader.init(
self.allocator,
id_bytes,
type_bytes,
data_size,
language,
self.state.version,
self.state.characteristics,
);
var header = try self.resourceHeader(id_token, type_token, .{
.language = language,
.data_size = data_size,
});
defer header.deinit(self.allocator);

header.applyMemoryFlags(common_resource_attributes, self.source);
Expand Down Expand Up @@ -1247,23 +1218,9 @@ pub const Compiler = struct {
// This intCast can't fail because the limitedWriter above guarantees that
// we will never write more than maxInt(u32) bytes.
const data_size: u32 = @intCast(data_buffer.items.len);
const id_bytes = SourceBytes{
.slice = node.id.slice(self.source),
.code_page = self.input_code_pages.getForToken(node.id),
};
const type_bytes = SourceBytes{
.slice = node.type.slice(self.source),
.code_page = self.input_code_pages.getForToken(node.type),
};
var header = try ResourceHeader.init(
self.allocator,
id_bytes,
type_bytes,
data_size,
self.state.language,
self.state.version,
self.state.characteristics,
);
var header = try self.resourceHeader(node.id, node.type, .{
.data_size = data_size,
});
defer header.deinit(self.allocator);

header.applyMemoryFlags(node.common_resource_attributes, self.source);
Expand Down Expand Up @@ -1438,6 +1395,23 @@ pub const Compiler = struct {
};
optional_statement_values.menu = try NameOrOrdinal.fromString(self.allocator, bytes);

if (optional_statement_values.menu.? == .name) {
if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(bytes)) |win32_rc_ordinal| {
try self.addErrorDetails(.{
.err = .invalid_digit_character_in_ordinal,
.type = .err,
.token = literal_node.token,
});
return self.addErrorDetailsAndFail(.{
.err = .win32_non_ascii_ordinal,
.type = .note,
.token = literal_node.token,
.print_source_line = false,
.extra = .{ .number = win32_rc_ordinal.ordinal },
});
}
}

// Need to keep track of some properties of the value
// in order to emit the appropriate warning(s) later on.
// See where the warning are emitted below (outside this loop)
Expand Down Expand Up @@ -1650,23 +1624,9 @@ pub const Compiler = struct {
}

const data_size: u32 = @intCast(data_buffer.items.len);
const id_bytes = SourceBytes{
.slice = node.id.slice(self.source),
.code_page = self.input_code_pages.getForToken(node.id),
};
const type_bytes = SourceBytes{
.slice = node.type.slice(self.source),
.code_page = self.input_code_pages.getForToken(node.type),
};
var header = try ResourceHeader.init(
self.allocator,
id_bytes,
type_bytes,
data_size,
self.state.language,
self.state.version,
self.state.characteristics,
);
var header = try self.resourceHeader(node.id, node.type, .{
.data_size = data_size,
});
defer header.deinit(self.allocator);

header.applyMemoryFlags(node.common_resource_attributes, self.source);
Expand Down Expand Up @@ -1978,23 +1938,9 @@ pub const Compiler = struct {
}

const data_size: u32 = @intCast(data_buffer.items.len);
const id_bytes = SourceBytes{
.slice = node.id.slice(self.source),
.code_page = self.input_code_pages.getForToken(node.id),
};
const type_bytes = SourceBytes{
.slice = node.type.slice(self.source),
.code_page = self.input_code_pages.getForToken(node.type),
};
var header = try ResourceHeader.init(
self.allocator,
id_bytes,
type_bytes,
data_size,
self.state.language,
self.state.version,
self.state.characteristics,
);
var header = try self.resourceHeader(node.id, node.type, .{
.data_size = data_size,
});
defer header.deinit(self.allocator);

header.applyMemoryFlags(node.common_resource_attributes, self.source);
Expand Down Expand Up @@ -2066,19 +2012,9 @@ pub const Compiler = struct {
// This intCast can't fail because the limitedWriter above guarantees that
// we will never write more than maxInt(u32) bytes.
const data_size: u32 = @intCast(data_buffer.items.len);
const id_bytes = SourceBytes{
.slice = node.id.slice(self.source),
.code_page = self.input_code_pages.getForToken(node.id),
};
var header = try ResourceHeader.init(
self.allocator,
id_bytes,
type_bytes,
data_size,
self.state.language,
self.state.version,
self.state.characteristics,
);
var header = try self.resourceHeader(node.id, node.type, .{
.data_size = data_size,
});
defer header.deinit(self.allocator);

header.applyMemoryFlags(node.common_resource_attributes, self.source);
Expand Down Expand Up @@ -2342,17 +2278,9 @@ pub const Compiler = struct {
// And now that we know the full size of this node (including its children), set its size
std.mem.writeIntLittle(u16, data_buffer.items[0..2], data_size);

const type_bytes = self.sourceBytesForToken(node.versioninfo);
const id_bytes = self.sourceBytesForToken(node.id);
var header = try ResourceHeader.init(
self.allocator,
id_bytes,
type_bytes,
data_size,
self.state.language,
self.state.version,
self.state.characteristics,
);
var header = try self.resourceHeader(node.id, node.versioninfo, .{
.data_size = data_size,
});
defer header.deinit(self.allocator);

header.applyMemoryFlags(node.common_resource_attributes, self.source);
Expand Down Expand Up @@ -2527,6 +2455,57 @@ pub const Compiler = struct {
}
}

pub const ResourceHeaderOptions = struct {
language: ?res.Language = null,
data_size: DWORD = 0,
};

pub fn resourceHeader(self: *Compiler, id_token: Token, type_token: Token, options: ResourceHeaderOptions) !ResourceHeader {
const id_bytes = self.sourceBytesForToken(id_token);
const type_bytes = self.sourceBytesForToken(type_token);
return ResourceHeader.init(
self.allocator,
id_bytes,
type_bytes,
options.data_size,
options.language orelse self.state.language,
self.state.version,
self.state.characteristics,
) catch |err| switch (err) {
error.OutOfMemory => |e| return e,
error.TypeNonAsciiOrdinal => {
const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes).?;
try self.addErrorDetails(.{
.err = .invalid_digit_character_in_ordinal,
.type = .err,
.token = type_token,
});
return self.addErrorDetailsAndFail(.{
.err = .win32_non_ascii_ordinal,
.type = .note,
.token = type_token,
.print_source_line = false,
.extra = .{ .number = win32_rc_ordinal.ordinal },
});
},
error.IdNonAsciiOrdinal => {
const win32_rc_ordinal = NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes).?;
try self.addErrorDetails(.{
.err = .invalid_digit_character_in_ordinal,
.type = .err,
.token = id_token,
});
return self.addErrorDetailsAndFail(.{
.err = .win32_non_ascii_ordinal,
.type = .note,
.token = id_token,
.print_source_line = false,
.extra = .{ .number = win32_rc_ordinal.ordinal },
});
},
};
}

pub const ResourceHeader = struct {
name_value: NameOrOrdinal,
type_value: NameOrOrdinal,
Expand All @@ -2537,7 +2516,9 @@ pub const Compiler = struct {
characteristics: DWORD,
data_version: DWORD = 0,

pub fn init(allocator: Allocator, id_bytes: SourceBytes, type_bytes: SourceBytes, data_size: DWORD, language: res.Language, version: DWORD, characteristics: DWORD) !ResourceHeader {
pub const InitError = error{ OutOfMemory, IdNonAsciiOrdinal, TypeNonAsciiOrdinal };

pub fn init(allocator: Allocator, id_bytes: SourceBytes, type_bytes: SourceBytes, data_size: DWORD, language: res.Language, version: DWORD, characteristics: DWORD) InitError!ResourceHeader {
const type_value = type: {
const resource_type = Resource.fromString(type_bytes);
if (res.RT.fromResource(resource_type)) |rt_constant| {
Expand All @@ -2547,9 +2528,19 @@ pub const Compiler = struct {
}
};
errdefer type_value.deinit(allocator);
if (type_value == .name) {
if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(type_bytes)) |_| {
return error.TypeNonAsciiOrdinal;
}
}

const name_value = try NameOrOrdinal.fromString(allocator, id_bytes);
errdefer name_value.deinit(allocator);
if (name_value == .name) {
if (NameOrOrdinal.maybeNonAsciiOrdinalFromString(id_bytes)) |_| {
return error.IdNonAsciiOrdinal;
}
}

const predefined_resource_type = type_value.predefinedResourceType();

Expand Down Expand Up @@ -4461,10 +4452,10 @@ test "filename evaluation" {
null,
tmp_dir.dir,
);
// Same with number literals (0xB2 is ² in Windows-1252)
// Number literals are accepted
try testCompileErrorDetailsWithDir(
&.{.{ .type = .err, .str = "unable to open file '-': FileNotFound" }},
"#pragma code_page(1252)\n1 RCDATA -1\xB2",
&.{.{ .type = .err, .str = "unable to open file '-12': FileNotFound" }},
"#pragma code_page(1252)\n1 RCDATA -12",
null,
tmp_dir.dir,
);
Expand Down
19 changes: 19 additions & 0 deletions src/errors.zig
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ pub const ErrorDetails = struct {
unfinished_string_literal,
string_literal_too_long,
invalid_number_with_exponent,
invalid_digit_character_in_number_literal,
illegal_byte,
illegal_byte_outside_string_literals,
illegal_codepoint_outside_string_literals,
Expand Down Expand Up @@ -381,13 +382,18 @@ pub const ErrorDetails = struct {
dialog_menu_id_was_uppercased,
/// `menu_or_class` is populated and contains the type of the parameter statement
duplicate_menu_or_class_skipped,
invalid_digit_character_in_ordinal,

// Literals
/// `number` is populated
rc_would_miscompile_codepoint_byte_swap,
/// `number` is populated
rc_would_miscompile_codepoint_skip,
tab_converted_to_spaces,

// General (used in various places)
/// `number` is populated and contains the value that the ordinal would have in the Win32 RC compiler implementation
win32_non_ascii_ordinal,
};

pub fn render(self: ErrorDetails, writer: anytype, source: []const u8, strings: []const []const u8) !void {
Expand All @@ -401,6 +407,11 @@ pub const ErrorDetails = struct {
.invalid_number_with_exponent => {
return writer.print("base 10 number literal with exponent is not allowed: {s}", .{self.token.slice(source)});
},
.invalid_digit_character_in_number_literal => switch (self.type) {
.err, .warning => return writer.writeAll("non-ASCII digit characters are not allowed in number literals"),
.note => return writer.writeAll("the Win32 RC compiler allows non-ASCII digit characters, but will miscompile them"),
.hint => return,
},
.illegal_byte => {
return writer.print("character '{s}' is not allowed", .{std.fmt.fmtSliceEscapeUpper(self.token.slice(source))});
},
Expand Down Expand Up @@ -720,6 +731,9 @@ pub const ErrorDetails = struct {
@tagName(self.extra.menu_or_class),
});
},
.invalid_digit_character_in_ordinal => {
return writer.writeAll("non-ASCII digit characters are not allowed in ordinal (number) values");
},
.rc_would_miscompile_codepoint_byte_swap => switch (self.type) {
.err, .warning => return writer.print("codepoint U+{X} within a string literal would be miscompiled by the Win32 RC compiler (the bytes of the UTF-16 code unit would be swapped)", .{self.extra.number}),
.note => return writer.print("to avoid the potential miscompilation, an integer escape sequence in a wide string literal could be used instead: L\"\\x{X}\"", .{self.extra.number}),
Expand All @@ -735,6 +749,11 @@ pub const ErrorDetails = struct {
.note => return writer.writeAll("to include the tab character itself in a string, the escape sequence \\t should be used"),
.hint => return,
},
.win32_non_ascii_ordinal => switch (self.type) {
.err, .warning => unreachable,
.note => return writer.print("the Win32 RC compiler would accept this as an ordinal but its value would be {}", .{self.extra.number}),
.hint => return,
},
}
}

Expand Down
Loading

0 comments on commit 98a39f8

Please sign in to comment.