Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix backtick escaping and add more tests #10980

Merged
merged 11 commits into from
May 14, 2024
117 changes: 57 additions & 60 deletions src/shell/shell.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2345,6 +2345,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
switch (char) {
// possibly double bracket open
'[' => {
comptime assertSpecialChar('[');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
if (self.peek()) |p| {
if (p.escaped or p.char != '[') break :escaped;
Expand All @@ -2371,6 +2373,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
break :escaped;
},
']' => {
comptime assertSpecialChar(']');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
if (self.peek()) |p| {
if (p.escaped or p.char != ']') break :escaped;
Expand Down Expand Up @@ -2398,6 +2402,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
},

'#' => {
comptime assertSpecialChar('#');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
const whitespace_preceding =
if (self.chars.prev) |prev|
Expand All @@ -2410,12 +2416,16 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
continue;
},
';' => {
comptime assertSpecialChar(';');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
try self.break_word(true);
try self.tokens.append(.Semicolon);
continue;
},
'\n' => {
comptime assertSpecialChar('\n');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
try self.break_word_impl(true, true, false);
try self.tokens.append(.Newline);
Expand All @@ -2424,6 +2434,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

// glob asterisks
'*' => {
comptime assertSpecialChar('*');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
if (self.peek()) |next| {
if (!next.escaped and next.char == '*') {
Expand All @@ -2440,18 +2452,24 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

// brace expansion syntax
'{' => {
comptime assertSpecialChar('{');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
try self.break_word(false);
try self.tokens.append(.BraceBegin);
continue;
},
',' => {
comptime assertSpecialChar(',');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
try self.break_word(false);
try self.tokens.append(.Comma);
continue;
},
'}' => {
comptime assertSpecialChar('}');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
try self.break_word(false);
try self.tokens.append(.BraceEnd);
Expand All @@ -2460,6 +2478,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

// Command substitution
'`' => {
comptime assertSpecialChar('`');

if (self.chars.state == .Single) break :escaped;
if (self.in_subshell == .backtick) {
try self.break_word(true);
Expand All @@ -2474,6 +2494,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
},
// Command substitution/vars
'$' => {
comptime assertSpecialChar('$');

if (self.chars.state == .Single) break :escaped;

const peeked = self.peek() orelse InputChar{ .char = 0 };
Expand Down Expand Up @@ -2509,12 +2531,16 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
continue;
},
'(' => {
comptime assertSpecialChar('(');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
try self.break_word(true);
try self.eat_subshell(.normal);
continue;
},
')' => {
comptime assertSpecialChar(')');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
if (self.in_subshell != .dollar and self.in_subshell != .normal) {
self.add_error("Unexpected ')'");
Expand Down Expand Up @@ -2544,6 +2570,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
},

'0'...'9' => {
comptime for ('0'..'9') |c| assertSpecialChar(c);

if (self.chars.state != .Normal) break :escaped;
const snapshot = self.make_snapshot();
if (self.eat_redirect(input)) |redirect| {
Expand All @@ -2557,6 +2585,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

// Operators
'|' => {
comptime assertSpecialChar('|');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
try self.break_word(true);

Expand All @@ -2577,20 +2607,26 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
continue;
},
'>' => {
comptime assertSpecialChar('>');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
try self.break_word_impl(true, false, true);
const redirect = self.eat_simple_redirect(.out);
try self.tokens.append(.{ .Redirect = redirect });
continue;
},
'<' => {
comptime assertSpecialChar('<');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
try self.break_word_impl(true, false, true);
const redirect = self.eat_simple_redirect(.in);
try self.tokens.append(.{ .Redirect = redirect });
continue;
},
'&' => {
comptime assertSpecialChar('&');

if (self.chars.state == .Single or self.chars.state == .Double) break :escaped;
try self.break_word(true);

Expand Down Expand Up @@ -2619,6 +2655,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

// 2. State switchers
'\'' => {
comptime assertSpecialChar('\'');

if (self.chars.state == .Single) {
self.chars.state = .Normal;
continue;
Expand All @@ -2630,6 +2668,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {
break :escaped;
},
'"' => {
comptime assertSpecialChar('"');

if (self.chars.state == .Single) break :escaped;
if (self.chars.state == .Normal) {
try self.break_word(false);
Expand All @@ -2644,6 +2684,8 @@ pub fn NewLexer(comptime encoding: StringEncoding) type {

// 3. Word breakers
' ' => {
comptime assertSpecialChar(' ');

if (self.chars.state == .Normal) {
try self.break_word_impl(true, true, false);
continue;
Expand Down Expand Up @@ -3961,7 +4003,18 @@ pub const ShellSrcBuilder = struct {
};

/// Characters that need to escaped
const SPECIAL_CHARS = [_]u8{ '$', '>', '&', '|', '=', ';', '\n', '{', '}', ',', '(', ')', '\\', '\"', ' ', '\'' };
const SPECIAL_CHARS = [_]u8{ '~', '[', ']', '#', ';', '\n', '*', '{', ',', '}', '`', '$', '=', '(', ')', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '|', '>', '<', '&', '\'', '"', ' ', '\\' };
const SPECIAL_CHARS_TABLE: std.bit_set.IntegerBitSet(256) = brk: {
var table = std.bit_set.IntegerBitSet(256).initEmpty();
for (SPECIAL_CHARS) |c| {
table.set(c);
}
break :brk table;
};
pub fn assertSpecialChar(c: u8) void {
comptime bun.assert(@inComptime());
bun.assert(SPECIAL_CHARS_TABLE.isSet(c));
}
/// Characters that need to be backslashed inside double quotes
const BACKSLASHABLE_CHARS = [_]u8{ '$', '`', '"', '\\' };

Expand Down Expand Up @@ -4070,39 +4123,11 @@ pub fn needsEscapeBunstr(bunstr: bun.String) bool {
return needsEscapeUtf8AsciiLatin1(bunstr.byteSlice());
}

pub fn needsEscapeUTF16Slow(str: []const u16) bool {
for (str) |codeunit| {
inline for (SPECIAL_CHARS) |spc| {
if (@as(u16, @intCast(spc)) == codeunit) return true;
}
}

return false;
}

pub fn needsEscapeUTF16(str: []const u16) bool {
if (str.len < 64) return needsEscapeUTF16Slow(str);

const needles = comptime brk: {
var needles: [SPECIAL_CHARS.len]@Vector(8, u16) = undefined;
for (SPECIAL_CHARS, 0..) |c, i| {
needles[i] = @splat(@as(u16, @intCast(c)));
}
break :brk needles;
};

var i: usize = 0;
while (i + 8 <= str.len) : (i += 8) {
const haystack: @Vector(8, u16) = str[i..][0..8].*;

inline for (needles) |needle| {
const result = haystack == needle;
if (std.simd.firstTrue(result) != null) return true;
}
for (str) |codeunit| {
if (codeunit < 0xff and SPECIAL_CHARS_TABLE.isSet(codeunit)) return true;
}

if (i < str.len) return needsEscapeUTF16Slow(str[i..]);

return false;
}

Expand All @@ -4111,36 +4136,8 @@ pub fn needsEscapeUTF16(str: []const u16) bool {
/// false positives, but it is faster than running the shell lexer through the
/// input string for a more correct implementation.
pub fn needsEscapeUtf8AsciiLatin1(str: []const u8) bool {
if (str.len < 128) return needsEscapeUtf8AsciiLatin1Slow(str);

const needles = comptime brk: {
var needles: [SPECIAL_CHARS.len]@Vector(16, u8) = undefined;
for (SPECIAL_CHARS, 0..) |c, i| {
needles[i] = @splat(c);
}
break :brk needles;
};

var i: usize = 0;
while (i + 16 <= str.len) : (i += 16) {
const haystack: @Vector(16, u8) = str[i..][0..16].*;

inline for (needles) |needle| {
const result = haystack == needle;
if (std.simd.firstTrue(result) != null) return true;
}
}

if (i < str.len) return needsEscapeUtf8AsciiLatin1Slow(str[i..]);

return false;
}

pub fn needsEscapeUtf8AsciiLatin1Slow(str: []const u8) bool {
for (str) |c| {
inline for (SPECIAL_CHARS) |spc| {
if (spc == c) return true;
}
if (SPECIAL_CHARS_TABLE.isSet(c)) return true;
}
return false;
}
Expand Down
51 changes: 50 additions & 1 deletion test/js/bun/shell/bunshell.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,56 @@ ${temp_dir}`
/**
*
*/
describe("escaping", () => {});
describe("escaping", () => {
// Testing characters that need special handling when not quoted or in different contexts
TestBuilder.command`echo ${"$"}`.stdout("$\n").runAsTest("dollar");
TestBuilder.command`echo ${">"}`.stdout(">\n").runAsTest("right_arrow");
TestBuilder.command`echo ${"&"}`.stdout("&\n").runAsTest("ampersand");
TestBuilder.command`echo ${"|"}`.stdout("|\n").runAsTest("pipe");
TestBuilder.command`echo ${"="}`.stdout("=\n").runAsTest("equals");
TestBuilder.command`echo ${";"}`.stdout(";\n").runAsTest("semicolon");
TestBuilder.command`echo ${"\n"}`.stdout("\n\n").runAsTest("newline");
TestBuilder.command`echo ${"{"}`.stdout("{\n").runAsTest("left_brace");
TestBuilder.command`echo ${"}"}`.stdout("}\n").runAsTest("right_brace");
TestBuilder.command`echo ${","}`.stdout(",\n").runAsTest("comma");
TestBuilder.command`echo ${"("}`.stdout("(\n").runAsTest("left_parenthesis");
TestBuilder.command`echo ${")"}`.stdout(")\n").runAsTest("right_parenthesis");
TestBuilder.command`echo ${"\\"}`.stdout("\\\n").runAsTest("backslash");
TestBuilder.command`echo ${" "}`.stdout(" \n").runAsTest("space");
TestBuilder.command`echo ${"'hello'"}`.stdout("'hello'\n").runAsTest("single_quote");
TestBuilder.command`echo ${'"hello"'}`.stdout('"hello"\n').runAsTest("double_quote");
TestBuilder.command`echo ${"`hello`"}`.stdout("`hello`\n").runAsTest("backtick");

// Testing characters that need to be escaped within double quotes
TestBuilder.command`echo "${"$"}"`.stdout("$\n").runAsTest("dollar_in_dquotes");
TestBuilder.command`echo "${"`"}"`.stdout("`\n").runAsTest("backtick_in_dquotes");
TestBuilder.command`echo "${'"'}"`.stdout('"\n').runAsTest("double_quote_in_dquotes");
TestBuilder.command`echo "${"\\"}"`.stdout("\\\n").runAsTest("backslash_in_dquotes");

// Testing characters that need to be escaped within single quotes
TestBuilder.command`echo '${"$"}'`.stdout("$\n").runAsTest("dollar_in_squotes");
TestBuilder.command`echo '${'"'}'`.stdout('"\n').runAsTest("double_quote_in_squotes");
TestBuilder.command`echo '${"`"}'`.stdout("`\n").runAsTest("backtick_in_squotes");
TestBuilder.command`echo '${"\\\\"}'`.stdout("\\\\\n").runAsTest("backslash_in_squotes");

// Ensure that backslash escapes within single quotes are treated literally
TestBuilder.command`echo '${"\\"}'`.stdout("\\\n").runAsTest("literal_backslash_single_quote");
TestBuilder.command`echo '${"\\\\"}'`.stdout("\\\\\n").runAsTest("double_backslash_single_quote");

// Edge cases with mixed quotes
TestBuilder.command`echo "'\${"$"}'"`.stdout("'${$}'\n").runAsTest("mixed_quotes_dollar");
TestBuilder.command`echo '"${"`"}"'`.stdout('"`"\n').runAsTest("mixed_quotes_backtick");

// Compound command with special characters
TestBuilder.command`echo ${"hello; echo world"}`.stdout("hello; echo world\n").runAsTest("compound_command");
TestBuilder.command`echo ${"hello > world"}`.stdout("hello > world\n").runAsTest("redirect_in_echo");
TestBuilder.command`echo ${"$(echo nested)"}`.stdout("$(echo nested)\n").runAsTest("nested_command_substitution");

// Pathological cases involving multiple special characters
TestBuilder.command`echo ${"complex > command; $(execute)"}`
.stdout("complex > command; $(execute)\n")
.runAsTest("complex_mixed_special_chars");
});
});

describe("deno_task", () => {
Expand Down
2 changes: 1 addition & 1 deletion test/js/bun/shell/env.positionals.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ $.nothrow();
describe("$ argv", async () => {
for (let i = 0; i < process.argv.length; i++) {
const element = process.argv[i];
TestBuilder.command`echo $${i}`
TestBuilder.command`echo $${{ raw: i }}`
.exitCode(0)
.stdout(process.argv[i] + "\n")
.runAsTest(`$${i} should equal process.argv[${i}]`);
Expand Down
Loading