Skip to content

Commit

Permalink
Fix bug in shebang handling
Browse files Browse the repository at this point in the history
Shebang handling was too agressive in stripping out the first line in cases where it is actually _not_ a shebang, but instead, valid rust (#70528). This is a second attempt at resolving this issue (the first attempt was flawed, for, among other reasons, causing an ICE in certain cases (#71372, #71471).

The behavior is now codified by a number of UI tests, but simply:
For the first line to be a shebang, the following must all be true:
1. The line must start with `#!`
2. The line must contain a non whitespace character after `#!`
3. The next character in the file, ignoring comments & whitespace must not be `[`

I believe this is a strict superset of what we used to allow, so perhaps a crater run is unnecessary, but probably not a terrible idea.
  • Loading branch information
rcoh committed May 25, 2020
1 parent ee6c0da commit a93d316
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 7 deletions.
26 changes: 19 additions & 7 deletions src/librustc_lexer/src/lib.rs
Expand Up @@ -236,16 +236,28 @@ pub enum Base {
}

/// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
/// but shebang isn't a part of rust syntax, so this function
/// skips the line if it starts with a shebang ("#!").
/// Line won't be skipped if it represents a valid Rust syntax
/// (e.g. "#![deny(missing_docs)]").
/// but shebang isn't a part of rust syntax.
pub fn strip_shebang(input: &str) -> Option<usize> {
debug_assert!(!input.is_empty());
if !input.starts_with("#!") || input.starts_with("#![") {
let first_line = input.lines().next()?;
// A shebang is intentionally loosely defined as `#! [non whitespace]` on the first line.
let could_be_shebang =
first_line.starts_with("#!") && first_line[2..].contains(|c| !is_whitespace(c));
if !could_be_shebang {
return None;
}
Some(input.find('\n').unwrap_or(input.len()))
let non_whitespace_tokens = tokenize(input).map(|tok| tok.kind).filter(|tok|
!matches!(tok, TokenKind::LineComment | TokenKind::BlockComment { .. } | TokenKind::Whitespace)
);
let prefix = [TokenKind::Pound, TokenKind::Not, TokenKind::OpenBracket];
let starts_with_attribute = non_whitespace_tokens.take(3).eq(prefix.iter().copied());
if starts_with_attribute {
// If the file starts with #![ then it's definitely not a shebang -- it couldn't be
// a rust program since a Rust program can't start with `[`
None
} else {
// It's a #!... and there isn't a `[` in sight, must be a shebang
Some(first_line.len())
}
}

/// Parses the first token from the provided input string.
Expand Down
57 changes: 57 additions & 0 deletions src/librustc_lexer/src/tests.rs
Expand Up @@ -145,4 +145,61 @@ mod tests {
}),
);
}

#[test]
fn test_valid_shebang() {
// https://github.com/rust-lang/rust/issues/70528
let input = "#!/usr/bin/rustrun\nlet x = 5;";
assert_eq!(strip_shebang(input), Some(18));
}

#[test]
fn test_invalid_shebang_valid_rust_syntax() {
// https://github.com/rust-lang/rust/issues/70528
let input = "#! [bad_attribute]";
assert_eq!(strip_shebang(input), None);
}

#[test]
fn test_shebang_second_line() {
// Because shebangs are interpreted by the kernel, they must be on the first line
let input = "\n#!/bin/bash";
assert_eq!(strip_shebang(input), None);
}

#[test]
fn test_shebang_space() {
let input = "#! /bin/bash";
assert_eq!(strip_shebang(input), Some(input.len()));
}

#[test]
fn test_shebang_empty_shebang() {
let input = "#! \n[attribute(foo)]";
assert_eq!(strip_shebang(input), None);
}

#[test]
fn test_invalid_shebang_comment() {
let input = "#!//bin/ami/a/comment\n[";
assert_eq!(strip_shebang(input), None)
}

#[test]
fn test_invalid_shebang_another_comment() {
let input = "#!/*bin/ami/a/comment*/\n[attribute";
assert_eq!(strip_shebang(input), None)
}

#[test]
fn test_shebang_valid_rust_after() {
let input = "#!/*bin/ami/a/comment*/\npub fn main() {}";
assert_eq!(strip_shebang(input), Some(23))
}

#[test]
fn test_shebang_followed_by_attrib() {
let input = "#!/bin/rust-scripts\n#![allow_unused(true)]";
assert_eq!(strip_shebang(input), Some(19));
}
}
2 changes: 2 additions & 0 deletions src/test/ui/parser/shebang/issue-71471-ignore-tidy.rs
@@ -0,0 +1,2 @@

#!B //~ expected `[`, found `B`
8 changes: 8 additions & 0 deletions src/test/ui/parser/shebang/issue-71471-ignore-tidy.stderr
@@ -0,0 +1,8 @@
error: expected `[`, found `B`
--> $DIR/issue-71471-ignore-tidy.rs:2:3
|
LL | #!B
| ^ expected `[`

error: aborting due to previous error

7 changes: 7 additions & 0 deletions src/test/ui/parser/shebang/multiline-attrib.rs
@@ -0,0 +1,7 @@
#!
[allow(unused_variables)]
// check-pass

fn main() {
let x = 5;
}
5 changes: 5 additions & 0 deletions src/test/ui/parser/shebang/regular-attrib.rs
@@ -0,0 +1,5 @@
#![allow(unused_variables)]
// check-pass
fn main() {
let x = 5;
}
9 changes: 9 additions & 0 deletions src/test/ui/parser/shebang/shebang-and-attrib.rs
@@ -0,0 +1,9 @@
#!/usr/bin/env run-cargo-script

// check-pass
#![allow(unused_variables)]


fn main() {
let x = 5;
}
6 changes: 6 additions & 0 deletions src/test/ui/parser/shebang/shebang-comment.rs
@@ -0,0 +1,6 @@
#!//bin/bash

// check-pass
fn main() {
println!("a valid shebang (that is also a rust comment)")
}
6 changes: 6 additions & 0 deletions src/test/ui/parser/shebang/shebang-must-start-file.rs
@@ -0,0 +1,6 @@
// something on the first line for tidy
#!/bin/bash //~ expected `[`, found `/`

fn main() {
println!("ok!");
}
8 changes: 8 additions & 0 deletions src/test/ui/parser/shebang/shebang-must-start-file.stderr
@@ -0,0 +1,8 @@
error: expected `[`, found `/`
--> $DIR/shebang-must-start-file.rs:2:3
|
LL | #!/bin/bash
| ^ expected `[`

error: aborting due to previous error

16 changes: 16 additions & 0 deletions src/test/ui/parser/shebang/sneaky-attrib.rs
@@ -0,0 +1,16 @@
#!//bin/bash


// This could not possibly be a shebang & also a valid rust file, since a Rust file
// can't start with `[`
/*
[ (mixing comments to also test that we ignore both types of comments)
*/
[allow(unused_variables)]
// check-pass
fn main() {
let x = 5;
}
6 changes: 6 additions & 0 deletions src/test/ui/parser/shebang/valid-shebang.rs
@@ -0,0 +1,6 @@
#!/usr/bin/env run-cargo-script

// check-pass
fn main() {
println!("Hello World!");
}
5 changes: 5 additions & 0 deletions src/tools/tidy/src/style.rs
Expand Up @@ -174,6 +174,11 @@ pub fn check(path: &Path, bad: &mut bool) {

let can_contain =
contents.contains("// ignore-tidy-") || contents.contains("# ignore-tidy-");
// Enable testing ICE's that require specific (untidy)
// file formats easily eg. `issue-1234-ignore-tidy.rs`
if filename.contains("ignore-tidy") {
return;
}
let mut skip_cr = contains_ignore_directive(can_contain, &contents, "cr");
let mut skip_undocumented_unsafe =
contains_ignore_directive(can_contain, &contents, "undocumented-unsafe");
Expand Down

0 comments on commit a93d316

Please sign in to comment.