10 changes: 2 additions & 8 deletions crates/ra_ide/src/diagnostics.rs
@@ -10,7 +10,7 @@ use ra_prof::profile;
use ra_syntax::{
algo,
ast::{self, make, AstNode},
Location, SyntaxNode, TextRange, T,
SyntaxNode, TextRange, T,
};
use ra_text_edit::{TextEdit, TextEditBuilder};

@@ -29,7 +29,7 @@ pub(crate) fn diagnostics(db: &RootDatabase, file_id: FileId) -> Vec<Diagnostic>
let mut res = Vec::new();

res.extend(parse.errors().iter().map(|err| Diagnostic {
range: location_to_range(err.location()),
range: err.range(),
message: format!("Syntax Error: {}", err),
severity: Severity::Error,
fix: None,
@@ -116,12 +116,6 @@ pub(crate) fn diagnostics(db: &RootDatabase, file_id: FileId) -> Vec<Diagnostic>
drop(sink);
res.into_inner()
}
fn location_to_range(location: Location) -> TextRange {
match location {
Location::Offset(offset) => TextRange::offset_len(offset, 1.into()),
Location::Range(range) => range,
}
}
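The helper deleted above is exactly what the new `err.range()` accessor makes unnecessary: after this PR a syntax error always carries a full `TextRange` instead of a `Location` that may be a bare offset. A minimal sketch of the before/after shapes, with toy definitions rather than the crate's real ones (the `text_unit` types are an assumption based on the imports in this diff):

    use text_unit::{TextRange, TextUnit};

    // Before: a position could be a bare offset, so every consumer had to
    // widen it into a one-character range, as `location_to_range` did above.
    pub enum Location {
        Offset(TextUnit),
        Range(TextRange),
    }

    // After: the error stores a range from the start, and `range()` is a
    // plain accessor; no conversion step is needed anywhere.
    pub struct SyntaxError {
        message: String,
        range: TextRange,
    }

    impl SyntaxError {
        pub fn new(message: impl Into<String>, range: TextRange) -> SyntaxError {
            SyntaxError { message: message.into(), range }
        }

        pub fn range(&self) -> TextRange {
            self.range
        }
    }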

fn check_unnecessary_braces_in_use_statement(
acc: &mut Vec<Diagnostic>,
8 changes: 3 additions & 5 deletions crates/ra_syntax/src/lib.rs
@@ -41,11 +41,9 @@ use crate::syntax_node::GreenNode;
pub use crate::{
algo::InsertPosition,
ast::{AstNode, AstToken},
parsing::{
lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token, TokenizeError,
},
parsing::{lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token},
ptr::{AstPtr, SyntaxNodePtr},
syntax_error::{Location, SyntaxError, SyntaxErrorKind},
syntax_error::SyntaxError,
syntax_node::{
Direction, NodeOrToken, SyntaxElement, SyntaxNode, SyntaxToken, SyntaxTreeBuilder,
},
@@ -117,7 +115,7 @@ impl Parse<SourceFile> {
pub fn debug_dump(&self) -> String {
let mut buf = format!("{:#?}", self.tree().syntax());
for err in self.errors.iter() {
writeln!(buf, "error {:?}: {}", err.location(), err.kind()).unwrap();
writeln!(buf, "error {:?}: {}", err.range(), err).unwrap();
}
buf
}
92 changes: 26 additions & 66 deletions crates/ra_syntax/src/parsing/lexer.rs
@@ -2,7 +2,7 @@
//! It is just a bridge to `rustc_lexer`.

use crate::{
SyntaxError, SyntaxErrorKind,
SyntaxError,
SyntaxKind::{self, *},
TextRange, TextUnit,
};
@@ -41,13 +41,13 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
let token_len = TextUnit::from_usize(rustc_token.len);
let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len);

let (syntax_kind, error) =
let (syntax_kind, err_message) =
rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);

tokens.push(Token { kind: syntax_kind, len: token_len });

if let Some(error) = error {
errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range));
if let Some(err_message) = err_message {
errors.push(SyntaxError::new(err_message, token_range));
}

offset += rustc_token.len;
@@ -94,76 +94,37 @@ fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
}

let rustc_token = rustc_lexer::first_token(text);
let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);

let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) };
let error = error.map(|error| {
SyntaxError::new(
SyntaxErrorKind::TokenizeError(error),
TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)),
)
let optional_error = err_message.map(|err_message| {
SyntaxError::new(err_message, TextRange::from_to(0.into(), TextUnit::of_str(text)))
});

Some((token, error))
}

// FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice:
// https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067

/// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant.
/// It describes all the types of errors that may happen during the tokenization
/// of Rust source.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TokenizeError {
/// Base prefix was provided, but there were no digits
/// after it, e.g. `0x`, `0b`.
EmptyInt,
/// Float exponent lacks digits e.g. `12.34e+`, `12.3E+`, `12e-`, `1_E-`,
EmptyExponent,

/// Block comment lacks trailing delimiter `*/`
UnterminatedBlockComment,
/// Character literal lacks trailing delimiter `'`
UnterminatedChar,
/// Characterish byte literal lacks trailing delimiter `'`
UnterminatedByte,
/// String literal lacks trailing delimiter `"`
UnterminatedString,
/// Byte string literal lacks trailing delimiter `"`
UnterminatedByteString,
/// Raw literal lacks trailing delimiter e.g. `"##`
UnterminatedRawString,
/// Raw byte string literal lacks trailing delimiter e.g. `"##`
UnterminatedRawByteString,

/// Raw string lacks a quote after the pound characters e.g. `r###`
UnstartedRawString,
/// Raw byte string lacks a quote after the pound characters e.g. `br###`
UnstartedRawByteString,

/// Lifetime starts with a number e.g. `'4ever`
LifetimeStartsWithNumber,
Some((token, optional_error))
}
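The enum removed above is replaced by plain `&'static str` messages, authored at the exact point where the lexer detects the problem and wrapped into a `SyntaxError` together with the offending range. A small self-contained sketch of the convention, with illustrative names that are not the crate's API:

    // Tokenization never fails outright: it always yields a best-effort kind,
    // plus an optional human-readable message for malformed input.
    fn lex_int_prefix(text: &str) -> (&'static str, Option<&'static str>) {
        match text {
            "0x" | "0b" | "0o" => {
                ("INT_NUMBER", Some("Missing digits after the integer base prefix"))
            }
            _ => ("INT_NUMBER", None),
        }
    }

    fn main() {
        assert_eq!(lex_int_prefix("0x").1, Some("Missing digits after the integer base prefix"));
        assert_eq!(lex_int_prefix("0xFF").1, None);
    }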

/// Returns `SyntaxKind` and an optional tokenize error message.
fn rustc_token_kind_to_syntax_kind(
rustc_token_kind: &rustc_lexer::TokenKind,
token_text: &str,
) -> (SyntaxKind, Option<TokenizeError>) {
) -> (SyntaxKind, Option<&'static str>) {
// A note on an intended tradeoff:
// We drop some useful information here (see patterns with double dots `..`).
// Storing that info in `SyntaxKind` is not possible due to its layout requirement
// of being a `u16`, which comes from `rowan::SyntaxKind`.

let syntax_kind = {
use rustc_lexer::TokenKind as TK;
use TokenizeError as TE;

match rustc_token_kind {
TK::LineComment => COMMENT,

TK::BlockComment { terminated: true } => COMMENT,
TK::BlockComment { terminated: false } => {
return (COMMENT, Some(TE::UnterminatedBlockComment));
return (
COMMENT,
Some("Missing trailing `*/` symbols to terminate the block comment"),
);
}

TK::Whitespace => WHITESPACE,
@@ -181,7 +142,7 @@ fn rustc_token_kind_to_syntax_kind(

TK::Lifetime { starts_with_number: false } => LIFETIME,
TK::Lifetime { starts_with_number: true } => {
return (LIFETIME, Some(TE::LifetimeStartsWithNumber))
return (LIFETIME, Some("Lifetime name cannot start with a number"))
}

TK::Semi => SEMI,
@@ -217,57 +178,56 @@

return (syntax_kind, None);

fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<TokenizeError>) {
fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
use rustc_lexer::LiteralKind as LK;
use TokenizeError as TE;

#[rustfmt::skip]
let syntax_kind = match *kind {
LK::Int { empty_int: false, .. } => INT_NUMBER,
LK::Int { empty_int: true, .. } => {
return (INT_NUMBER, Some(TE::EmptyInt))
return (INT_NUMBER, Some("Missing digits after the integer base prefix"))
}

LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER,
LK::Float { empty_exponent: true, .. } => {
return (FLOAT_NUMBER, Some(TE::EmptyExponent))
return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol"))
}

LK::Char { terminated: true } => CHAR,
LK::Char { terminated: false } => {
return (CHAR, Some(TE::UnterminatedChar))
return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal"))
}

LK::Byte { terminated: true } => BYTE,
LK::Byte { terminated: false } => {
return (BYTE, Some(TE::UnterminatedByte))
return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal"))
}

LK::Str { terminated: true } => STRING,
LK::Str { terminated: false } => {
return (STRING, Some(TE::UnterminatedString))
return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal"))
}


LK::ByteStr { terminated: true } => BYTE_STRING,
LK::ByteStr { terminated: false } => {
return (BYTE_STRING, Some(TE::UnterminatedByteString))
return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal"))
}

LK::RawStr { started: true, terminated: true, .. } => RAW_STRING,
LK::RawStr { started: true, terminated: false, .. } => {
return (RAW_STRING, Some(TE::UnterminatedRawString))
return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal"))
}
LK::RawStr { started: false, .. } => {
return (RAW_STRING, Some(TE::UnstartedRawString))
return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal"))
}

LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING,
LK::RawByteStr { started: true, terminated: false, .. } => {
return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString))
return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"))
}
LK::RawByteStr { started: false, .. } => {
return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString))
return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal"))
}
};

84 changes: 67 additions & 17 deletions crates/ra_syntax/src/parsing/reparsing.rs
@@ -27,8 +27,8 @@ pub(crate) fn incremental_reparse(
edit: &AtomTextEdit,
errors: Vec<SyntaxError>,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
if let Some((green, old_range)) = reparse_token(node, &edit) {
return Some((green, merge_errors(errors, Vec::new(), old_range, edit), old_range));
if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) {
return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
}

if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) {
@@ -40,7 +40,7 @@ pub(crate) fn incremental_reparse(
fn reparse_token<'node>(
root: &'node SyntaxNode,
edit: &AtomTextEdit,
) -> Option<(GreenNode, TextRange)> {
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
let prev_token_kind = prev_token.kind();
match prev_token_kind {
@@ -54,7 +54,7 @@ fn reparse_token<'node>(
}

let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
let (new_token_kind, _error) = lex_single_syntax_kind(&new_text)?;
let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;

if new_token_kind != prev_token_kind
|| (new_token_kind == IDENT && is_contextual_kw(&new_text))
@@ -76,7 +76,11 @@ fn reparse_token<'node>(

let new_token =
GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
Some((prev_token.replace_with(new_token), prev_token.text_range()))
Some((
prev_token.replace_with(new_token),
new_err.into_iter().collect(),
prev_token.text_range(),
))
}
_ => None,
}
@@ -87,7 +91,7 @@ fn reparse_block<'node>(
edit: &AtomTextEdit,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
let (node, reparser) = find_reparsable_node(root, edit.delete)?;
let text = get_text_after_edit(node.clone().into(), &edit);
let text = get_text_after_edit(node.clone().into(), edit);

let (tokens, new_lexer_errors) = tokenize(&text);
if !is_balanced(&tokens) {
@@ -162,20 +166,27 @@ fn is_balanced(tokens: &[Token]) -> bool {
fn merge_errors(
old_errors: Vec<SyntaxError>,
new_errors: Vec<SyntaxError>,
old_range: TextRange,
range_before_reparse: TextRange,
edit: &AtomTextEdit,
) -> Vec<SyntaxError> {
let mut res = Vec::new();
for e in old_errors {
if e.offset() <= old_range.start() {
Review comment (author): Attention! I suppose this `if` statement had a mistake: we need to check that `e.range().end() <= old_range.start()`. I corrected it in this PR (the tests pass either way...).

res.push(e)
} else if e.offset() >= old_range.end() {
res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len()));

for old_err in old_errors {
let old_err_range = old_err.range();
// FIXME: make sure that using `.start()` here previously was a mistake
Review comment (author): This FIXME will be removed before merging this PR.

Review comment (reviewer): It should be TODO then.

Review comment (author): Yes, I just temporarily marked it FIXME so that it passes the no-TODO tests for a while.

if old_err_range.end() <= range_before_reparse.start() {
res.push(old_err);
} else if old_err_range.start() >= range_before_reparse.end() {
let inserted_len = TextUnit::of_str(&edit.insert);
res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
// Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
}
}
for e in new_errors {
res.push(e.add_offset(old_range.start(), 0.into()));
}
res.extend(new_errors.into_iter().map(|new_err| {
// fighting borrow checker with a variable ;)
let offseted_range = new_err.range() + range_before_reparse.start();
new_err.with_range(offseted_range)
Review comment (author): I am surprised that this reports a move-after-borrow error:

new_err.with_range(*new_err.range() + range_before_reparse.start())

// where
fn with_range(mut self, range: TextRange) -> Self;
fn range(&self) -> &TextRange;
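A likely explanation, sketched with toy types (the quoted signatures come from the comment above; everything else is an assumption): when a method takes its receiver by value, the receiver is moved before the argument expression is evaluated, so `*new_err.range()` tries to borrow a value that has already been moved. The `let` binding in the diff reads the range first and sidesteps this.

    struct SyntaxErr {
        range: u32, // toy stand-in for TextRange
    }

    impl SyntaxErr {
        fn range(&self) -> &u32 {
            &self.range
        }
        fn with_range(mut self, range: u32) -> SyntaxErr {
            self.range = range;
            self
        }
    }

    fn demo(e: SyntaxErr, offset: u32) -> SyntaxErr {
        // error[E0382]: `e` is moved as the receiver before the argument runs:
        // e.with_range(*e.range() + offset)

        // The workaround from the diff: evaluate the borrow into a variable first.
        let shifted = *e.range() + offset;
        e.with_range(shifted)
    }

    fn main() {
        assert_eq!(*demo(SyntaxErr { range: 3 }, 4).range(), 7);
    }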

}));
res
}
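Two details of `merge_errors` deserve a worked example: the boundary check that the first review comment fixes, and the parenthesisation that the `HWAB` note guards. A self-contained sketch with toy `u32` offsets and made-up numbers (the real code uses `TextUnit`/`TextRange`):

    fn shift_error_start(err_start: u32, inserted_len: u32, deleted_len: u32) -> u32 {
        // Say the edit deletes 5 chars and inserts 2, and an old error starts at
        // offset 10 after the reparsed range: its new start is (10 + 2) - 5 = 7.
        // The parens matter because `inserted_len - deleted_len` would compute
        // 2 - 5 first and underflow, since the offsets are unsigned.
        (err_start + inserted_len) - deleted_len
    }

    fn keep_old_error(err_end: u32, reparse_start: u32) -> bool {
        // The corrected check: an old error survives untouched only if it ends
        // at or before the start of the re-lexed range. Comparing its *start*
        // alone, as the removed code did, would also keep an error straddling
        // the boundary, e.g. 8..12 against a range starting at 10.
        err_end <= reparse_start
    }

    fn main() {
        assert_eq!(shift_error_start(10, 2, 5), 7);
        assert!(!keep_old_error(12, 10)); // straddling error 8..12 is dropped
    }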

@@ -193,9 +204,9 @@ mod tests {

let fully_reparsed = SourceFile::parse(&after);
let incrementally_reparsed: Parse<SourceFile> = {
let f = SourceFile::parse(&before);
let before = SourceFile::parse(&before);
let (green, new_errors, range) =
incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap();
incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
Parse::new(green, new_errors)
};
@@ -204,6 +215,7 @@
&format!("{:#?}", fully_reparsed.tree().syntax()),
&format!("{:#?}", incrementally_reparsed.tree().syntax()),
);
assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
}
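A hedged reading of the harness, inferred from the calls rather than stated anywhere in this diff:

    // do_check(before, with, reparsed_len):
    //   * `before` contains `<|>...<|>` markers delimiting the range to delete,
    //   * `with` is the text inserted in its place,
    //   * `reparsed_len` is the expected length of the range in the *old* tree
    //     that gets reparsed; in the first new test below, the unterminated
    //     token `"Unclosed string literal` is exactly 24 characters long.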

#[test] // FIXME: some tests here actually test token reparsing
@@ -402,4 +414,42 @@ enum Foo {
4,
);
}

#[test]
fn reparse_str_token_with_error_unchanged() {
do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24);
}

#[test]
fn reparse_str_token_with_error_fixed() {
do_check(r#""unterinated<|><|>"#, "\"", 12);
}

#[test]
fn reparse_block_with_error_in_middle_unchanged() {
do_check(
r#"fn main() {
if {}
32 + 4<|><|>
return
if {}
}"#,
"23",
105,
)
}

#[test]
fn reparse_block_with_error_in_middle_fixed() {
do_check(
r#"fn main() {
if {}
32 + 4<|><|>
return
if {}
}"#,
";",
105,
)
}
}