Skip to content

Commit

Permalink
Initial API for Scanner.
Browse files Browse the repository at this point in the history
Provides an interface for _lexing_ PartiQL.  Traditionally we would
factor this as the low-level interface for the parser, but with the
PEG-based implementation, we just surface the relevant parts of the parser
as a rule to parse with.

This commit helps explore how to effectively integrate with the somewhat
low-level APIs for parsing that Pest provides.

* Adds `scanner` module.
* Adds `Scanner` trait to the prelude.
* Makes `PartiQLParser` public to crate.
* Refactors `LineAndColumn` as a tuple for `Position::At`.
  - Adds this to the `prelude`.
* Changes the entry point for the PEG to `Query` and adds `Scanner` as the
  entry point rule for implementing the `Scanner` API.
* Adds `PairsExt`/`PairExt` trait/impl to add utility methods for working
  with Pest `Pairs`/`Pair`.
* Adds `LineAndColumn::position_from` and cleans up some doc/doc tests.

Resolves #13.
  • Loading branch information
almann committed May 14, 2021
1 parent 9563ec6 commit 4e47ac3
Show file tree
Hide file tree
Showing 6 changed files with 501 additions and 19 deletions.
44 changes: 44 additions & 0 deletions partiql-parser/src/lib.rs
Expand Up @@ -2,10 +2,54 @@

//! Provides a parser for the [PartiQL][partiql] query language.
//!
//! # Usage
//!
//! An API to interact with PartiQL tokens is the [`mod@scanner`] module.
//! The [`scanner()`] function creates a [`Scanner`](scanner::Scanner) instance
//! that one can use to parse tokens incrementally from some input slice.
//!
//! ```
//! use partiql_parser::prelude::*;
//! use partiql_parser::scanner;
//!
//! fn main() -> ParserResult<()> {
//! use partiql_parser::scanner::Content::*;
//!
//! let mut scanner = scanner("SELECT FROM");
//! let first = scanner.next_token()?;
//!
//! // get the parsed variant of the token
//! match first.content() {
//! Keyword(kw) => assert_eq!("SELECT", kw),
//! }
//! // the entire text of a token can be fetched--which looks roughly the
//! // same for a keyword.
//! assert_eq!("SELECT", first.text());
//!
//! let second = scanner.next_token()?;
//! // get the parsed variant of the token
//! match second.content() {
//! Keyword(kw) => assert_eq!("FROM", kw),
//! }
//! // the other thing we can do is get line/column information from a token
//! assert_eq!(LineAndColumn::at(1, 8), second.start_loc());
//! assert_eq!(LineAndColumn::at(1, 12), second.end_loc());
//!
//! // this API is built on immutable slices, so we can restart scanning from any token
//! scanner = first.into();
//! let second_again = scanner.next_token()?;
//! assert_eq!(second, second_again);
//!
//! Ok(())
//! }
//! ```
//!
//! [partiql]: https://partiql.org

mod peg;
pub mod prelude;
pub mod result;
pub mod scanner;

pub use peg::recognize_partiql;
pub use scanner::scanner;
12 changes: 9 additions & 3 deletions partiql-parser/src/partiql.pest
@@ -1,9 +1,15 @@
WHITESPACE = _{ " " | "\t" | "\x0B" | "\x0C" | "\r" | "\n" }

// really basic rules to just detect a sequence of keywords
// to dip our toes in Pest
// TODO implement a full grammar, this is a very primitive version to start
// working with Pest and its APIs.

Keywords = _{ SOI ~ Keyword+ ~ EOI}
// Entry point for full query parsing
Query = _{ SOI ~ Keyword+ ~ EOI}

// Entry point for query "scanning"
// Note that this is factored this way to support an iteration style API
// where we can call back into this rule on subsequent input.
Scanner = _{ SOI ~ Keyword }

Keyword = { AllKeywords }

Expand Down
67 changes: 57 additions & 10 deletions partiql-parser/src/peg.rs
Expand Up @@ -4,12 +4,63 @@
//! can be exported for users to consume.

use crate::prelude::*;
use pest::Parser;
use crate::result::syntax_error;
use pest::iterators::{Pair, Pairs};
use pest::{Parser, RuleType};
use pest_derive::Parser;

#[derive(Parser)]
#[grammar = "partiql.pest"]
struct PartiQLParser;
pub(crate) struct PartiQLParser;

/// Extension methods for working with [`Pairs`].
pub(crate) trait PairsExt<'val, R: RuleType> {
/// Consumes a [`Pairs`] as a singleton, yielding its only [`Pair`].
///
/// # Errors
///
/// Returns an error if there is no [`Pair`] at all, or if there is more than one [`Pair`].
fn exactly_one(self) -> ParserResult<Pair<'val, R>>;
}

impl<'val, R: RuleType> PairsExt<'val, R> for Pairs<'val, R> {
    /// Consumes this [`Pairs`], returning its only [`Pair`].
    ///
    /// # Errors
    ///
    /// * Returns a syntax error positioned at the start of the first [`Pair`] when a second
    ///   [`Pair`] follows it.
    /// * Returns a syntax error with [`Position::Unknown`] when there is no [`Pair`] at all.
    fn exactly_one(mut self) -> ParserResult<Pair<'val, R>> {
        match self.next() {
            Some(pair) => {
                // make sure there isn't something more...
                if let Some(other_pair) = self.next() {
                    // Return the error explicitly: a bare `syntax_error(..)?;` statement leaves
                    // the `Ok` type unconstrained and only compiles through inference fallback.
                    return syntax_error(
                        format!("Expected one token pair, got: {:?}, {:?}", pair, other_pair),
                        pair.start_loc().into(),
                    );
                }
                Ok(pair)
            }
            // nothing was produced, so there is no location to report
            None => syntax_error(
                "Expected one token pair, got nothing!",
                Position::Unknown,
            ),
        }
    }
}

/// Extension methods for working with [`Pair`].
///
/// These translate the positions of a [`Pair`] into this crate's [`LineAndColumn`] type.
pub(crate) trait PairExt<'val, R: RuleType> {
/// Translates the start position of the [`Pair`] into a [`LineAndColumn`].
fn start_loc(&self) -> LineAndColumn;

/// Translates the end position of the [`Pair`] into a [`LineAndColumn`].
///
/// NOTE(review): the crate-level scanner example suggests an end location is one column past
/// the last character of the token -- confirm that this holds here as well.
fn end_loc(&self) -> LineAndColumn;
}

impl<'val, R: RuleType> PairExt<'val, R> for Pair<'val, R> {
#[inline]
fn start_loc(&self) -> LineAndColumn {
self.as_span().start_pos().line_col().into()
}

#[inline]
fn end_loc(&self) -> LineAndColumn {
self.as_span().end_pos().line_col().into()
}
}

/// Recognizer for PartiQL queries.
///
Expand All @@ -18,7 +69,7 @@ struct PartiQLParser;
///
/// This API will be replaced with one that produces an AST in the future.
pub fn recognize_partiql(input: &str) -> ParserResult<()> {
PartiQLParser::parse(Rule::Keywords, input)?;
PartiQLParser::parse(Rule::Query, input)?;
Ok(())
}

Expand All @@ -34,13 +85,9 @@ mod tests {
#[test]
fn error() -> ParserResult<()> {
match recognize_partiql("SELECT FROM MOO") {
Err(ParserError::SyntaxError { position, .. }) => assert_eq!(
Position::At {
line: 1,
column: 13
},
position
),
Err(ParserError::SyntaxError { position, .. }) => {
assert_eq!(Position::at(1, 13), position)
}
_ => panic!("Expected Syntax Error"),
};
Ok(())
Expand Down
2 changes: 2 additions & 0 deletions partiql-parser/src/prelude.rs
Expand Up @@ -3,6 +3,8 @@
//! Convenience export of common traits and basic types that are almost always
//! needed when using the parser APIs.

pub use crate::result::LineAndColumn;
pub use crate::result::ParserError;
pub use crate::result::ParserResult;
pub use crate::result::Position;
pub use crate::scanner::Scanner;
125 changes: 119 additions & 6 deletions partiql-parser/src/result.rs
Expand Up @@ -4,33 +4,140 @@

use pest::error::{ErrorVariant, LineColLocation};
use std::fmt;
use std::fmt::Formatter;
use thiserror::Error;

/// Position in the source for an error.
/// A line and column location.
///
/// This value is one-based, as that is how most people think of lines and columns.
/// The first field is the line and the second field is the column.
///
/// ## Example
/// ```
/// # use partiql_parser::prelude::*;
/// println!("Beginning of a document: {}", LineAndColumn::at(1, 1));
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct LineAndColumn(pub usize, pub usize);

impl LineAndColumn {
    /// Constructs a [`LineAndColumn`] from a `line` and a `column`.
    #[inline]
    pub fn at(line: usize, column: usize) -> Self {
        Self(line, column)
    }

    /// Returns a [`LineAndColumn`] that repositions this position relative
    /// to the given one as a sort of "origin."
    ///
    /// When this position is on its first line, only the column is shifted by the origin's
    /// column; otherwise the line is shifted by the origin's line and the column is kept as-is.
    ///
    /// Note that this positioning is 1-based, so repositioning `(1, 1)` from `(1, 1)` is a no-op.
    /// A line or column of zero is outside of that contract; it is treated as carrying no
    /// offset rather than underflowing.
    ///
    /// ## Examples
    /// ```
    /// # use partiql_parser::prelude::*;
    /// assert_eq!(
    ///     LineAndColumn::at(1, 1),
    ///     LineAndColumn::at(1, 1).position_from(LineAndColumn::at(1, 1))
    /// );
    /// ```
    ///
    /// ```
    /// # use partiql_parser::prelude::*;
    /// assert_eq!(
    ///     LineAndColumn::at(1, 2),
    ///     LineAndColumn::at(1, 2).position_from(LineAndColumn::at(1, 1))
    /// );
    /// ```
    ///
    /// ```
    /// # use partiql_parser::prelude::*;
    /// assert_eq!(
    ///     LineAndColumn::at(5, 10),
    ///     LineAndColumn::at(1, 4).position_from(LineAndColumn::at(5, 7))
    /// );
    /// ```
    ///
    /// ```
    /// # use partiql_parser::prelude::*;
    /// assert_eq!(
    ///     LineAndColumn::at(21, 2),
    ///     LineAndColumn::at(20, 2).position_from(LineAndColumn::at(2, 15))
    /// );
    /// ```
    pub fn position_from(self, location: LineAndColumn) -> Self {
        let LineAndColumn(base_line, base_column) = location;
        let LineAndColumn(dest_line, dest_column) = self;

        // Turn the one-based line into a zero-based offset; saturate so that a zero line
        // cannot underflow (panic in debug builds, wrap around in release builds).
        let diff_line = dest_line.saturating_sub(1);
        if diff_line > 0 {
            // we're moving lines, adjust the line and take the target column as-is
            LineAndColumn::at(base_line + diff_line, dest_column)
        } else {
            // same line from base, adjust only the column
            let diff_column = dest_column.saturating_sub(1);
            LineAndColumn::at(base_line, base_column + diff_column)
        }
    }
}

impl From<(usize, usize)> for LineAndColumn {
    /// Converts a `(line, column)` tuple into a [`LineAndColumn`].
    fn from((line, column): (usize, usize)) -> Self {
        Self::at(line, column)
    }
}

impl fmt::Display for LineAndColumn {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(f, "line {}, column {}", self.0, self.1)
}
}

/// A possible position in the source.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Position {
/// Variant indicating that there *is no* known location in source for some context.
Unknown,
At { line: usize, column: usize },
/// Variant indicating that there *is* a known location in source for some context.
At(LineAndColumn),
}

impl Position {
/// Shorthand for creating a [`Position::At`] variant.
#[inline]
pub fn at(line: usize, column: usize) -> Self {
Self::At(LineAndColumn::at(line, column))
}
}

impl From<LineAndColumn> for Position {
fn from(line_column: LineAndColumn) -> Self {
Self::At(line_column)
}
}

impl fmt::Display for Position {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Position::Unknown => write!(f, "unknown position"),
Position::At { line, column } => write!(f, "line {}, column {}", *line, *column),
Position::At(location) => {
write!(f, "{}", location)
}
}
}
}

/// Errors from the PartiQL parser.
#[derive(Clone, Debug, Eq, PartialEq, Error)]
pub enum ParserError {
/// Indicates that there was a problem with syntax.
///
/// `message` describes the problem and `position` is the [`Position`] reported for it.
/// The error is displayed as `Syntax Error: <message> (<position>)`.
#[error("Syntax Error: {message} ({position})")]
SyntaxError { message: String, position: Position },
}

impl ParserError {
/// Convenience function to create a [SyntaxError](ParserError::SyntaxError).
/// Convenience function to create a [`SyntaxError`](ParserError::SyntaxError).
#[inline]
pub fn syntax_error<S: Into<String>>(message: S, position: Position) -> Self {
Self::SyntaxError {
message: message.into(),
Expand All @@ -39,6 +146,12 @@ impl ParserError {
}
}

/// Convenience function to create a `Err([SyntaxError](ParserError::SyntaxError))`.
#[inline]
pub fn syntax_error<T, S: Into<String>>(message: S, position: Position) -> ParserResult<T> {
Err(ParserError::syntax_error(message, position))
}

impl<R> From<pest::error::Error<R>> for ParserError
where
R: fmt::Debug,
Expand All @@ -54,7 +167,7 @@ where
ErrorVariant::ParsingError { positives, .. } => format!("Expected {:?}", positives),
ErrorVariant::CustomError { message } => message,
};
Self::syntax_error(message, Position::At { line, column })
Self::syntax_error(message, Position::at(line, column))
}
}

Expand All @@ -68,7 +181,7 @@ mod tests {

#[rstest]
#[case::syntax_error_with_pos(
ParserError::syntax_error("Boo", Position::At { line: 12, column: 3 }),
ParserError::syntax_error("Boo", Position::at(12, 3)),
"Syntax Error: Boo (line 12, column 3)"
)]
#[case::syntax_error_no_pos(
Expand Down

0 comments on commit 4e47ac3

Please sign in to comment.