Skip to content

Commit

Permalink
Reimplement partitioning.
Browse files Browse the repository at this point in the history
  • Loading branch information
olson-sean-k committed Jan 17, 2024
1 parent db7de20 commit 926b684
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 43 deletions.
6 changes: 6 additions & 0 deletions src/diagnostics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,18 @@ impl SpanExt for Span {

/// A type that carries a [`Span`] and exposes it by reference.
pub trait Spanned {
/// Returns a shared reference to the span carried by this value.
fn span(&self) -> &Span;

/// Returns an exclusive reference to the span carried by this value so that it can be
/// modified in place.
fn span_mut(&mut self) -> &mut Span;
}

// A `Span` is trivially `Spanned`: both accessors yield the span itself.
impl Spanned for Span {
fn span(&self) -> &Span {
self
}

fn span_mut(&mut self) -> &mut Span {
self
}
}

/// Error associated with a [`Span`] within a glob expression.
Expand Down
171 changes: 128 additions & 43 deletions src/token/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use crate::{StrExt as _, PATHS_ARE_CASE_INSENSITIVE};

pub use crate::token::parse::{parse, ParseError, ROOT_SEPARATOR_EXPRESSION};
pub use crate::token::variance::invariant::{Breadth, Depth, Invariant, Size, Text};
pub use crate::token::variance::{invariant_text_prefix, Bound, Cardinality, Variance};
pub use crate::token::variance::{Bound, Cardinality, Variance};

// TODO: Expression and glob trees differ only in their annotation data. This supports the
// distinction, but is inflexible and greatly limits any intermediate representation of a
Expand All @@ -43,6 +43,10 @@ impl Spanned for GlobMetadata {
fn span(&self) -> &Span {
&self.span
}

fn span_mut(&mut self) -> &mut Span {
&mut self.span
}
}

// TODO: This metadata has no spans, as the expressions are disjoint. However, it should probably
Expand Down Expand Up @@ -102,7 +106,14 @@ pub enum When {
Never,
}

// TODO: Consider the name `Composition` or something similar.
// TODO: Is this the right abstraction? Is it useful? Consider that
//
// 1. Repetitions now modify a single token rather than reimplementing concatenation.
// 2. When partitioning, only concatenations can be drained without regard to semantics nor
// variance.
//
// The `Token::conjunction` and `Token::concatenation` functions are similar, but perhaps only
// the latter is useful? If so, this type and associated code should probably be removed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Composition<T> {
Conjunctive(T),
Expand Down Expand Up @@ -178,7 +189,7 @@ impl<'t, A> Tokenized<'t, A> {

impl<'t, A> Tokenized<'t, A>
where
A: Spanned,
A: Default + Spanned,
{
pub fn partition(self) -> (PathBuf, Self) {
fn pop_expression_bytes(expression: &str, n: usize) -> &str {
Expand All @@ -189,31 +200,18 @@ where

let Tokenized {
expression,
mut tokens,
mut token,
} = self;

// Get the invariant prefix and its upper bound for the token sequence.
let prefix = variance::invariant_text_prefix(tokens.iter()).into();
let n = variance::invariant_text_prefix_upper_bound(&tokens);
let mut offset: usize = tokens
.iter()
.take(n)
let (n, text) = token.invariant_text_prefix();
let (popped, token) = token.pop_prefix_tokens(n);
let mut offset: usize = popped
.into_iter()
.map(|token| *token.annotation().span())
.sum();

// Drain invariant tokens from the beginning of the token sequence and unroot any tokens at
// the beginning of the variant sequence (tree wildcards). Finally, translate spans and
// discard the corresponding invariant bytes in the expression.
tokens.drain(0..n);
if tokens.first_mut().map_or(false, Token::unroot) {
// TODO: The relationship between roots, the unrooting operation, and the span in an
// expression that represents such a root (if any) is not captured by these APIs
// very well. Perhaps `unroot` should do more here?
// Pop additional bytes for the root separator expression if the initial token has lost
// a root.
offset += ROOT_SEPARATOR_EXPRESSION.len();
}
for token in tokens.iter_mut() {
let token = token.unwrap_or_else(|| Token::empty(Default::default()));
for token in token.concatenation() {
let start = token.annotation().0.saturating_sub(offset);
token.annotation.0 = start;
}
Expand All @@ -224,7 +222,7 @@ where
},
};

(prefix, Tokenized { expression, tokens })
(text.into(), Tokenized { expression, token })
}
}

Expand Down Expand Up @@ -325,8 +323,44 @@ impl<'t, A> Token<'t, A> {
}
}

pub fn unroot(&mut self) -> bool {
self.as_leaf_mut().map_or(false, LeafKind::unroot)
fn pop_prefix_tokens_with<F>(mut self, n: usize, mut f: F) -> (Vec<Self>, Option<Self>)
where
F: FnMut(&mut Self) -> (),

Check failure on line 328 in src/token/mod.rs

View workflow job for this annotation

GitHub Actions / Lint

unneeded unit return type
{
if n == 0 {
// Yield the token as-is if there are no tokens to pop.
return (vec![], Some(self));
}
if let Some(concatenation) = self.as_concatenation_mut() {
if n >= concatenation.tokens().len() {
// Pop the entire concatenation if exhausted.
return (vec![self], None);
}
// Pop `n` tokens and forward the first remaining token in the concatenation to `f`.
let tokens = concatenation.0.drain(0..n).collect();
f(concatenation.0.first_mut().unwrap());
(tokens, Some(self))
}
// Pop the entire token if it is not a concatenation (and `n` is not zero).
(vec![self], None)
}

/// Pops up to `n` tokens from the front of this token and unroots the first remaining token
/// at the new boundary.
///
/// This delegates to `pop_prefix_tokens_with` and, when a concatenation is only partially
/// drained, calls `unroot_boundary_component` on the first token left in it. The result of
/// the unrooting (whether or not a root was removed) is discarded.
fn pop_prefix_tokens(self, n: usize) -> (Vec<Self>, Option<Self>)
where
    LeafKind<'t>: Unroot<A>,
{
    self.pop_prefix_tokens_with(n, |first| {
        first.unroot_boundary_component();
    })
}

fn unroot_boundary_component(&mut self) -> bool
where
LeafKind<'t>: Unroot<A>,
{
let annotation = &mut self.annotation;
self.as_leaf_mut()
.map_or(false, |leaf| leaf.unroot(annotation))
}

pub fn fold<F>(&self, f: F) -> Option<F::Term>
Expand Down Expand Up @@ -577,6 +611,7 @@ impl<'t, A> Token<'t, A> {
})
}

// TODO: Consider replacing this with `concatenation`. See `Composition`.
pub fn conjunction(&self) -> &[Self] {
if let Some(Composition::Conjunctive(tokens)) = self.tokens() {
tokens
Expand All @@ -586,6 +621,15 @@ impl<'t, A> Token<'t, A> {
}
}

/// Gets the tokens that this token concatenates.
///
/// If this token is a concatenation, then its sub-tokens are returned. Otherwise, a slice of
/// length one that contains only this token is returned.
pub fn concatenation(&self) -> &[Self] {
    match self.as_concatenation() {
        Some(concatenation) => concatenation.tokens(),
        None => slice::from_ref(self),
    }
}

pub fn composition(&self) -> Composition<()> {
self.tokens()
.map_or(Composition::Conjuntive(()), |tokens| tokens.map(|_| ()))
Expand Down Expand Up @@ -620,12 +664,7 @@ impl<'t, A> Token<'t, A> {
text: String,
}

let mut head = Prefix {
index: 0,
text: String::new(),
};
let mut checkpoint = None;
let tokens = self.conjunction().iter().peekable();
let tokens = self.concatenation().iter().peekable();
// TODO: The more correct predicate is, "Does this token have a root and variant text?"
// However, `has_root` is not quite correct, so rooted tree wildcards are detected
// explicitly instead to avoid shenanigans. See `has_root`.
Expand All @@ -634,6 +673,11 @@ impl<'t, A> Token<'t, A> {
}) {
return (0, String::from(Separator::INVARIANT_TEXT));
}
let mut head = Prefix {
index: 0,
text: String::new(),
};
let mut checkpoint = None;
for (n, token) in tokens.enumerate() {
match token.variance::<Text>() {
Variance::Invariant(text) => {
Expand All @@ -654,9 +698,9 @@ impl<'t, A> Token<'t, A> {
(head.index + 1, head.text)
}

// TODO: Is root an invariant? This query is a bit odd. It returns `true` for alternations and
// repetitions (even with a lower bound of zero). Either way, this should probably return
// `When`, not `bool`.
// TODO: This query is a bit odd. It returns `true` for alternations (even when not all
// branches are rooted) and repetitions (even with a lower bound of zero). Either way,
// this should probably return `When`, not `bool`.
pub fn has_root(&self) -> bool {
self.walk().starting().any(|(_, token)| {
token.as_leaf().map_or(false, |leaf| {
Expand Down Expand Up @@ -692,6 +736,13 @@ impl<'t, A> Token<'t, A> {
}
}

/// Gets an exclusive reference to the branch of this token, or `None` if the token's topology
/// is not a branch.
fn as_branch_mut(&mut self) -> Option<&mut BranchKind<'t, A>> {
    if let TokenTopology::Branch(ref mut branch) = self.topology {
        Some(branch)
    }
    else {
        None
    }
}

pub fn as_leaf(&self) -> Option<&LeafKind<'t>> {
match self.topology {
TokenTopology::Leaf(ref leaf) => Some(leaf),
Expand Down Expand Up @@ -727,6 +778,13 @@ impl<'t, A> Token<'t, A> {
})
}

/// Gets an exclusive reference to the concatenation of this token, or `None` if the token is
/// not a concatenation branch.
fn as_concatenation_mut(&mut self) -> Option<&mut Concatenation<'t, A>> {
    match self.as_branch_mut() {
        Some(BranchKind::Concatenation(concatenation)) => Some(concatenation),
        _ => None,
    }
}

pub fn as_literal(&self) -> Option<&Literal<'t>> {
self.as_leaf().and_then(|leaf| match leaf {
LeafKind::Literal(ref literal) => Some(literal),
Expand Down Expand Up @@ -907,6 +965,10 @@ where
}
}

/// A token component that can have its root removed.
///
/// The annotation of the associated token is passed alongside so that an implementation may
/// adjust it when a root is removed.
trait Unroot<A> {
/// Removes the root and returns a `bool` that reports the outcome.
// NOTE(review): the visible implementations return `true` only when a root was actually
// removed; confirm this is the intended contract for all implementors.
fn unroot(&mut self, annotation: &mut A) -> bool;
}

#[derive(Clone, Debug)]
pub enum LeafKind<'t> {
Class(Class),
Expand All @@ -931,13 +993,6 @@ impl<'t> LeafKind<'t> {
}
}

pub fn unroot(&mut self) -> bool {
match self {
LeafKind::Wildcard(ref mut wildcard) => wildcard.unroot(),
_ => false,
}
}

pub fn boundary(&self) -> Option<Boundary> {
match self {
LeafKind::Separator(_) => Some(Boundary::Separator),
Expand Down Expand Up @@ -975,6 +1030,18 @@ impl From<Wildcard> for LeafKind<'static> {
}
}

/// Unrooting a leaf only affects wildcards; any other kind of leaf is left untouched.
impl<'t, A> Unroot<A> for LeafKind<'t>
where
    Wildcard: Unroot<A>,
{
    fn unroot(&mut self, annotation: &mut A) -> bool {
        if let LeafKind::Wildcard(ref mut wildcard) = self {
            // Name the trait explicitly so that this call resolves to the `Unroot`
            // implementation rather than an inherent method of the same name.
            <Wildcard as Unroot<A>>::unroot(wildcard, annotation)
        }
        else {
            false
        }
    }
}

impl<'t, T> VarianceTerm<T> for LeafKind<'t>
where
Class: VarianceTerm<T>,
Expand Down Expand Up @@ -1430,14 +1497,32 @@ pub enum Wildcard {
}

impl Wildcard {
pub fn unroot(&mut self) -> bool {
fn unroot(&mut self) -> bool {
match self {
Wildcard::Tree { ref mut has_root } => mem::replace(has_root, false),
_ => false,
}
}
}

/// Unroots a wildcard and, if a root was removed, shifts the span of the given annotation past
/// the root separator expression.
impl<A> Unroot<A> for Wildcard
where
    A: Spanned,
{
    fn unroot(&mut self, annotation: &mut A) -> bool {
        // Call the inherent method explicitly; it reports whether or not a root was removed.
        let unrooted = Wildcard::unroot(self);
        if unrooted {
            // Move the beginning of the span in the annotation forward to dissociate the token
            // from any separator in the expression. The first field is advanced and the second
            // field is reduced by the same number of bytes.
            // NOTE(review): this presumes that `Span` is a `(start, length)` pair; confirm
            // against the definition of `Span`.
            let n = ROOT_SEPARATOR_EXPRESSION.len();
            let span = annotation.span_mut();
            span.0 = span.0.saturating_add(n);
            span.1 = span.1.saturating_sub(n);
        }
        unrooted
    }
}

impl VarianceTerm<Breadth> for Wildcard {
fn term(&self) -> Variance<Breadth> {
match self {
Expand Down

0 comments on commit 926b684

Please sign in to comment.