Skip to content

Commit

Permalink
Refactor invariants and remove breadth implementations.
Browse files Browse the repository at this point in the history
This change removes the implementation of composition for breadth,
because it is no longer needed, not particularly useful, and difficult
to implement. Invariants have been reorganized and refactored.

This change also removes some lingering obsolete comments and fixes up
some comments.
  • Loading branch information
olson-sean-k committed Feb 12, 2024
1 parent a95f634 commit 01c893d
Show file tree
Hide file tree
Showing 8 changed files with 319 additions and 528 deletions.
125 changes: 24 additions & 101 deletions src/token/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ impl<'t, A> Token<'t, A> {
{
if let Some(concatenation) = self.as_concatenation_mut() {
if n >= concatenation.tokens().len() {
// Pop the entire concatenation if exhausted. Note that this always pops empty
// Pop the entire concatenation if exhausted. This always pops empty
// concatenations.
(vec![self], None)
}
Expand Down Expand Up @@ -401,9 +401,14 @@ impl<'t, A> Token<'t, A> {
self::components(self.concatenation())
}

// TODO: Do not directly expose `Component` in these APIs. Instead, output a type, in its
// public API, pairs a `LiteralSequence` with the tokens of that sequence. This is
// necessary for reading the annotations of the `LiteralSequence`.
// TODO: This API operates differently than `components`, but appears similar and may be
// confusing. Unlike `components`, this function searches the tree of the token rather
// than only its concatenation. Consider names or different APIs that make this more
// obvious.
// TODO: This function only finds `LiteralSequence` components rather than **invariant text**
// components, and so may miss some interesting text. Consider `{.}{.}/a`. The text of
// this pattern is invariant, but this function does not find and cannot represent the
// `..` component.
pub fn literals(
&self,
) -> impl Iterator<Item = (Component<'_, 't, A>, LiteralSequence<'_, 't>)> {
Expand Down Expand Up @@ -459,7 +464,7 @@ impl<'t, A> Token<'t, A> {
T: Conjunction<Output = T> + Invariant,
{
self.fold(TreeVariance::default())
.unwrap_or_else(Variance::identity)
.unwrap_or_else(Variance::zero)
.map_invariant(|invariant| ops::conjunction(T::once(), invariant))
}

Expand Down Expand Up @@ -731,7 +736,7 @@ impl<'t, A> From<LeafKind<'t>> for TokenTopology<'t, A> {
// TODO: The use of this trait and `BranchFold` in `Token::fold_map` is a very unfortunate
// consequence of using an intrusive tree: the data and topology of a token tree cannot be
// cleanly separated. Remove these APIs if and when the tree is implemented via an
// unintrusive data structure.
// unintrusive data structure. See `BranchFold`.
trait BranchComposition<'t>: Sized {
type Annotation;
type BranchData;
Expand Down Expand Up @@ -974,12 +979,6 @@ impl<'t, A> From<Vec<Token<'t, A>>> for Alternation<'t, A> {
}
}

impl<'t, A> VarianceFold<Breadth> for Alternation<'t, A> {
fn fold(&self, terms: Vec<VarianceOf<Breadth>>) -> Option<VarianceOf<Breadth>> {
terms.into_iter().reduce(ops::disjunction)
}
}

impl<'t, A> VarianceFold<Depth> for Alternation<'t, A> {
fn fold(&self, terms: Vec<VarianceOf<Depth>>) -> Option<VarianceOf<Depth>> {
terms.into_iter().reduce(ops::disjunction)
Expand Down Expand Up @@ -1086,19 +1085,19 @@ impl Class {
.iter()
.map(Archetype::term)
.reduce(f)
.unwrap_or_else(Variance::identity)
.unwrap_or_else(Variance::zero)
}
}

impl VarianceTerm<Breadth> for Class {
fn term(&self) -> VarianceOf<Breadth> {
Variance::identity()
Variance::zero()
}
}

impl VarianceTerm<Depth> for Class {
fn term(&self) -> VarianceOf<Depth> {
Variance::identity()
Variance::zero()
}
}

Expand Down Expand Up @@ -1151,12 +1150,6 @@ impl<'t, A> From<Vec<Token<'t, A>>> for Concatenation<'t, A> {
}
}

impl<'t, A> VarianceFold<Breadth> for Concatenation<'t, A> {
fn fold(&self, terms: Vec<VarianceOf<Breadth>>) -> Option<VarianceOf<Breadth>> {
terms.into_iter().reduce(ops::disjunction)
}
}

impl<'t, A> VarianceFold<Depth> for Concatenation<'t, A> {
fn fold(&self, terms: Vec<VarianceOf<Depth>>) -> Option<VarianceOf<Depth>> {
terms.into_iter().reduce(ops::conjunction)
Expand Down Expand Up @@ -1232,13 +1225,13 @@ impl<'t> Literal<'t> {

impl<'t> VarianceTerm<Breadth> for Literal<'t> {
fn term(&self) -> VarianceOf<Breadth> {
Variance::identity()
Variance::zero()
}
}

impl<'t> VarianceTerm<Depth> for Literal<'t> {
fn term(&self) -> VarianceOf<Depth> {
Variance::identity()
Variance::zero()
}
}

Expand All @@ -1257,13 +1250,10 @@ impl<'t> VarianceTerm<Text<'t>> for Literal<'t> {
}
}

// TODO: The `VarianceFold` implementations for `Repetition` must reimplement concatenation. Note
// that `Repetition` alone has an interesting `finalize` implementation, which is a
// consequence of the way that it adapts another token. Can `VarianceFold` be decomposed in a
// way that avoids this redundancy? That is, when a branch token has exactly one child token,
// it can defer entirely to that child token for folding variance terms and likely must only
// implement `finalize`, but this is not well represented by `VarianceFold`, which is a bit
// general and unstructured.
// TODO: The `VarianceFold` implementations for `Repetition` reimplement concatenation in `fold`.
// Moreover, `Repetition`s have only one token, so `fold` can simply forward its term. Can
// `VarianceFold` be decomposed in a way that avoids this redundancy? Note too that no other
// token (at time of writing) has an interesting `finalize` implementation.
#[derive(Clone, Debug)]
pub struct Repetition<'t, A> {
token: Box<Token<'t, A>>,
Expand All @@ -1288,66 +1278,6 @@ impl<'t, A> Repetition<'t, A> {
pub fn variance(&self) -> NaturalRange {
self.bound_specification().into()
}

//fn repeat_or_conjunction<T>(&self, term: VarianceOf<T>) -> VarianceOf<T>
//where
// VarianceOf<T>: Conjunction<Output = VarianceOf<T>>,
// T: Invariant + Product,
// T::Bound: From<BoundedNonEmptyRange> + Product,
//{
// self.repeat_or_else(term, ops::conjunction)
//}

//fn repeat_or_else<T, F>(&self, term: VarianceOf<T>, f: F) -> VarianceOf<T>
//where
// T: Invariant + Product<usize, Output = T>,
// T::Bound: From<BoundedNonEmptyRange> + Product,
// F: FnOnce(VarianceOf<T>, VarianceOf<T>) -> VarianceOf<T>,
//{
// // TODO: Hmm, what about `<<?:2,>>`? The size of the inner repetition is
// // `Variant(Bounded(2,))`. The outer repetition should probably be
// // `Variant(Unbounded)` here, but some interesting information has been lost in that
// // bound: the match is a multiple of two. In fact, it may be incorrect to consider
// // this unbounded. The extents of the size reach the extremes, but there is a
// // definite bound on the set of sizes. This may be important to detect when querying
// // exhaustiveness.
// // TODO: NOPE, THIS IS NOT QUITE RIGHT! The problem here is the operation. It looks like we
// // need multiplication rather than addition! That seems intuitive, right? If the
// // lower bound of a repetition is zero, then multiplying the like term in a bounded
// // term yields... zero! No more lower bound! Similarly, `<<?:2,>:2,>` should yield
// // `Variant(Bounded(4,))`, just like `<<?:2>:2>` yields `Invariant(4)`.
// //
// // The only operator that should be used here is probably `repeated`, but it may not
// // be general enough yet. There should be no `_or_else` version of this function.
// // Perhaps this only applies when the repetition is bounded though. If that's true,
// // just use conjunction here without parameterization.
// //
// // Honestly, this is beginning to look like `repeated` is as fundamental as
// // `conjunction` and `Variance` must support it. That could be a bit of a bummer
// // though, as I'd expect it to essentially be conjunction where only the **inner*
// // function becomes `repeated`. We should be able to factor that out though with an
// // intermediate function that accepts an inner function and is then used to implement
// // the traits.
// match self.variance() {
// // Repeating an invariant can cause overflows, very large allocations, and very
// // inefficient comparisons (e.g., comparing very large strings). This is detected by
// // both `encode::compile` and `rule::check` (in distinct but similar ways). Querying
// // token trees for an invariant must be done with care (after using these functions) to
// // avoid expanding pathological invariant expressions like `<long:9999999999999>`.
// Variance::Invariant(n) => term
// .map_invariant(|invariant| ops::product(invariant, n))
// .map_variant(|bound| bound.map_bounded(|bound| ops::product(bound, n))),
// Variance::Variant(bound) => match bound {
// Bound::Bounded(bound) => f(term, Variance::Variant(Bound::Bounded(bound.into()))),
// // If the repetition is unbounded, then ignore the term. Note that the conjunction
// // of an unbounded term and a bounded term is the bounded term but with an open
// // upper bound. However, this (and other operators) does not distribute the open
// // lower bound of such a repetition correctly. Consider `<<?:1,>>`, for example.
// _ => Variance::unbounded(),
// },
// //Variance::Variant(bound) => f(term, Variance::Variant(bound.map_bounded(From::from))),
// }
//}
}

impl<'t, A> BranchComposition<'t> for Repetition<'t, A> {
Expand All @@ -1371,12 +1301,6 @@ impl<'t, A> BranchComposition<'t> for Repetition<'t, A> {
}
}

impl<'t, A> VarianceFold<Breadth> for Repetition<'t, A> {
fn fold(&self, terms: Vec<VarianceOf<Breadth>>) -> Option<VarianceOf<Breadth>> {
terms.into_iter().reduce(ops::disjunction)
}
}

impl<'t, A> VarianceFold<Depth> for Repetition<'t, A> {
fn fold(&self, terms: Vec<VarianceOf<Depth>>) -> Option<VarianceOf<Depth>> {
terms.into_iter().reduce(ops::conjunction)
Expand Down Expand Up @@ -1416,7 +1340,7 @@ impl Separator {

impl VarianceTerm<Breadth> for Separator {
fn term(&self) -> VarianceOf<Breadth> {
Variance::identity()
Variance::zero()
}
}

Expand Down Expand Up @@ -1481,7 +1405,7 @@ where
impl VarianceTerm<Breadth> for Wildcard {
fn term(&self) -> VarianceOf<Breadth> {
match self {
Wildcard::One => Variance::identity(),
Wildcard::One => Variance::zero(),
_ => Variance::unbounded(),
}
}
Expand All @@ -1491,7 +1415,7 @@ impl VarianceTerm<Depth> for Wildcard {
fn term(&self) -> VarianceOf<Depth> {
match self {
Wildcard::Tree { .. } => Variance::unbounded(),
_ => Variance::identity(),
_ => Variance::zero(),
}
}
}
Expand Down Expand Up @@ -1626,8 +1550,7 @@ fn components<'i, 't, A>(tokens: &'i [Token<'t, A>]) -> impl Iterator<Item = Com
})
}

// TODO: Test against token trees constructed directly from tokens (rather than parsed from
// expressions).
// TODO: Test also against token trees constructed directly from tokens rather than the parser.
#[cfg(test)]
mod tests {
use crate::token::{self, Token, TokenTree};
Expand Down
Loading

0 comments on commit 01c893d

Please sign in to comment.