Skip to content

Commit

Permalink
Merge #156: Field splitting (and empty field removal)
Browse files Browse the repository at this point in the history
  • Loading branch information
magicant committed May 12, 2022
2 parents 83d54d9 + 0dacd7a commit 56441b1
Show file tree
Hide file tree
Showing 6 changed files with 990 additions and 7 deletions.
2 changes: 1 addition & 1 deletion yash-semantics/src/assign.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ mod tests {
assert_eq!(
env.variables.get("b").unwrap(),
&Variable {
value: Value::Array(vec!["".to_string()]),
value: Value::Array(vec![]),
last_assigned_location: Some(assigns[1].location.clone()),
is_exported: false,
read_only_location: None,
Expand Down
96 changes: 90 additions & 6 deletions yash-semantics/src/expansion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@
//!
//! ## Field splitting
//!
//! The field splitting divides a field into smaller parts delimited by a
//! character contained in `$IFS`. Consequently, this operation removes empty
//! The [field splitting](split) divides a field into smaller parts delimited by
//! a character contained in `$IFS`. Consequently, this operation removes empty
//! fields from the results of the previous steps.
//!
//! ## Pathname expansion
Expand All @@ -68,15 +68,18 @@ pub mod attr_strip;
pub mod initial;
pub mod phrase;
pub mod quote_removal;
pub mod split;

use self::attr::AttrChar;
use self::attr::AttrField;
use self::attr::Origin;
use self::initial::Expand;
use self::split::Ifs;
use std::borrow::Cow;
use yash_env::semantics::ExitStatus;
use yash_env::system::Errno;
use yash_env::variable::ReadOnlyError;
use yash_env::variable::Variable;
use yash_syntax::source::pretty::Annotation;
use yash_syntax::source::pretty::AnnotationType;
use yash_syntax::source::pretty::MessageBase;
Expand Down Expand Up @@ -226,9 +229,11 @@ pub async fn expand_words<'a, I: IntoIterator<Item = &'a Word>>(
env: &mut yash_env::Env,
words: I,
) -> Result<(Vec<Field>, Option<ExitStatus>)> {
let mut env = initial::Env::new(env);

// initial expansion //
let words = words.into_iter();
let mut fields = Vec::with_capacity(words.size_hint().0);
let mut env = initial::Env::new(env);
for word in words {
use self::initial::QuickExpand::*;
let phrase = match word.quick_expand(&mut env) {
Expand All @@ -242,10 +247,25 @@ pub async fn expand_words<'a, I: IntoIterator<Item = &'a Word>>(
}

// TODO brace expansion
// TODO field splitting
// TODO pathname expansion (or quote removal and attribute stripping)

let fields = fields
// field splitting //
use yash_env::variable::Value::Scalar;
#[rustfmt::skip]
let ifs = match env.inner.variables.get("IFS") {
Some(&Variable { value: Scalar(ref value), .. }) => Ifs::new(value),
// TODO If the variable is an array, should we ignore it?
_ => Ifs::default(),
};
let mut split_fields = Vec::with_capacity(fields.len());
for field in fields {
split::split_into(field, &ifs, &mut split_fields);
}
drop(ifs);

// TODO pathname expansion

// quote removal and attribute stripping //
let fields = split_fields
.into_iter()
.map(AttrField::remove_quotes_and_strip)
.collect();
Expand Down Expand Up @@ -283,6 +303,7 @@ mod tests {
use futures_util::FutureExt;
use std::num::NonZeroU64;
use std::rc::Rc;
use yash_env::variable::Scope;
use yash_env::variable::Value;
use yash_env::variable::Variable;
use yash_syntax::source::pretty::Message;
Expand Down Expand Up @@ -326,6 +347,69 @@ mod tests {
assert_eq!(message.annotations[1].location, &Location::dummy("ROL"));
}

#[test]
fn expand_words_performs_field_splitting_possibly_with_default_ifs() {
    // `$v` expands to two words separated by a blank and followed by a
    // trailing blank. This test does not assign `IFS` itself.
    let variable = Variable {
        value: Value::Scalar("foo bar ".to_string()),
        last_assigned_location: None,
        is_exported: false,
        read_only_location: None,
    };
    let mut env = yash_env::Env::new_virtual();
    env.variables
        .assign(Scope::Global, "v".to_string(), variable)
        .unwrap();

    let words = &["$v".parse().unwrap()];
    let (fields, exit_status) = expand_words(&mut env, words)
        .now_or_never()
        .unwrap()
        .unwrap();

    // The expansion must be split into exactly two non-empty fields.
    assert_eq!(exit_status, None);
    assert_matches!(fields.as_slice(), [first, second] => {
        assert_eq!(first.value, "foo");
        assert_eq!(second.value, "bar");
    });
}

#[test]
fn expand_words_performs_field_splitting_with_current_ifs() {
    // Builds a plain scalar variable with no location or export attributes.
    fn scalar(value: &str) -> Variable {
        Variable {
            value: Value::Scalar(value.to_string()),
            last_assigned_location: None,
            is_exported: false,
            read_only_location: None,
        }
    }

    let mut env = yash_env::Env::new_virtual();
    // The value to be expanded and split.
    env.variables
        .assign(Scope::Global, "v".to_string(), scalar("foo bar "))
        .unwrap();
    // A custom separator set: the blank and the letter `o`.
    env.variables
        .assign(Scope::Global, "IFS".to_string(), scalar(" o"))
        .unwrap();

    let words = &["$v".parse().unwrap()];
    let (fields, exit_status) = expand_words(&mut env, words)
        .now_or_never()
        .unwrap()
        .unwrap();

    // Each `o` delimits a field, so an empty field appears between the two
    // adjacent `o`s.
    assert_eq!(exit_status, None);
    assert_matches!(fields.as_slice(), [first, second, third] => {
        assert_eq!(first.value, "f");
        assert_eq!(second.value, "");
        assert_eq!(third.value, "bar");
    });
}

#[test]
fn expand_value_scalar() {
let mut env = yash_env::Env::new_virtual();
Expand Down
221 changes: 221 additions & 0 deletions yash-semantics/src/expansion/split.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
// This file is part of yash, an extended POSIX shell.
// Copyright (C) 2022 WATANABE Yuki
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

//! Field splitting
//!
//! Field splitting divides a field into smaller parts delimited by field
//! separator characters, which are usually obtained from the `$IFS` variable.
//! Every occurrence of a non-whitespace separator delimits a new field (which
//! may be an empty field). One or more adjacent whitespace separators in the
//! middle of a field further split the field. No separator remains in the
//! final results.
//!
//! Only [unquoted characters](AttrChar) having a `SoftExpansion`
//! [origin](Origin) are considered for delimiting. Other characters are not
//! subject to field splitting.
//!
//! # Example
//!
//! ```
//! use yash_syntax::source::Location;
//! use yash_semantics::expansion::attr::{AttrChar, AttrField, Origin};
//! use yash_semantics::expansion::split::{Ifs, split};
//!
//! // We use this utility to prepare fields used in the examples below:
//! fn field(s: &str) -> AttrField {
//! let chars = s.chars()
//! .map(|c| AttrChar {
//! value: c,
//! origin: Origin::SoftExpansion,
//! is_quoted: false,
//! is_quoting: false,
//! })
//! .collect();
//! let origin = Location::dummy("");
//! AttrField { chars, origin }
//! }
//!
//! let ifs = Ifs::new(" -");
//!
//! // When there are no separators in the input, the result is the input itself:
//! let fields: Vec<AttrField> = split(field("abc"), &ifs);
//! assert_eq!(fields, [field("abc")]);
//!
//! // Whitespace separators are removed:
//! let fields: Vec<AttrField> = split(field(" abc "), &ifs);
//! assert_eq!(fields, [field("abc")]);
//!
//! // An empty input yields no fields rather than an empty field:
//! let fields: Vec<AttrField> = split(field(""), &ifs);
//! assert_eq!(fields, []);
//!
//! // Whitespace separators split fields:
//! let fields: Vec<AttrField> = split(field("foo bar baz"), &ifs);
//! assert_eq!(fields, [field("foo"), field("bar"), field("baz")]);
//!
//! // Non-whitespace separators each split fields, which may produce empty fields:
//! let fields: Vec<AttrField> = split(field("foo-bar--baz"), &ifs);
//! assert_eq!(fields, [field("foo"), field("bar"), field(""), field("baz")]);
//!
//! // Whitespace separators around non-whitespace separators are ignored:
//! let fields: Vec<AttrField> = split(field("foo - bar - - baz"), &ifs);
//! assert_eq!(fields, [field("foo"), field("bar"), field(""), field("baz")]);
//!
//! // Trailing non-whitespace separators may seem special:
//! let fields: Vec<AttrField> = split(field("foo-bar"), &ifs);
//! assert_eq!(fields, [field("foo"), field("bar")]);
//! let fields: Vec<AttrField> = split(field("foo-bar-"), &ifs);
//! assert_eq!(fields, [field("foo"), field("bar")]);
//! let fields: Vec<AttrField> = split(field("foo-bar--"), &ifs);
//! assert_eq!(fields, [field("foo"), field("bar"), field("")]);
//! ```
//!
//! # The empty-last-field option
//!
//! TODO: Not yet supported

mod ifs;
mod ranges;

pub use self::ifs::{Class, Ifs};
pub use self::ranges::Ranges;

use super::attr::AttrField;
#[cfg(doc)]
use super::attr::{AttrChar, Origin};

/// Splits a field and appends the resulting fields to an existing collection.
///
/// The ranges computed by `ifs` for the characters of `field` determine the
/// output: one field is produced per range, in order, and each is passed to
/// `results` through [`Extend::extend`]. Every produced field carries the same
/// origin as the input field. If `ifs` yields no ranges, `results` is left
/// untouched.
///
/// See also [`split`], which returns the results in a new collection rather
/// than extending an existing one.
pub fn split_into<R>(field: AttrField, ifs: &Ifs, results: &mut R)
where
    R: Extend<AttrField>,
{
    // The straightforward implementation would copy the characters of every
    // range into a fresh `AttrField`. As an optimization, the final range
    // instead reuses the input field's own buffer, so only the preceding
    // ranges need a copy. Peeking tells us whether the current range is the
    // final one.
    let mut pending = ifs.ranges(field.chars.iter().copied()).peekable();
    loop {
        let range = match pending.next() {
            Some(range) => range,
            None => return,
        };

        // TODO Use Extend::extend_one when stabilized (rust#72631)
        if pending.peek().is_none() {
            // Final range: trim the original field down to it in place.
            // The tail must be cut off first so that the range's indices are
            // still valid when the head is removed.
            let mut last = field;
            last.chars.truncate(range.end);
            last.chars.drain(..range.start);
            results.extend(Some(last));
            return;
        }

        // Not the final range: copy the slice and share the origin.
        results.extend(Some(AttrField {
            chars: field.chars[range].to_vec(),
            origin: field.origin.clone(),
        }));
    }
}

/// Splits a field and returns the resulting fields in a fresh collection.
///
/// This is a convenience wrapper around [`split_into`]: it starts from
/// `R::default()`, lets `split_into` extend it, and hands the collection back
/// to the caller.
pub fn split<R>(field: AttrField, ifs: &Ifs) -> R
where
    R: Default + Extend<AttrField>,
{
    let mut collection: R = Default::default();
    split_into(field, ifs, &mut collection);
    collection
}

#[cfg(test)]
mod tests {
    use super::super::attr::{AttrChar, Origin};
    use super::*;
    use yash_syntax::source::Location;

    /// Builds a field in which every character is an unquoted, non-quoting
    /// character originating from a soft expansion. The field's origin is a
    /// dummy location.
    fn dummy_attr_field(s: &str) -> AttrField {
        let to_attr_char = |value| AttrChar {
            value,
            origin: Origin::SoftExpansion,
            is_quoted: false,
            is_quoting: false,
        };
        AttrField {
            chars: s.chars().map(to_attr_char).collect(),
            origin: Location::dummy(""),
        }
    }

    /// Splits the dummy field made from `s` using the default IFS.
    fn split_with_default_ifs(s: &str) -> Vec<AttrField> {
        let ifs = Ifs::default();
        split(dummy_attr_field(s), &ifs)
    }

    #[test]
    fn split_empty_field() {
        // An empty input produces no fields at all.
        assert_eq!(split_with_default_ifs(""), []);
    }

    #[test]
    fn split_no_change() {
        // Without any separator, the input comes back as the only field.
        assert_eq!(split_with_default_ifs("abc"), [dummy_attr_field("abc")]);
    }

    #[test]
    fn split_into_one_field() {
        // Leading and trailing blanks are stripped.
        assert_eq!(split_with_default_ifs(" foo "), [dummy_attr_field("foo")]);
    }

    #[test]
    fn split_into_two_fields() {
        let expected = [dummy_attr_field("foo"), dummy_attr_field("bar")];
        assert_eq!(split_with_default_ifs("foo bar"), expected);
    }

    #[test]
    fn split_into_many_fields() {
        let expected = [
            dummy_attr_field("one"),
            dummy_attr_field("two"),
            dummy_attr_field("three"),
            dummy_attr_field("four"),
        ];
        assert_eq!(split_with_default_ifs(" one two three four "), expected);
    }
}
Loading

0 comments on commit 56441b1

Please sign in to comment.