diff --git a/benches/process.rs b/benches/process.rs index cb07420..6b66c87 100644 --- a/benches/process.rs +++ b/benches/process.rs @@ -1,26 +1,24 @@ use criterion::{Criterion, criterion_group, criterion_main}; use std::hint::black_box; -use string_pipeline::process; +use string_pipeline::Template; fn criterion_benchmark(c: &mut Criterion) { c.bench_function("process_simple", |b| { b.iter(|| { - process( - black_box("/home/user/.cargo/bin"), - // output: "bin" - black_box("{split:/:-1}"), - ) - .unwrap() + Template::parse(black_box("{split:/:-1}")) + .unwrap() + .format(black_box("/home/user/.cargo/bin")) + .unwrap() }) }); c.bench_function("process_complex", |b| { b.iter(|| { - process( - black_box(" 18, 4.92, Unknown"), - // output: "NUM: 18 - NUM: 4.92" - black_box("{split:,:0..2|trim|prepend:num\\: |join: - |upper}"), - ) + Template::parse(black_box( + "{split:,:0..2|trim|prepend:num\\: |join: - |upper}", + )) + .unwrap() + .format(black_box("18, 4.92, Unknown")) .unwrap() }) }); diff --git a/src/lib.rs b/src/lib.rs index 1654a43..e34a156 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,10 +1,70 @@ //! # string_pipeline //! //! A flexible, template-driven string transformation pipeline for Rust. +//! +//! This library provides a way to define a sequence of string operations using a concise template syntax, +//! allowing for dynamic string manipulation based on user-defined templates. +//! +//! # Quick start +//! ```rust +//! use string_pipeline::Template; +//! +//! // Define a template with operations +//! let template = Template::parse("{split:,:0..2|join: and }").unwrap(); +//! +//! // Format a string using the template +//! let result = template.format("a,b,c,d").unwrap(); +//! +//! assert_eq!(result, "a and b"); +//! ``` +//! +//! A more in-depth view of the template syntax can be found in the [Template::parse](Template::parse) method documentation. +//! +//! # More examples +//! Get the second item in a comma-separated list: +//! ```rust +//! 
use string_pipeline::Template; +//! +//! let template = Template::parse("{split:,:1}").unwrap(); +//! +//! let result = template.format("a,b,c").unwrap(); +//! +//! assert_eq!(result, "b"); +//! ``` +//! +//! Replace all spaces with underscores and uppercase the result: +//! ```rust +//! use string_pipeline::Template; +//! +//! let template = Template::parse("{replace:s/ /_/g|upper}").unwrap(); +//! +//! let result = template.format("foo bar baz").unwrap(); +//! +//! assert_eq!(result, "FOO_BAR_BAZ"); +//! ``` +//! +//! Split, trim each resulting item and append a suffix to it: +//! ```rust +//! use string_pipeline::Template; +//! +//! let template = Template::parse("{split:,:..|trim|append:!}").unwrap(); +//! +//! let result = template.format(" a, b,c , d , e ").unwrap(); +//! +//! assert_eq!(result, "a!,b!,c!,d!,e!"); +//! ``` +//! +//! Strip ANSI escape codes: +//! ```rust +//! use string_pipeline::Template; +//! +//! let template = Template::parse("{strip_ansi}").unwrap(); +//! +//! let result = template.format("\x1b[31mHello\x1b[0m").unwrap(); +//! +//! assert_eq!(result, "Hello"); +//! 
``` mod pipeline; -pub use pipeline::apply_ops; -pub use pipeline::parse_template; -pub use pipeline::process; -pub use pipeline::{RangeSpec, StringOp, Value}; +pub use pipeline::Template; diff --git a/src/main.rs b/src/main.rs index d6df5c8..dc7f349 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use clap::Parser; use std::io::{self, Read}; -use string_pipeline::process; +use string_pipeline::Template; #[derive(Parser)] struct Cli { @@ -33,10 +33,15 @@ fn main() { }, }; - match process(&input, &cli.template) { + let template = Template::parse(&cli.template).unwrap_or_else(|e| { + eprintln!("Error parsing template: {}", e); + std::process::exit(1); + }); + + match template.format(&input) { Ok(result) => println!("{}", result), Err(e) => { - eprintln!("Error: {}", e); + eprintln!("Error formatting input: {}", e); std::process::exit(1); } } diff --git a/src/pipeline/mod.rs b/src/pipeline/mod.rs index 4da5168..2f0f26a 100644 --- a/src/pipeline/mod.rs +++ b/src/pipeline/mod.rs @@ -2,8 +2,11 @@ use regex::Regex; mod parser; use strip_ansi_escapes::strip; +pub use crate::pipeline::template::Template; +mod template; + #[derive(Debug, Clone)] -pub enum Value { +enum Value { Str(String), List(Vec), } @@ -55,10 +58,6 @@ pub enum RangeSpec { Range(Option, Option, bool), // (start, end, inclusive) } -pub fn parse_template(template: &str) -> Result<(Vec, bool), String> { - parser::parse_template(template) -} - fn resolve_index(idx: isize, len: usize) -> usize { if len == 0 { return 0; @@ -380,21 +379,20 @@ pub fn apply_ops(input: &str, ops: &[StringOp], debug: bool) -> Result Result { - let (ops, debug) = parse_template(template)?; - apply_ops(input, &ops, debug) -} - #[cfg(test)] mod tests { - use super::*; + use super::Template; + + fn process(input: &str, template: &str) -> Result { + let tmpl = Template::parse(template)?; + tmpl.format(input) + } // Single Operation Tests - Organized by Operation Type mod single_operations { - use super::*; mod positive_tests { - 
use super::*; + use super::super::process; // Split operation tests #[test] @@ -979,7 +977,7 @@ mod tests { } mod negative_tests { - use super::*; + use super::super::process; // Split operation negative tests #[test] @@ -1111,11 +1109,8 @@ mod tests { // Two-Step Pipeline Tests mod two_step_pipelines { - use super::*; - mod positive_tests { - use super::*; - + use super::super::process; // Split + Join combinations #[test] fn test_split_join_different_separators() { @@ -1379,7 +1374,7 @@ mod tests { } mod negative_tests { - use super::*; + use super::super::process; // Invalid pipeline combinations #[test] @@ -1430,10 +1425,8 @@ mod tests { // Multi-Step Pipeline Tests mod multi_step_pipelines { - use super::*; - mod positive_tests { - use super::*; + use super::super::process; // Split + Transform + Join patterns #[test] @@ -2135,7 +2128,7 @@ mod tests { } mod negative_tests { - use super::*; + use super::super::process; // Invalid three-step combinations #[test] diff --git a/src/pipeline/template.rs b/src/pipeline/template.rs new file mode 100644 index 0000000..f166177 --- /dev/null +++ b/src/pipeline/template.rs @@ -0,0 +1,179 @@ +use std::fmt::Display; + +use crate::pipeline::{StringOp, apply_ops, parser}; + +/// A `Template` represents a string template with operations that can be applied to format input +/// strings. +/// +/// It allows defining a sequence of operations to transform input strings, such as splitting, +/// joining, replacing, trimming, and more. The template is parsed from a string format that +/// specifies the operations in a concise syntax. +/// +/// The template syntax supports a variety of operations, including: +/// - **Split** +/// - **Join** +/// - **Substring extraction** +/// - **Sed-like replacement using regex** +/// - **Uppercase and lowercase conversion** +/// - **Trimming whitespace or custom characters** +/// - **Appending or prepending text** +/// - etc. 
+/// +/// A `Template` can be created by parsing a string that follows the defined syntax (see +/// `Template::parse`), and it can then be used to format input strings by applying the specified +/// operations in sequence. +/// +/// # Example +/// Trim, split and append a suffix to each resulting item: +/// ```rust +/// use string_pipeline::Template; +/// +/// let template = Template::parse("{split:,:..|trim|append:!}").unwrap(); +/// +/// let result = template.format(" a, b,c , d , e ").unwrap(); +/// +/// assert_eq!(result, "a!,b!,c!,d!,e!"); +/// ``` +#[derive(Debug)] +pub struct Template { + /// The raw template string. + raw: String, + /// A series of string operations to apply to the target string. + ops: Vec, + /// Whether to enable debug mode, which provides additional output for debugging purposes. + debug: bool, +} + +impl Template { + fn new(raw: String, ops: Vec, debug: bool) -> Self { + Template { raw, ops, debug } + } + + /// Attempts to Parse a template string into a `Template` object. + /// + /// Templates are enclosed in `{}` and consist of a chain of operations separated by `|`. + /// Arguments to operations are separated by `:`. + /// + /// # Syntax Reference + /// + /// - **Template**: `{ [!] operation_list? }` + /// - Add `!` after `{` to enable debug mode. + /// - **Operation List**: `operation ('|' operation)*` + /// - **Operation**: + /// - `split::` + /// - **Shorthand for split**: + /// - `{index}` (e.g. `{1}`, equivalent to `{split: :1}`) + /// - `{range}` (e.g. 
`{1..3}`, equivalent to `{split: :1..3}`) + /// - `join:<sep>` + /// - `substring:<range>` + /// - `replace:s/<pattern>/<replacement>/<flags>` + /// - `upper` + /// - `lower` + /// - `trim` + /// - `strip:<chars>` + /// - `append:<suffix>` + /// - `prepend:<prefix>` + /// - `strip_ansi` + /// - `filter:<regex_pattern>` + /// - `filter_not:<regex_pattern>` + /// - `slice:<range>` + /// + /// ## Supported Operations + /// + /// | Operation | Syntax | Description | + /// |-------------------|---------------------------------------------|---------------------------------------------| + /// | Split | `split:<sep>:<range>` | Split by separator, select by index/range | + /// | Join | `join:<sep>` | Join a list with separator | + /// | Substring | `substring:<range>` | Extract substrings | + /// | Replace | `replace:s/<pattern>/<replacement>/<flags>` | Regex replace (sed-like) | + /// | Uppercase | `upper` | Convert to uppercase | + /// | Lowercase | `lower` | Convert to lowercase | + /// | Trim | `trim` | Trim whitespace | + /// | Strip | `strip:<chars>` | Trim custom characters | + /// | Append | `append:<suffix>` | Append text | + /// | Prepend | `prepend:<prefix>` | Prepend text | + /// | StripAnsi | `strip_ansi` | Remove ANSI escape sequences | + /// | Filter | `filter:<regex_pattern>` | Keep only items matching regex pattern | + /// | FilterNot | `filter_not:<regex_pattern>` | Remove items matching regex pattern | + /// | Slice | `slice:<range>` | Select elements from a list | + /// + /// ## Range Specifications + /// + /// Ranges use Rust-like syntax and support negative indices like Python: + /// + /// | Range | Description | Example | + /// |-------|-------------|---------| + /// | `N` | Single index | `{split:,:1}` → second element | + /// | `N..M` | Exclusive range | `{split:,:1..3}` → elements 1,2 | + /// | `N..=M` | Inclusive range | `{split:,:1..=3}` → elements 1,2,3 | + /// | `N..` | From N to end | `{split:,:2..}` → from index 2 to end | + /// | `..N` | From start to N | `{split:,:..3}` → first 3 elements | + /// | `..=N` | From start to N inclusive | `{split:,:..=2}` → first 3 elements | + /// | `..` | All elements | `{split:,:..}` → all elements | + /// + /// 
Negative indices count from the end: + /// + /// - `-1` = last element + /// - `-2` = second to last element + /// - `-3..` = last 3 elements + /// + /// ## Escaping + /// + /// The parser intelligently handles pipe characters (`|`) based on context: + /// + /// **Pipes are automatically allowed in:** + /// + /// - **Split separators**: `{split:|:..}` (splits on pipe) + /// - **Regex patterns**: `{filter:\.(txt|md|log)}` (alternation) + /// - **Sed replacements**: `{replace:s/test/a|b/}` (pipe in replacement) + /// + /// **Manual escaping needed for:** + /// + /// - **Other arguments**: Use `\|` for literal pipes in join, append, prepend, etc. + /// - **Special characters**: Use `\:` for literal colons, `\\` for backslashes + /// - **Escape sequences**: Use `\n`, `\t`, `\r` for newline, tab, carriage return + /// + /// ## Enable Debug Mode + /// + /// - Add `!` after `{` to enable debug output for each operation: + /// - Example: `{!split:,:..|upper|join:-}` + pub fn parse(template: &str) -> Result { + match parser::parse_template(template) { + Ok((ops, debug)) => Ok(Template::new(template.to_string(), ops, debug)), + Err(e) => Err(e), + } + } + + /// Formats the input string using the operations defined in the template. 
+ /// + /// # Example + /// ```rust + /// use string_pipeline::Template; + /// + /// // Create a template that splits a string by commas, takes the first two items, and joins + /// // them with " and " + /// let template = Template::parse("{split:,:0..2|join: and }").unwrap(); + /// + /// // Format a string using the template + /// let result = template.format("a,b,c,d").unwrap(); + /// + /// assert_eq!(result, "a and b"); + /// ``` + pub fn format(&self, input: &str) -> Result { + apply_ops(input, &self.ops, self.debug) + } +} + +impl TryFrom<&str> for Template { + type Error = String; + + fn try_from(template: &str) -> Result { + Template::parse(template) + } +} + +impl Display for Template { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.raw) + } +}