diff --git a/src/file/tree/filter.rs b/src/file/tree/filter.rs index f7e361c..965d09c 100644 --- a/src/file/tree/filter.rs +++ b/src/file/tree/filter.rs @@ -1,4 +1,5 @@ use crate::{ + error::prelude::*, file::{tree::Tree, File}, user::{ args::{FileType, Layout}, @@ -6,24 +7,79 @@ use crate::{ }, }; use ahash::HashSet; -use indextree::NodeId; +use indextree::{NodeEdge, NodeId}; +use regex::Regex; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("A regex pattern was not provided")] + MissingRegexPattern, + + #[error("{0}")] + InvalidRegex(regex::Error), +} /// Predicate used for filtering a [`File`] based on [`FileType`]. pub type FileTypeFilter = dyn Fn(&File) -> bool; impl Tree { + /// Filter [`File`]s in the [`indextree::Arena`] based on provided [`Context`]. The order in + /// which the filters are applied matters. + pub fn filter_nodes(&mut self, ctx: &Context) -> Result<()> { + if !ctx.file_type.is_empty() { + self.filter_file_type(ctx); + } + + if ctx.pattern.is_some() { + self.filter_regex(ctx)?; + } + + if ctx.prune { + self.prune(); + } + + Ok(()) + } + + /// Remove all directories that have no children. + fn prune(&mut self) { + let mut pruning = true; + + while pruning { + let mut to_remove = vec![]; + + for node_edge in self.root_id.traverse(&self.arena) { + match node_edge { + NodeEdge::Start(_) => continue, + NodeEdge::End(n) => { + if self.arena[n].get().is_dir() && n.children(&self.arena).count() == 0 { + to_remove.push(n); + } + }, + } + } + + if !to_remove.is_empty() { + to_remove + .into_iter() + .for_each(|n| n.remove_subtree(&mut self.arena)); + continue; + } + + pruning = false; + } + } + /// Updates the [`Tree`]'s inner [`indextree::Arena`] to only contain files of certain - /// file-types. + /// file-types. This should not affect disk-usage calculations. + /// + /// TODO: Consider using Rayon for parallel filtering. pub fn filter_file_type( &mut self, Context { layout, file_type, .. }: &Context, ) { - if file_type.is_empty() { - return; - } - let mut filters = Vec::>::new(); for ft in HashSet::from_iter(file_type) { @@ -56,6 +112,46 @@ impl Tree { to_remove .into_iter() - .for_each(|n| n.detach(&mut self.arena)); + .for_each(|n| n.remove_subtree(&mut self.arena)); + } + + pub fn filter_regex( + &mut self, + Context { + pattern, layout, .. + }: &Context, + ) -> Result<()> { + let re_pattern = pattern + .as_ref() + .ok_or(Error::MissingRegexPattern) + .into_report(ErrorCategory::User)?; + + let regex = Regex::new(re_pattern) + .map_err(Error::InvalidRegex) + .into_report(ErrorCategory::User)?; + + let to_remove = match layout { + Layout::Flat => self + .root_id + .descendants(&self.arena) + .filter(|node_id| { + !regex.is_match(self.arena[*node_id].get().path().to_string_lossy().as_ref()) + }) + .collect::>(), + _ => self + .root_id + .descendants(&self.arena) + .filter(|node_id| { + let node = self.arena[*node_id].get(); + !node.is_dir() && !regex.is_match(node.path().to_string_lossy().as_ref()) + }) + .collect::>(), + }; + + to_remove + .into_iter() + .for_each(|n| n.remove_subtree(&mut self.arena)); + + Ok(()) } } diff --git a/src/file/tree/mod.rs b/src/file/tree/mod.rs index bd25e21..afaa8c2 100644 --- a/src/file/tree/mod.rs +++ b/src/file/tree/mod.rs @@ -11,7 +11,7 @@ use ahash::{HashMap, HashSet}; use indextree::{Arena, NodeId}; use std::{ops::Deref, path::PathBuf}; -/// Concerned with filtering via file-type, globbing, and regular expressions. +/// Concerned with pruning and filtering via file-type, globbing, and regular expressions. mod filter; /// Parallel disk reading. @@ -300,19 +300,6 @@ impl Tree { }) } - /// Remove directories that have no children. - pub fn prune(&mut self) { - let to_prune = self - .root_id - .descendants(&self.arena) - .filter(|n| self.arena[*n].get().is_dir() && n.children(&self.arena).count() == 0) - .collect::>(); - - to_prune - .into_iter() - .for_each(|n| n.remove_subtree(&mut self.arena)); - } - pub fn root_id(&self) -> NodeId { self.root_id } diff --git a/src/main.rs b/src/main.rs index 402443c..a7488bb 100644 --- a/src/main.rs +++ b/src/main.rs @@ -52,13 +52,7 @@ fn run() -> Result<()> { })? }; - if ctx.prune { - file_tree.prune(); - } - - if !ctx.file_type.is_empty() { - file_tree.filter_file_type(&ctx) - } + file_tree.filter_nodes(&ctx)?; let output = render::output(&file_tree, &ctx)?; diff --git a/src/user/mod.rs b/src/user/mod.rs index 1a3dd30..71b258a 100644 --- a/src/user/mod.rs +++ b/src/user/mod.rs @@ -97,7 +97,8 @@ pub struct Context { #[arg(short, long, value_enum, default_value_t)] pub metric: args::Metric, - /// Regular expression (or glob if '--glob' or '--iglob' is used) used to match files + /// Regular expression (or glob if '--glob' or '--iglob' is used) used to match files by their + /// relative path #[arg(short, long)] pub pattern: Option,