Skip to content

Commit

Permalink
fix pruning logic
Browse files Browse the repository at this point in the history
  • Loading branch information
solidiquis committed Dec 1, 2023
1 parent 6a9e95b commit 40ecb03
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 29 deletions.
110 changes: 103 additions & 7 deletions src/file/tree/filter.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,85 @@
use crate::{
error::prelude::*,
file::{tree::Tree, File},
user::{
args::{FileType, Layout},
Context,
},
};
use ahash::HashSet;
use indextree::NodeId;
use indextree::{NodeEdge, NodeId};
use regex::Regex;

/// Errors produced while filtering the [`Tree`] with a regular expression.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// Regex filtering was attempted but no pattern was present in the [`Context`].
#[error("A regex pattern was not provided")]
MissingRegexPattern,

/// The provided pattern could not be compiled by [`Regex::new`]; displays the message of the
/// wrapped [`regex::Error`].
#[error("{0}")]
InvalidRegex(regex::Error),
}

/// Predicate used for filtering a [`File`] based on [`FileType`].
///
/// This is an unsized trait-object type, so it is used behind a pointer; `filter_file_type`
/// collects its predicates as `Box<FileTypeFilter>`.
pub type FileTypeFilter = dyn Fn(&File) -> bool;

impl Tree {
/// Applies every filter requested by the [`Context`] to the [`File`]s stored in the
/// [`indextree::Arena`].
///
/// The filters run in a fixed order, and that order matters:
/// 1. file-type filtering, when `ctx.file_type` is non-empty;
/// 2. regex filtering, when `ctx.pattern` is set;
/// 3. pruning of childless directories, when `ctx.prune` is set.
///
/// # Errors
///
/// Propagates any error returned by [`Tree::filter_regex`].
pub fn filter_nodes(&mut self, ctx: &Context) -> Result<()> {
    let by_file_type = !ctx.file_type.is_empty();
    let by_pattern = ctx.pattern.is_some();

    if by_file_type {
        self.filter_file_type(ctx);
    }

    if by_pattern {
        self.filter_regex(ctx)?;
    }

    // Pruning goes last so that directories emptied by the filters above are also removed.
    if ctx.prune {
        self.prune();
    }

    Ok(())
}

/// Remove all directories that have no children.
///
/// Pruning cascades: removing an empty directory may leave its parent without children, so
/// the tree is re-scanned until a full pass finds nothing left to remove.
///
/// The root itself is never removed, even when it ends up childless. Removing it would leave
/// `self.root_id` referring to a removed node, which the next pass of this very loop (and any
/// later user of the tree) would then try to traverse.
fn prune(&mut self) {
    let root_id = self.root_id;

    loop {
        let mut to_remove = vec![];

        for node_edge in root_id.traverse(&self.arena) {
            // Each node is inspected exactly once, on its `End` edge; `Start` edges are
            // ignored.
            if let NodeEdge::End(n) = node_edge {
                let is_empty_dir = n != root_id
                    && self.arena[n].get().is_dir()
                    && n.children(&self.arena).next().is_none();

                if is_empty_dir {
                    to_remove.push(n);
                }
            }
        }

        // Fixed point reached: no childless directories remain below the root.
        if to_remove.is_empty() {
            break;
        }

        for n in to_remove {
            n.remove_subtree(&mut self.arena);
        }
    }
}

/// Updates the [`Tree`]'s inner [`indextree::Arena`] to only contain files of certain
/// file-types.
/// file-types. This should not affect disk-usage calculations.
///
/// TODO: Consider using Rayon for parallel filtering.
pub fn filter_file_type(
&mut self,
Context {
layout, file_type, ..
}: &Context,
) {
if file_type.is_empty() {
return;
}

let mut filters = Vec::<Box<FileTypeFilter>>::new();

for ft in HashSet::from_iter(file_type) {
Expand Down Expand Up @@ -56,6 +112,46 @@ impl Tree {

to_remove
.into_iter()
.for_each(|n| n.detach(&mut self.arena));
.for_each(|n| n.remove_subtree(&mut self.arena));
}

/// Removes from the [`indextree::Arena`] the nodes whose path does not match the regular
/// expression held in the [`Context`]'s `pattern`.
///
/// With [`Layout::Flat`] every non-matching node is removed, directories included. With any
/// other layout directories are always kept and only non-directory nodes are tested against
/// the pattern. Each rejected node is removed together with its subtree.
///
/// NOTE(review): the scan starts from `self.root_id` itself, so in the flat layout a root
/// whose path does not match would be removed as well — confirm this is intended.
///
/// # Errors
///
/// Returns a [`ErrorCategory::User`] report wrapping [`Error::MissingRegexPattern`] when no
/// pattern is set, or [`Error::InvalidRegex`] when the pattern fails to compile.
pub fn filter_regex(
    &mut self,
    Context {
        pattern, layout, ..
    }: &Context,
) -> Result<()> {
    let raw_pattern = pattern
        .as_ref()
        .ok_or(Error::MissingRegexPattern)
        .into_report(ErrorCategory::User)?;

    let matcher = Regex::new(raw_pattern)
        .map_err(Error::InvalidRegex)
        .into_report(ErrorCategory::User)?;

    let flat = matches!(layout, Layout::Flat);

    // Collect first, remove afterwards: the arena cannot be mutated while it is being walked.
    let mut rejected = Vec::new();

    for node_id in self.root_id.descendants(&self.arena) {
        let file = self.arena[node_id].get();

        // Outside of the flat layout directories are never candidates for removal here.
        if !flat && file.is_dir() {
            continue;
        }

        if !matcher.is_match(file.path().to_string_lossy().as_ref()) {
            rejected.push(node_id);
        }
    }

    for node_id in rejected {
        node_id.remove_subtree(&mut self.arena);
    }

    Ok(())
}
}
15 changes: 1 addition & 14 deletions src/file/tree/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use ahash::{HashMap, HashSet};
use indextree::{Arena, NodeId};
use std::{ops::Deref, path::PathBuf};

/// Concerned with filtering via file-type, globbing, and regular expressions.
/// Concerned with pruning and filtering via file-type, globbing, and regular expressions.
mod filter;

/// Parallel disk reading.
Expand Down Expand Up @@ -300,19 +300,6 @@ impl Tree {
})
}

/// Remove directories that have no children.
///
/// Performs a single scan starting at `self.root_id`, collecting every directory that has no
/// children, and then removes each collected node together with its subtree.
pub fn prune(&mut self) {
    let mut childless_dirs = Vec::new();

    for id in self.root_id.descendants(&self.arena) {
        let is_dir = self.arena[id].get().is_dir();

        if is_dir && id.children(&self.arena).next().is_none() {
            childless_dirs.push(id);
        }
    }

    for id in childless_dirs {
        id.remove_subtree(&mut self.arena);
    }
}

pub fn root_id(&self) -> NodeId {
self.root_id
}
Expand Down
8 changes: 1 addition & 7 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,7 @@ fn run() -> Result<()> {
})?
};

if ctx.prune {
file_tree.prune();
}

if !ctx.file_type.is_empty() {
file_tree.filter_file_type(&ctx)
}
file_tree.filter_nodes(&ctx)?;

let output = render::output(&file_tree, &ctx)?;

Expand Down
3 changes: 2 additions & 1 deletion src/user/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ pub struct Context {
#[arg(short, long, value_enum, default_value_t)]
pub metric: args::Metric,

/// Regular expression (or glob if '--glob' or '--iglob' is used) used to match files
/// Regular expression (or glob if '--glob' or '--iglob' is used) used to match files by their
/// relative path
#[arg(short, long)]
pub pattern: Option<String>,

Expand Down

0 comments on commit 40ecb03

Please sign in to comment.