Skip to content

Commit

Permalink
exclude files from churn (#1120)
Browse files Browse the repository at this point in the history
* exclude file from churn

* remove print
  • Loading branch information
o2sh committed Jul 17, 2023
1 parent 83dbce5 commit 9b8ef7f
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 60 deletions.
16 changes: 8 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ gix = { version = "0.48.0", default-features = false, features = [
"max-performance-safe",
] }
git2 = { version = "0.17.2", default-features = false }
globset = "0.4.11"
human-panic = "1.1.5"
image = "0.24.6"
num-format = "0.4.4"
Expand Down
4 changes: 2 additions & 2 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ pub struct InfoCliOptions {
#[arg(long, value_name = "NUM")]
pub churn_pool_size: Option<usize>,
/// Ignore all files & directories matching EXCLUDE
#[arg(long, short, num_args = 1.., value_hint = ValueHint::AnyPath)]
pub exclude: Vec<PathBuf>,
#[arg(long, short, num_args = 1..)]
pub exclude: Vec<String>,
/// Exclude [bot] commits. Use <REGEX> to override the default pattern
#[arg(long, value_name = "REGEX")]
pub no_bots: Option<Option<MyRegex>>,
Expand Down
2 changes: 1 addition & 1 deletion src/info/author.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::{
use serde::Serialize;
use std::fmt::Write;

#[derive(Serialize, Clone)]
#[derive(Serialize, Clone, Debug, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct Author {
pub name: String,
Expand Down
2 changes: 1 addition & 1 deletion src/info/churn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::{cli::NumberSeparator, info::format_number};
use serde::Serialize;
use std::fmt::Write;

#[derive(Serialize, Clone)]
#[derive(Serialize, Clone, Debug, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct FileChurn {
pub file_path: String,
Expand Down
70 changes: 48 additions & 22 deletions src/info/git/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use crate::info::churn::FileChurn;
use anyhow::Result;
use gix::bstr::BString;
use gix::date::Time;
use globset::{Glob, GlobSetBuilder};
use std::collections::HashMap;

pub struct GitMetrics {
Expand Down Expand Up @@ -39,7 +40,8 @@ impl GitMetrics {
number_of_commits_by_file_path,
options.info.number_of_file_churns,
options.text_formatting.number_separator,
);
&options.info.exclude,
)?;

// This could happen if a branch pointed to non-commit object, so no traversal actually happens.
let (time_of_first_commit, time_of_most_recent_commit) = time_of_first_commit
Expand All @@ -62,19 +64,33 @@ fn compute_file_churns(
number_of_commits_by_file_path: HashMap<BString, usize>,
number_of_file_churns_to_display: usize,
number_separator: NumberSeparator,
) -> Vec<FileChurn> {
globs_to_exclude: &[String],
) -> Result<Vec<FileChurn>> {
let mut builder = GlobSetBuilder::new();
for glob in globs_to_exclude {
builder.add(Glob::new(glob)?);
}
let glob_set = builder.build()?;
let mut number_of_commits_by_file_path_sorted = Vec::from_iter(number_of_commits_by_file_path);

number_of_commits_by_file_path_sorted
.sort_by(|(_, a_count), (_, b_count)| b_count.cmp(a_count));

number_of_commits_by_file_path_sorted
Ok(number_of_commits_by_file_path_sorted
.into_iter()
.map(|(file_path, nbr_of_commits)| {
FileChurn::new(file_path.to_string(), nbr_of_commits, number_separator)
.filter_map(|(file_path, nbr_of_commits)| {
if !glob_set.is_match(file_path.to_string()) {
Some(FileChurn::new(
file_path.to_string(),
nbr_of_commits,
number_separator,
))
} else {
None
}
})
.take(number_of_file_churns_to_display)
.collect()
.collect())
}

fn compute_authors(
Expand Down Expand Up @@ -124,14 +140,14 @@ mod tests {
name: "John Doe".into(),
email: "johndoe@example.com".into(),
},
10,
30,
);
number_of_commits_by_signature.insert(
Sig {
name: "Jane Doe".into(),
email: "janedoe@example.com".into(),
},
5,
20,
);
number_of_commits_by_signature.insert(
Sig {
Expand All @@ -140,12 +156,12 @@ mod tests {
},
50,
);
let total_number_of_commits = 15;
let total_number_of_commits = 100;
let number_of_authors_to_display = 2;
let show_email = false;
let number_separator = NumberSeparator::Comma;

let (authors, total_number_of_authors) = compute_authors(
let (actual, total_number_of_authors) = compute_authors(
number_of_commits_by_signature,
total_number_of_commits,
number_of_authors_to_display,
Expand All @@ -154,31 +170,41 @@ mod tests {
);

assert_eq!(total_number_of_authors, 3);
assert_eq!(authors.len(), 2);
assert_eq!(authors.get(0).unwrap().name, "Ellen Smith".to_string());
let expected = vec![
Author::new(String::from("Ellen Smith"), None, 50, 100, number_separator),
Author::new(String::from("John Doe"), None, 30, 100, number_separator),
];
assert_eq!(actual, expected);
}

#[test]
fn test_compute_file_churns() {
fn test_compute_file_churns() -> Result<()> {
let mut number_of_commits_by_file_path = HashMap::new();
number_of_commits_by_file_path.insert("path/to/file1.txt".into(), 2);
number_of_commits_by_file_path.insert("path/to/file2.txt".into(), 5);
number_of_commits_by_file_path.insert("path/to/file3.txt".into(), 3);
number_of_commits_by_file_path.insert("path/to/file4.txt".into(), 7);
number_of_commits_by_file_path.insert("foo/x/y/file.txt".into(), 70);
number_of_commits_by_file_path.insert("foo/x/file.txt".into(), 10);

let number_of_file_churns_to_display = 3;
let number_separator = NumberSeparator::Comma;
let file_churns = compute_file_churns(
let globs_to_exclude = vec![
"foo/**/file.txt".to_string(),
"path/to/file2.txt".to_string(),
];
let actual = compute_file_churns(
number_of_commits_by_file_path,
number_of_file_churns_to_display,
number_separator,
);

assert_eq!(file_churns.len(), 3);
assert_eq!(
file_churns.get(0).unwrap().file_path,
"path/to/file4.txt".to_string()
);
assert_eq!(file_churns.get(0).unwrap().nbr_of_commits, 7);
&globs_to_exclude,
)?;
let expected = vec![
FileChurn::new(String::from("path/to/file4.txt"), 7, number_separator),
FileChurn::new(String::from("path/to/file3.txt"), 3, number_separator),
FileChurn::new(String::from("path/to/file1.txt"), 2, number_separator),
];
assert_eq!(actual, expected);
Ok(())
}
}
28 changes: 4 additions & 24 deletions src/info/langs/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
use anyhow::{Context, Result};
use language::{Language, LanguageType};
use regex::Regex;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
use strum::IntoEnumIterator;

pub mod language;
Expand All @@ -17,11 +15,11 @@ pub fn get_main_language(loc_by_language: &[(Language, usize)]) -> Language {
/// The vector is sorted by loc in descending order.
pub fn get_loc_by_language_sorted(
dir: &Path,
ignored_directories: &[PathBuf],
globs_to_exclude: &[String],
language_types: &[LanguageType],
include_hidden: bool,
) -> Result<Vec<(Language, usize)>> {
let stats = get_statistics(dir, ignored_directories, language_types, include_hidden);
let stats = get_statistics(dir, globs_to_exclude, language_types, include_hidden);

let loc_by_language =
get_loc_by_language(&stats).context("Could not find any source code in this repository")?;
Expand Down Expand Up @@ -65,7 +63,7 @@ pub fn get_total_loc(loc_by_language: &[(Language, usize)]) -> usize {

fn get_statistics(
dir: &Path,
ignored_directories: &[PathBuf],
globs_to_exclude: &[String],
language_types: &[LanguageType],
include_hidden: bool,
) -> tokei::Languages {
Expand All @@ -77,8 +75,7 @@ fn get_statistics(
hidden: Some(include_hidden),
..tokei::Config::default()
};
let user_ignored = get_ignored_directories(ignored_directories);
let ignored: Vec<&str> = user_ignored.iter().map(AsRef::as_ref).collect();
let ignored: Vec<&str> = globs_to_exclude.iter().map(AsRef::as_ref).collect();
languages.get_statistics(&[&dir], &ignored, &tokei_config);
languages
}
Expand All @@ -90,23 +87,6 @@ fn filter_languages_on_type(types: &[LanguageType]) -> Vec<tokei::LanguageType>
.collect()
}

/// Turns the user-supplied paths to ignore into glob patterns.
///
/// Each path is rendered with `Path::display`. A rendered path that contains
/// a `/` is prefixed so that it starts with `**/` (only `**` is prepended when
/// the path already begins with `/`); a path without any `/` is returned
/// unchanged. The output preserves the order of the input, and an empty input
/// yields an empty vector.
fn get_ignored_directories(user_ignored_directories: &[PathBuf]) -> Vec<String> {
    user_ignored_directories
        .iter()
        .map(|user_ignored_directory| {
            let dir = user_ignored_directory.display().to_string();
            // Only the presence of a separator decides whether the glob prefix
            // is needed, so a plain substring test is sufficient here.
            if dir.contains('/') {
                let prefix = if dir.starts_with('/') { "**" } else { "**/" };
                format!("{prefix}{dir}")
            } else {
                dir
            }
        })
        .collect()
}

#[cfg(test)]
mod test {
use super::*;
Expand Down
4 changes: 2 additions & 2 deletions src/info/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,14 @@ pub fn build_info(cli_options: &CliOptions) -> Result<Info> {
let repo_path = get_work_dir(&repo)?;

let loc_by_language_sorted_handle = std::thread::spawn({
let ignored_directories = cli_options.info.exclude.clone();
let globs_to_exclude = cli_options.info.exclude.clone();
let language_types = cli_options.info.r#type.clone();
let include_hidden = cli_options.info.include_hidden;
let workdir = repo_path.clone();
move || {
langs::get_loc_by_language_sorted(
&workdir,
&ignored_directories,
&globs_to_exclude,
&language_types,
include_hidden,
)
Expand Down

0 comments on commit 9b8ef7f

Please sign in to comment.