Skip to content

Commit

Permalink
Use shared state to reduce expensive clones (#218)
Browse files Browse the repository at this point in the history
This uses a shared state between iterations to reduce the number of expensive clones.
  • Loading branch information
spenserblack committed Oct 11, 2023
2 parents 8955215 + 79b237d commit 8bdfd88
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 44 deletions.
64 changes: 38 additions & 26 deletions gengo/src/file_source/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use gix::{
index,
prelude::FindExt,
worktree::{stack::state::attributes::Source as AttrSource, Stack as WTStack},
ThreadSafeRepository,
Repository, ThreadSafeRepository,
};
use std::borrow::Cow;
use std::path::Path;
Expand Down Expand Up @@ -36,7 +36,7 @@ impl Builder {
}

/// Constructs a [`State`] for the repository and rev.
fn state(&self) -> crate::Result<State> {
fn state(&self) -> crate::Result<(State, index::State)> {
let repo = self.repository.to_thread_local();
let tree_id = repo
.rev_parse_single(self.rev.as_str())?
Expand All @@ -51,16 +51,16 @@ impl Builder {
let state = State {
attr_stack,
attr_matches,
index_state,
};
Ok(state)
Ok((state, index_state))
}

fn build(self) -> crate::Result<Git> {
let state = self.state()?;
let (state, index_state) = self.state()?;
let git = Git {
repository: self.repository,
state,
index_state,
};
Ok(git)
}
Expand All @@ -69,6 +69,7 @@ impl Builder {
pub struct Git {
repository: ThreadSafeRepository,
state: State,
index_state: index::State,
}

impl Git {
Expand All @@ -93,46 +94,57 @@ impl<'repo> FileSource<'repo> for Git {
type Entry = &'repo index::Entry;
type Filepath = Cow<'repo, Path>;
type Contents = Vec<u8>;
type State = (State, Repository);
type Iter = Iter<'repo>;

fn entries(&'repo self) -> crate::Result<Self::Iter> {
let entries = self.state.index_state.entries().iter();
let entries = self.index_state.entries().iter();
Ok(Iter { entries })
}

fn filepath(&'repo self, entry: &Self::Entry) -> crate::Result<Self::Filepath> {
let path_storage = self.state.index_state.path_backing();
fn filepath(
&'repo self,
entry: &Self::Entry,
_state: &mut Self::State,
) -> crate::Result<Self::Filepath> {
let path_storage = self.index_state.path_backing();
let path = entry.path_in(path_storage);
let path = gix::path::try_from_bstr(path)?;
Ok(path)
}

fn contents(&'repo self, entry: &Self::Entry) -> crate::Result<Self::Contents> {
let repository = self.repository.to_thread_local();
fn contents(
&'repo self,
entry: &Self::Entry,
(_, repository): &mut Self::State,
) -> crate::Result<Self::Contents> {
let blob = repository.find_object(entry.id)?;
let blob = blob.detach();
let contents = blob.data;
Ok(contents)
}

fn overrides<O: AsRef<Path>>(&self, path: O) -> Overrides {
let repo = self.repository.to_thread_local();
let attr_matches = {
let mut attr_stack = self.state.attr_stack.clone();
let mut attr_matches = self.state.attr_matches.clone();
let Ok(platform) =
attr_stack.at_path(path, Some(false), |id, buf| repo.objects.find_blob(id, buf))
else {
// NOTE If we cannot get overrides, simply don't return them.
return Default::default();
};
platform.matching_attributes(&mut attr_matches);
attr_matches
fn state(&'repo self) -> crate::Result<Self::State> {
Ok((self.state.clone(), self.repository.to_thread_local()))
}

fn overrides<O: AsRef<Path>>(
&self,
path: O,
(state, repository): &mut Self::State,
) -> Overrides {
let Ok(platform) = state.attr_stack.at_path(path, Some(false), |id, buf| {
repository.objects.find_blob(id, buf)
}) else {
// NOTE If we cannot get overrides, simply don't return them.
return Default::default();
};
platform.matching_attributes(&mut state.attr_matches);

let attrs = {
let mut attrs = [None, None, None, None, None];
attr_matches
state
.attr_matches
.iter_selected()
.zip(attrs.iter_mut())
.for_each(|(info, slot)| {
Expand Down Expand Up @@ -194,8 +206,8 @@ impl<'repo> Iterator for Iter<'repo> {
}
}

struct State {
#[derive(Clone)]
pub struct State {
attr_stack: WTStack,
attr_matches: AttrOutcome,
index_state: index::State,
}
60 changes: 47 additions & 13 deletions gengo/src/file_source/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,50 +11,84 @@ pub trait FileSource<'files>: Sync {
type Filepath: AsRef<Path>;
type Contents: AsRef<[u8]>;
type Entry: Send;
/// Sometimes it's necessary to share a state between iterations to reduce
/// expensive commands.
type State: Send + Clone;
type Iter: Iterator<Item = Self::Entry> + Send;

/// Returns an iterator over the entries use to get filenames and contents.
fn entries(&'files self) -> crate::Result<Self::Iter>;

/// Gets a filename from an entry.
fn filepath(&'files self, entry: &Self::Entry) -> crate::Result<Self::Filepath>;
fn filepath(
&'files self,
entry: &Self::Entry,
state: &mut Self::State,
) -> crate::Result<Self::Filepath>;

/// Gets file contents from an entry.
fn contents(&'files self, entry: &Self::Entry) -> crate::Result<Self::Contents>;
fn contents(
&'files self,
entry: &Self::Entry,
state: &mut Self::State,
) -> crate::Result<Self::Contents>;

/// Gets a state that can be shared between threads.
fn state(&'files self) -> crate::Result<Self::State>;

/// Provides combined overrides for the file.
fn overrides<O: AsRef<Path>>(&self, path: O) -> Overrides {
fn overrides<O: AsRef<Path>>(&self, path: O, state: &mut Self::State) -> Overrides {
Overrides {
language: self.language_override(&path),
is_documentation: self.is_documentation_override(&path),
is_generated: self.is_generated_override(&path),
is_vendored: self.is_vendored_override(&path),
is_detectable: self.is_detectable_override(&path),
language: self.language_override(&path, state),
is_documentation: self.is_documentation_override(&path, state),
is_generated: self.is_generated_override(&path, state),
is_vendored: self.is_vendored_override(&path, state),
is_detectable: self.is_detectable_override(&path, state),
}
}

/// Provides an optional override for the detected language.
fn language_override<O: AsRef<Path>>(&self, _path: O) -> Option<String> {
fn language_override<O: AsRef<Path>>(
&self,
_path: O,
_state: &mut Self::State,
) -> Option<String> {
None
}

/// Provides an optional override for documentation file detection.
fn is_documentation_override<O: AsRef<Path>>(&self, _path: O) -> Option<bool> {
fn is_documentation_override<O: AsRef<Path>>(
&self,
_path: O,
_state: &mut Self::State,
) -> Option<bool> {
None
}

/// Provides an optional override for generated file detection.
fn is_generated_override<O: AsRef<Path>>(&self, _path: O) -> Option<bool> {
fn is_generated_override<O: AsRef<Path>>(
&self,
_path: O,
_state: &mut Self::State,
) -> Option<bool> {
None
}

/// Provides an optional override for vendored file detection.
fn is_vendored_override<O: AsRef<Path>>(&self, _path: O) -> Option<bool> {
fn is_vendored_override<O: AsRef<Path>>(
&self,
_path: O,
_state: &mut Self::State,
) -> Option<bool> {
None
}

/// Provides an optional override for if the file is detectable.
fn is_detectable_override<O: AsRef<Path>>(&self, _path: O) -> Option<bool> {
fn is_detectable_override<O: AsRef<Path>>(
&self,
_path: O,
_state: &mut Self::State,
) -> Option<bool> {
None
}
}
Expand Down
13 changes: 8 additions & 5 deletions gengo/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,16 +69,18 @@ pub struct Gengo<FS: for<'fs> FileSource<'fs>> {
impl<FS: for<'fs> FileSource<'fs>> Gengo<FS> {
/// Analyzes each file in the repository at the given revision.
pub fn analyze(&self) -> Result<Analysis> {
let state = self.file_source.state()?;
let entries: Vec<(_, _)> = self
.file_source
.entries()?
.par_bridge()
.filter_map(|entry| {
let filepath = self.file_source.filepath(&entry).ok()?;
let contents = self.file_source.contents(&entry).ok()?;
let entry = self.analyze_blob(&filepath, contents)?;
.map_with(state, |state, entry| {
let filepath = self.file_source.filepath(&entry, state).ok()?;
let contents = self.file_source.contents(&entry, state).ok()?;
let entry = self.analyze_blob(&filepath, contents, state)?;
Some((filepath.as_ref().to_owned(), entry))
})
.filter_map(|entry| entry)
.collect();

Ok(Analysis(entries))
Expand All @@ -88,8 +90,9 @@ impl<FS: for<'fs> FileSource<'fs>> Gengo<FS> {
&self,
filepath: impl AsRef<Path>,
contents: impl AsRef<[u8]>,
state: &mut <FS as FileSource>::State,
) -> Option<Entry> {
let overrides = self.file_source.overrides(&filepath);
let overrides = self.file_source.overrides(&filepath, state);
let filepath = filepath.as_ref();
let contents = contents.as_ref();

Expand Down

0 comments on commit 8bdfd88

Please sign in to comment.